From 8bd2646044d54e88739f054ccdcfe4052625320c Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Fri, 12 May 2023 10:46:02 +0200 Subject: [PATCH 001/163] check CI with old version of MPI.jl (#1450) --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 9796ae5e04a..333f571e728 100644 --- a/Project.toml +++ b/Project.toml @@ -53,7 +53,7 @@ HDF5 = "0.14, 0.15, 0.16" IfElse = "0.1" LinearMaps = "2.7, 3.0" LoopVectorization = "0.12.118" -MPI = "0.20" +MPI = "0.20 - 0.20.8" MuladdMacro = "0.2.2" Octavian = "0.3.5" OffsetArrays = "1.3" From 3a3692d303b6b9922b636eb4aee9815b3a0d5a93 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Fri, 12 May 2023 13:55:08 +0200 Subject: [PATCH 002/163] precompile less when running tests --- test/Project.toml | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/test/Project.toml b/test/Project.toml index 35cc57fb67c..7d386415227 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,3 +1,12 @@ +[compat] +BSON = "0.3.3" +CairoMakie = "0.6, 0.7, 0.8, 0.9, 0.10" +Flux = "0.13 - 0.13.12" # TODO: Return to "0.13" once https://github.com/FluxML/Flux.jl/issues/2204 is resolved +ForwardDiff = "0.10" +MPI = "0.20" +OrdinaryDiffEq = "6.49.1" +Plots = "1.16" + [deps] BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" @@ -11,11 +20,12 @@ Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -[compat] -BSON = "0.3.3" -CairoMakie = "0.6, 0.7, 0.8, 0.9, 0.10" -Flux = "0.13 - 0.13.12" # TODO: Return to "0.13" once https://github.com/FluxML/Flux.jl/issues/2204 is resolved -ForwardDiff = "0.10" -MPI = "0.20" -OrdinaryDiffEq = "6.49.1" -Plots = "1.16" +[preferences.OrdinaryDiffEq] +PrecompileAutoSpecialize = false +PrecompileAutoSwitch = false +PrecompileDefaultSpecialize = true +PrecompileFunctionWrapperSpecialize = false +PrecompileLowStorage = true +PrecompileNoSpecialize = false +PrecompileNonStiff = true +PrecompileStiff = false From 445612cae4d062a673669c5f8f80436014c0c0ef Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Fri, 12 May 2023 13:55:25 +0200 Subject: [PATCH 003/163] Revert "precompile less when running tests" This reverts commit 3a3692d303b6b9922b636eb4aee9815b3a0d5a93. 
--- test/Project.toml | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/test/Project.toml b/test/Project.toml index 7d386415227..35cc57fb67c 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,12 +1,3 @@ -[compat] -BSON = "0.3.3" -CairoMakie = "0.6, 0.7, 0.8, 0.9, 0.10" -Flux = "0.13 - 0.13.12" # TODO: Return to "0.13" once https://github.com/FluxML/Flux.jl/issues/2204 is resolved -ForwardDiff = "0.10" -MPI = "0.20" -OrdinaryDiffEq = "6.49.1" -Plots = "1.16" - [deps] BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" @@ -20,12 +11,11 @@ Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -[preferences.OrdinaryDiffEq] -PrecompileAutoSpecialize = false -PrecompileAutoSwitch = false -PrecompileDefaultSpecialize = true -PrecompileFunctionWrapperSpecialize = false -PrecompileLowStorage = true -PrecompileNoSpecialize = false -PrecompileNonStiff = true -PrecompileStiff = false +[compat] +BSON = "0.3.3" +CairoMakie = "0.6, 0.7, 0.8, 0.9, 0.10" +Flux = "0.13 - 0.13.12" # TODO: Return to "0.13" once https://github.com/FluxML/Flux.jl/issues/2204 is resolved +ForwardDiff = "0.10" +MPI = "0.20" +OrdinaryDiffEq = "6.49.1" +Plots = "1.16" From 17583e358e18fcdfcc2cedc0f1a892e6242d40e8 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 12 May 2023 14:55:52 +0200 Subject: [PATCH 004/163] Sort precompile options alphabetically (#1456) --- docs/src/troubleshooting.md | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/docs/src/troubleshooting.md b/docs/src/troubleshooting.md index efe86f10b05..3f1846ab39e 100644 --- a/docs/src/troubleshooting.md +++ b/docs/src/troubleshooting.md @@ -175,14 +175,16 @@ to execute the following Julia code. ```julia using Preferences, UUIDs -set_preferences!(UUID("1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"), "PrecompileNonStiff" => true) -set_preferences!(UUID("1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"), "PrecompileStiff" => false) -set_preferences!(UUID("1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"), "PrecompileAutoSwitch" => false) -set_preferences!(UUID("1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"), "PrecompileLowStorage" => true) -set_preferences!(UUID("1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"), "PrecompileDefaultSpecialize" => true) -set_preferences!(UUID("1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"), "PrecompileAutoSpecialize" => false) -set_preferences!(UUID("1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"), "PrecompileFunctionWrapperSpecialize" => false) -set_preferences!(UUID("1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"), "PrecompileNoSpecialize" => false) +let uuid = UUID("1dea7af3-3e70-54e6-95c3-0bf5283fa5ed") + set_preferences!(uuid, "PrecompileAutoSpecialize" => false) + set_preferences!(uuid, "PrecompileAutoSwitch" => false) + set_preferences!(uuid, "PrecompileDefaultSpecialize" => true) + set_preferences!(uuid, "PrecompileFunctionWrapperSpecialize" => false) + set_preferences!(uuid, "PrecompileLowStorage" => true) + set_preferences!(uuid, "PrecompileNoSpecialize" => false) + set_preferences!(uuid, "PrecompileNonStiff" => true) + set_preferences!(uuid, "PrecompileStiff" => false) +end ``` This disables precompilation of all implicit methods. 
This should usually not affect From 6df64e996ea5670a357606b59323e3b6fb5794c5 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Fri, 12 May 2023 17:05:48 +0200 Subject: [PATCH 005/163] precompile less when running tests (#1455) --- test/Project.toml | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/test/Project.toml b/test/Project.toml index 35cc57fb67c..7d386415227 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,3 +1,12 @@ +[compat] +BSON = "0.3.3" +CairoMakie = "0.6, 0.7, 0.8, 0.9, 0.10" +Flux = "0.13 - 0.13.12" # TODO: Return to "0.13" once https://github.com/FluxML/Flux.jl/issues/2204 is resolved +ForwardDiff = "0.10" +MPI = "0.20" +OrdinaryDiffEq = "6.49.1" +Plots = "1.16" + [deps] BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" @@ -11,11 +20,12 @@ Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -[compat] -BSON = "0.3.3" -CairoMakie = "0.6, 0.7, 0.8, 0.9, 0.10" -Flux = "0.13 - 0.13.12" # TODO: Return to "0.13" once https://github.com/FluxML/Flux.jl/issues/2204 is resolved -ForwardDiff = "0.10" -MPI = "0.20" -OrdinaryDiffEq = "6.49.1" -Plots = "1.16" +[preferences.OrdinaryDiffEq] +PrecompileAutoSpecialize = false +PrecompileAutoSwitch = false +PrecompileDefaultSpecialize = true +PrecompileFunctionWrapperSpecialize = false +PrecompileLowStorage = true +PrecompileNoSpecialize = false +PrecompileNonStiff = true +PrecompileStiff = false From 8d8619c975380296fb03322811215142120a04ee Mon Sep 17 00:00:00 2001 From: Jesse Chan <1156048+jlchan@users.noreply.github.com> Date: Sun, 14 May 2023 00:28:50 -0500 Subject: [PATCH 006/163] specialize `calc_boundary_flux!` for nonconservative terms for `DGMulti` (#1431) * minor change for consistency * formatting * add nonconservative terms to DGMulti `calc_boundary_flux!` * add noncon boundary flux * fix dropped dg.surface_flux * formatting * clean up noncons BCs * adding specialization of nonconservative Powell flux for BCs * fix BoundaryConditionDoNothing for nonconservative terms * add elixir * add test * comment * importing norm * import dot as well * adding forgotten analysis callback * Update src/solvers/dgmulti/dg.jl Co-authored-by: Michael Schlottke-Lakemper * remove some name-based type instabilities * replace some instances of `rd.Nfaces` with `StartUpDG.num_faces` * Update examples/dgmulti_2d/elixir_mhd_reflective_BCs.jl Co-authored-by: Michael Schlottke-Lakemper * fix StartUpDG.num_faces call * Update src/basic_types.jl Co-authored-by: Hendrik Ranocha * Update examples/dgmulti_2d/elixir_mhd_reflective_BCs.jl Co-authored-by: Hendrik Ranocha * update test elixir * Update src/solvers/dgmulti/dg.jl Co-authored-by: Hendrik Ranocha * fix calc_boundary_flux! 
signature * switch to dispatch for Dirichlet/DoNothing BCs when using noncons flux * fix nonconservative BC * fix type ambiguity * fix type ambiguity by redesigning nonconservative BC signature * Update src/basic_types.jl Co-authored-by: Hendrik Ranocha * Update examples/dgmulti_2d/elixir_mhd_reflective_BCs.jl Co-authored-by: Hendrik Ranocha * Update src/basic_types.jl Co-authored-by: Hendrik Ranocha * make nonconservative BCs consistent with rest of Trixi * renaming * deleting unused boundary condition implementations --------- Co-authored-by: Michael Schlottke-Lakemper Co-authored-by: Hendrik Ranocha --- .../dgmulti_2d/elixir_mhd_reflective_wall.jl | 105 ++++++++++++++++++ src/basic_types.jl | 8 +- src/callbacks_step/glm_speed_dg.jl | 2 +- src/equations/equations.jl | 7 +- src/solvers/dgmulti/dg.jl | 77 +++++++++++-- src/solvers/dgmulti/flux_differencing.jl | 7 +- .../dgmulti/flux_differencing_gauss_sbp.jl | 14 ++- src/solvers/dgsem_tree/dg_2d.jl | 2 +- test/test_dgmulti_2d.jl | 8 ++ 9 files changed, 203 insertions(+), 27 deletions(-) create mode 100644 examples/dgmulti_2d/elixir_mhd_reflective_wall.jl diff --git a/examples/dgmulti_2d/elixir_mhd_reflective_wall.jl b/examples/dgmulti_2d/elixir_mhd_reflective_wall.jl new file mode 100644 index 00000000000..a1351cf8244 --- /dev/null +++ b/examples/dgmulti_2d/elixir_mhd_reflective_wall.jl @@ -0,0 +1,105 @@ + +using OrdinaryDiffEq +using Trixi +using LinearAlgebra: norm, dot # for use in the MHD boundary condition + +############################################################################### +# semidiscretization of the compressible ideal GLM-MHD equations +equations = IdealGlmMhdEquations2D(1.4) + +function initial_condition_perturbation(x, t, equations::IdealGlmMhdEquations2D) + # pressure perturbation in a vertically magnetized field on the domain [-1, 1]^2 + + r2 = (x[1] + 0.25)^2 + (x[2] + 0.25)^2 + + rho = 1.0 + v1 = 0.0 + v2 = 0.0 + v3 = 0.0 + p = 1 + 0.5 * exp(-100 * r2) + + # the pressure and magnetic field are chosen to be strongly + # magnetized, such that p / ||B||^2 ≈ 0.01. + B1 = 0.0 + B2 = 40.0 / sqrt(4.0 * pi) + B3 = 0.0 + + psi = 0.0 + return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3, psi), equations) +end +initial_condition = initial_condition_perturbation + +surface_flux = (flux_lax_friedrichs, flux_nonconservative_powell) +volume_flux = (flux_hindenlang_gassner, flux_nonconservative_powell) + +solver = DGMulti(polydeg=3, element_type = Quad(), approximation_type = GaussSBP(), + surface_integral = SurfaceIntegralWeakForm(surface_flux), + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) + +x_neg(x, tol=50*eps()) = abs(x[1] + 1) < tol +x_pos(x, tol=50*eps()) = abs(x[1] - 1) < tol +y_neg(x, tol=50*eps()) = abs(x[2] + 1) < tol +y_pos(x, tol=50*eps()) = abs(x[2] - 1) < tol +is_on_boundary = Dict(:x_neg => x_neg, :x_pos => x_pos, :y_neg => y_neg, :y_pos => y_pos) + +cells_per_dimension = (16, 16) +mesh = DGMultiMesh(solver, cells_per_dimension; periodicity=(false, false), is_on_boundary) + +# Create a "reflective-like" boundary condition by mirroring the velocity but leaving the magnetic field alone. +# Note that this boundary condition is probably not entropy stable. 
+function boundary_condition_velocity_slip_wall(u_inner, normal_direction::AbstractVector, x, t, + surface_flux_function, equations::IdealGlmMhdEquations2D) + + # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later + norm_ = norm(normal_direction) + normal = normal_direction / norm_ + + # compute the primitive variables + rho, v1, v2, v3, p, B1, B2, B3, psi = cons2prim(u_inner, equations) + + v_normal = dot(normal, SVector(v1, v2)) + u_mirror = prim2cons(SVector(rho, v1 - 2 * v_normal * normal[1], + v2 - 2 * v_normal * normal[2], + v3, p, B1, B2, B3, psi), equations) + + return surface_flux_function(u_inner, u_mirror, normal, equations) * norm_ +end + +boundary_conditions = (; x_neg=boundary_condition_velocity_slip_wall, + x_pos=boundary_condition_velocity_slip_wall, + y_neg=boundary_condition_do_nothing, + y_pos=BoundaryConditionDirichlet(initial_condition)) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver; + boundary_conditions=boundary_conditions) + +############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 0.075) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval, uEltype=real(solver)) +alive_callback = AliveCallback(alive_interval=10) + +cfl = 0.5 +stepsize_callback = StepsizeCallback(cfl=cfl) +glm_speed_callback = GlmSpeedCallback(glm_scale=0.5, cfl=cfl) + +callbacks = CallbackSet(summary_callback, + analysis_callback, + alive_callback, + stepsize_callback, + glm_speed_callback) + +############################################################################### +# run the simulation + +sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=1e-5, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); + +summary_callback() # print the timer summary diff --git a/src/basic_types.jl b/src/basic_types.jl index ea7d542c5d1..4539e26dea3 100644 --- a/src/basic_types.jl +++ b/src/basic_types.jl @@ -76,12 +76,14 @@ struct BoundaryConditionDoNothing end # This version can be called by hyperbolic solvers on logically Cartesian meshes @inline function (::BoundaryConditionDoNothing)( u_inner, orientation_or_normal_direction, direction::Integer, x, t, surface_flux, equations) + return flux(u_inner, orientation_or_normal_direction, equations) end # This version can be called by hyperbolic solvers on unstructured, curved meshes -@inline function (::BoundaryConditionDoNothing)( - u_inner, outward_direction::AbstractVector, x, t, surface_flux, equations) +@inline function (::BoundaryConditionDoNothing)(u_inner, outward_direction::AbstractVector, + x, t, surface_flux, equations) + return flux(u_inner, outward_direction, equations) end @@ -89,7 +91,7 @@ end @inline function (::BoundaryConditionDoNothing)(inner_flux_or_state, other_args...) return inner_flux_or_state end - + """ boundary_condition_do_nothing = Trixi.BoundaryConditionDoNothing() diff --git a/src/callbacks_step/glm_speed_dg.jl b/src/callbacks_step/glm_speed_dg.jl index dce64d5d042..eef01ed0471 100644 --- a/src/callbacks_step/glm_speed_dg.jl +++ b/src/callbacks_step/glm_speed_dg.jl @@ -23,7 +23,7 @@ function calc_dt_for_cleaning_speed(cfl::Real, mesh, # Compute time step for GLM linear advection equation with c_h=1 for a DGMulti discretization. 
# Copies implementation behavior of `calc_dt_for_cleaning_speed` for DGSEM discretizations. - max_scaled_speed_for_c_h = (1 / minimum(md.J)) * ndims(equations) + max_scaled_speed_for_c_h = inv(minimum(md.J)) * ndims(equations) # This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by # `polydeg+1`. This is because `nnodes(dg)` returns the total number of diff --git a/src/equations/equations.jl b/src/equations/equations.jl index c1669531def..e44270737e8 100644 --- a/src/equations/equations.jl +++ b/src/equations/equations.jl @@ -171,7 +171,8 @@ end @inline function (boundary_condition::BoundaryConditionDirichlet)(u_inner, normal_direction::AbstractVector, x, t, - surface_flux_function, equations) + surface_flux_function, + equations) # get the external value of the solution u_boundary = boundary_condition.boundary_value_function(x, t, equations) @@ -328,7 +329,7 @@ include("compressible_euler_multicomponent_2d.jl") eachcomponent(equations::AbstractCompressibleEulerMulticomponentEquations) Return an iterator over the indices that specify the location in relevant data structures -for the components in `AbstractCompressibleEulerMulticomponentEquations`. +for the components in `AbstractCompressibleEulerMulticomponentEquations`. In particular, not the components themselves are returned. """ @inline eachcomponent(equations::AbstractCompressibleEulerMulticomponentEquations) = Base.OneTo(ncomponents(equations)) @@ -350,7 +351,7 @@ include("ideal_glm_mhd_multicomponent_2d.jl") eachcomponent(equations::AbstractIdealGlmMhdMulticomponentEquations) Return an iterator over the indices that specify the location in relevant data structures -for the components in `AbstractIdealGlmMhdMulticomponentEquations`. +for the components in `AbstractIdealGlmMhdMulticomponentEquations`. In particular, not the components themselves are returned. 
""" @inline eachcomponent(equations::AbstractIdealGlmMhdMulticomponentEquations) = Base.OneTo(ncomponents(equations)) diff --git a/src/solvers/dgmulti/dg.jl b/src/solvers/dgmulti/dg.jl index f9e30f8f871..5d087f0deb2 100644 --- a/src/solvers/dgmulti/dg.jl +++ b/src/solvers/dgmulti/dg.jl @@ -428,24 +428,27 @@ end # do nothing for periodic (default) boundary conditions calc_boundary_flux!(cache, t, boundary_conditions::BoundaryConditionPeriodic, - mesh, equations, dg::DGMulti) = nothing + mesh, have_nonconservative_terms, equations, dg::DGMulti) = nothing # "lispy tuple programming" instead of for loop for type stability -function calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, dg::DGMulti) +function calc_boundary_flux!(cache, t, boundary_conditions, mesh, + have_nonconservative_terms, equations, dg::DGMulti) + # peel off first boundary condition calc_single_boundary_flux!(cache, t, first(boundary_conditions), first(keys(boundary_conditions)), - mesh, equations, dg) + mesh, have_nonconservative_terms, equations, dg) # recurse on the remainder of the boundary conditions - calc_boundary_flux!(cache, t, Base.tail(boundary_conditions), mesh, equations, dg) + calc_boundary_flux!(cache, t, Base.tail(boundary_conditions), + mesh, have_nonconservative_terms, equations, dg) end # terminate recursion calc_boundary_flux!(cache, t, boundary_conditions::NamedTuple{(),Tuple{}}, - mesh, equations, dg::DGMulti) = nothing + mesh, have_nonconservative_terms, equations, dg::DGMulti) = nothing -function calc_single_boundary_flux!(cache, t, boundary_condition, boundary_key, - mesh, equations, dg::DGMulti{NDIMS}) where {NDIMS} +function calc_single_boundary_flux!(cache, t, boundary_condition, boundary_key, mesh, + have_nonconservative_terms::False, equations, dg::DGMulti{NDIMS}) where {NDIMS} rd = dg.basis md = mesh.md @@ -455,8 +458,9 @@ function calc_single_boundary_flux!(cache, t, boundary_condition, boundary_key, # reshape face/normal arrays to have size = (num_points_on_face, num_faces_total). # mesh.boundary_faces indexes into the columns of these face-reshaped arrays. - num_pts_per_face = rd.Nfq ÷ rd.Nfaces - num_faces_total = rd.Nfaces * md.num_elements + num_faces = StartUpDG.num_faces(rd.element_type) + num_pts_per_face = rd.Nfq ÷ num_faces + num_faces_total = num_faces * md.num_elements # This function was originally defined as # `reshape_by_face(u) = reshape(view(u, :), num_pts_per_face, num_faces_total)`. @@ -485,6 +489,58 @@ function calc_single_boundary_flux!(cache, t, boundary_condition, boundary_key, # However, we don't have to re-reshape, since cache.flux_face_values still retains its original shape. end +function calc_single_boundary_flux!(cache, t, boundary_condition, boundary_key, mesh, + have_nonconservative_terms::True, equations, dg::DGMulti{NDIMS}) where {NDIMS} + + rd = dg.basis + md = mesh.md + surface_flux, nonconservative_flux = dg.surface_integral.surface_flux + + # reshape face/normal arrays to have size = (num_points_on_face, num_faces_total). + # mesh.boundary_faces indexes into the columns of these face-reshaped arrays. + num_pts_per_face = rd.Nfq ÷ StartUpDG.num_faces(rd.element_type) + num_faces_total = StartUpDG.num_faces(rd.element_type) * md.num_elements + + # This function was originally defined as + # `reshape_by_face(u) = reshape(view(u, :), num_pts_per_face, num_faces_total)`. + # This results in allocations due to https://github.com/JuliaLang/julia/issues/36313. 
+ # To avoid allocations, we use Tim Holy's suggestion: + # https://github.com/JuliaLang/julia/issues/36313#issuecomment-782336300. + reshape_by_face(u) = Base.ReshapedArray(u, (num_pts_per_face, num_faces_total), ()) + + u_face_values = reshape_by_face(cache.u_face_values) + flux_face_values = reshape_by_face(cache.flux_face_values) + Jf = reshape_by_face(md.Jf) + nxyzJ, xyzf = reshape_by_face.(md.nxyzJ), reshape_by_face.(md.xyzf) # broadcast over nxyzJ::NTuple{NDIMS,Matrix} + + # loop through boundary faces, which correspond to columns of reshaped u_face_values, ... + for f in mesh.boundary_faces[boundary_key] + for i in Base.OneTo(num_pts_per_face) + face_normal = SVector{NDIMS}(getindex.(nxyzJ, i, f)) / Jf[i,f] + face_coordinates = SVector{NDIMS}(getindex.(xyzf, i, f)) + + # Compute conservative and non-conservative fluxes separately. + # This imposes boundary conditions on the conservative part of the flux. + cons_flux_at_face_node = boundary_condition(u_face_values[i,f], face_normal, face_coordinates, t, + surface_flux, equations) + + # Compute pointwise nonconservative numerical flux at the boundary. + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # However, there is only one `face_normal` at boundaries, which we pass in twice. + # Note: This does not set any type of boundary condition for the nonconservative term + noncons_flux_at_face_node = nonconservative_flux(u_face_values[i,f], u_face_values[i,f], + face_normal, face_normal, equations) + + flux_face_values[i,f] = (cons_flux_at_face_node + 0.5 * noncons_flux_at_face_node) * Jf[i,f] + + end + end + + # Note: modifying the values of the reshaped array modifies the values of cache.flux_face_values. + # However, we don't have to re-reshape, since cache.flux_face_values still retains its original shape. 
+end + # inverts Jacobian and scales by -1.0 function invert_jacobian!(du, mesh::DGMultiMesh, equations, dg::DGMulti, cache; scaling=-1) @@ -568,7 +624,8 @@ function rhs!(du, u, t, mesh, equations, have_nonconservative_terms(equations), equations, dg) @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, equations, dg) + cache, t, boundary_conditions, mesh, + have_nonconservative_terms(equations), equations, dg) @trixi_timeit timer() "surface integral" calc_surface_integral!( du, u, dg.surface_integral, mesh, equations, dg, cache) diff --git a/src/solvers/dgmulti/flux_differencing.jl b/src/solvers/dgmulti/flux_differencing.jl index 97905f1d0b4..f511694c76e 100644 --- a/src/solvers/dgmulti/flux_differencing.jl +++ b/src/solvers/dgmulti/flux_differencing.jl @@ -594,8 +594,8 @@ function rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions: have_nonconservative_terms(equations), equations, dg) - @trixi_timeit timer() "boundary flux" calc_boundary_flux!(cache, t, boundary_conditions, - mesh, equations, dg) + @trixi_timeit timer() "boundary flux" calc_boundary_flux!(cache, t, boundary_conditions, mesh, + have_nonconservative_terms(equations), equations, dg) @trixi_timeit timer() "surface integral" calc_surface_integral!(du, u, dg.surface_integral, mesh, equations, dg, cache) @@ -630,7 +630,8 @@ function rhs!(du, u, t, mesh, equations, have_nonconservative_terms(equations), equations, dg) @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, equations, dg) + cache, t, boundary_conditions, mesh, + have_nonconservative_terms(equations), equations, dg) @trixi_timeit timer() "surface integral" calc_surface_integral!( du, u, dg.surface_integral, mesh, equations, dg, cache) diff --git a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl index dfafe4ff98f..ca2666f218f 100644 --- a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl +++ b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl @@ -61,7 +61,8 @@ function TensorProductGaussFaceOperator(operator::AbstractGaussOperator, nnodes_1d = length(rq1D) # Permutation of indices in a tensor product form - indices = reshape(1:length(rd.rf), nnodes_1d, rd.Nfaces) + num_faces = StartUpDG.num_faces(rd.element_type) + indices = reshape(1:length(rd.rf), nnodes_1d, num_faces) face_indices_tensor_product = zeros(Int, 2, nnodes_1d, ndims(rd.element_type)) for i in 1:nnodes_1d # loop over nodes in one face face_indices_tensor_product[:, i, 1] .= indices[i, 1:2] @@ -76,7 +77,7 @@ function TensorProductGaussFaceOperator(operator::AbstractGaussOperator, return TensorProductGaussFaceOperator{2, T_op, Tm, Tw, Tf, Ti}(interp_matrix_gauss_to_face_1d, inv.(wq1D), rd.wf, face_indices_tensor_product, - nnodes_1d, rd.Nfaces) + nnodes_1d, num_faces) end # constructor for a 3D operator @@ -90,7 +91,8 @@ function TensorProductGaussFaceOperator(operator::AbstractGaussOperator, nnodes_1d = length(rq1D) # Permutation of indices in a tensor product form - indices = reshape(1:length(rd.rf), nnodes_1d, nnodes_1d, rd.Nfaces) + num_faces = StartUpDG.num_faces(rd.element_type) + indices = reshape(1:length(rd.rf), nnodes_1d, nnodes_1d, num_faces) face_indices_tensor_product = zeros(Int, 2, nnodes_1d, nnodes_1d, ndims(rd.element_type)) for j in 1:nnodes_1d, i in 1:nnodes_1d # loop over nodes in one face face_indices_tensor_product[:, i, j, 1] .= indices[i, j, 1:2] @@ -106,7 +108,7 @@ function 
TensorProductGaussFaceOperator(operator::AbstractGaussOperator, return TensorProductGaussFaceOperator{3, T_op, Tm, Tw, Tf, Ti}(interp_matrix_gauss_to_face_1d, inv.(wq1D), rd.wf, face_indices_tensor_product, - nnodes_1d, rd.Nfaces) + nnodes_1d, num_faces) end # specialize behavior of `mul_by!(A)` where `A isa TensorProductGaussFaceOperator)` @@ -507,8 +509,8 @@ function rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions: have_nonconservative_terms(equations), equations, dg) - @trixi_timeit timer() "boundary flux" calc_boundary_flux!(cache, t, boundary_conditions, - mesh, equations, dg) + @trixi_timeit timer() "boundary flux" calc_boundary_flux!(cache, t, boundary_conditions, mesh, + have_nonconservative_terms(equations), equations, dg) # `du` is stored at Gauss nodes here @trixi_timeit timer() "surface integral" calc_surface_integral!(du, u, dg.surface_integral, diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index d7f1463fde0..445a8082ce7 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -702,7 +702,7 @@ end function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any,4}, t, boundary_condition, nonconservative_terms::True, equations, - surface_integral ,dg::DG, cache, + surface_integral, dg::DG, cache, direction, first_boundary, last_boundary) surface_flux, nonconservative_flux = surface_integral.surface_flux @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries diff --git a/test/test_dgmulti_2d.jl b/test/test_dgmulti_2d.jl index d2eaeea57d1..0c10a176420 100644 --- a/test/test_dgmulti_2d.jl +++ b/test/test_dgmulti_2d.jl @@ -292,6 +292,14 @@ isdir(outdir) && rm(outdir, recursive=true) ) end + @trixi_testset "elixir_mhd_reflective_wall.jl (Quad)" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_reflective_wall.jl"), + cells_per_dimension = 4, + l2 = [0.0036019536614619687, 0.001734097206958611, 0.008375221008997178, 0.0, 0.028596796602124414, 0.0018573693138866614, 0.0020807798141551166, 0.0, 5.301188920230166e-5], + linf = [0.01692601228199253, 0.009369662298436778, 0.04145169295835428, 0.0, 0.11569908670112738, 0.00984964453299233, 0.01141708032148614, 0.0, 0.0002992631411931389] + ) + end + @trixi_testset "elixir_shallowwater_source_terms.jl (Quad, SBP)" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_source_terms.jl"), cells_per_dimension = 8, element_type = Quad(), approximation_type = SBP(), From f581b9df4740bbcfcc6ad7de68cb6dca5401df82 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Sun, 14 May 2023 14:52:48 +0200 Subject: [PATCH 007/163] precompile tweaks (#1464) * precompile init functions * hacky workaround in __init__ not required on Julia v1.9 --- src/Trixi.jl | 20 +++++++++++--------- src/auxiliary/precompile.jl | 3 +++ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index b45edbbecd4..cbf0380841f 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -272,15 +272,17 @@ function __init__() # https://github.com/JuliaLang/julia/issues/32552 # https://github.com/JuliaLang/julia/issues/41740 # See also https://discourse.julialang.org/t/performance-depends-dramatically-on-compilation-order/58425 - let - for T in (Float32, Float64) - u_mortars_2d = zeros(T, 2, 2, 2, 2, 2) - u_view_2d = view(u_mortars_2d, 1, :, 1, :, 1) - LoopVectorization.axes(u_view_2d) - - u_mortars_3d = zeros(T, 2, 2, 2, 2, 2, 2) - u_view_3d = view(u_mortars_3d, 1, :, 1, :, :, 1) - 
LoopVectorization.axes(u_view_3d) + if VERSION < v"1.9.0" + let + for T in (Float32, Float64) + u_mortars_2d = zeros(T, 2, 2, 2, 2, 2) + u_view_2d = view(u_mortars_2d, 1, :, 1, :, 1) + LoopVectorization.axes(u_view_2d) + + u_mortars_3d = zeros(T, 2, 2, 2, 2, 2, 2) + u_view_3d = view(u_mortars_3d, 1, :, 1, :, :, 1) + LoopVectorization.axes(u_view_3d) + end end end end diff --git a/src/auxiliary/precompile.jl b/src/auxiliary/precompile.jl index eeeae07b601..0695e72efac 100644 --- a/src/auxiliary/precompile.jl +++ b/src/auxiliary/precompile.jl @@ -460,6 +460,9 @@ function _precompile_manual_() @assert Base.precompile(Tuple{typeof(trixi_include),String}) end + @assert Base.precompile(Tuple{typeof(init_mpi)}) + @assert Base.precompile(Tuple{typeof(init_p4est)}) + # The following precompile statements do not seem to be taken # # `multiply_dimensionwise!` as used in the analysis callback # for RealT in (Float64,) From 3096a3897b0a42c627110f1cbefece8ec7918adc Mon Sep 17 00:00:00 2001 From: ArseniyKholod <119304909+ArseniyKholod@users.noreply.github.com> Date: Mon, 15 May 2023 09:17:29 +0200 Subject: [PATCH 008/163] MPI: Handle sanity checks during initialization of unstructured boundary conditions differently (#1447) * Update sort_boundary_conditions.jl * remove unnecessary variables * size->mpi_nranks * try to fix issue with checks * use MPI.Gatherv * solve issue with empty unique_names list * re-run checks * undo last change * Update src/solvers/dgsem_unstructured/sort_boundary_conditions.jl Co-authored-by: Michael Schlottke-Lakemper * Update src/solvers/dgsem_unstructured/sort_boundary_conditions.jl Co-authored-by: Michael Schlottke-Lakemper * Update src/solvers/dgsem_unstructured/sort_boundary_conditions.jl Co-authored-by: Michael Schlottke-Lakemper * Update src/solvers/dgsem_unstructured/sort_boundary_conditions.jl Co-authored-by: Michael Schlottke-Lakemper * Update src/solvers/dgsem_unstructured/sort_boundary_conditions.jl Co-authored-by: Michael Schlottke-Lakemper * Update src/solvers/dgsem_unstructured/sort_boundary_conditions.jl Co-authored-by: Michael Schlottke-Lakemper * Update sort_boundary_conditions.jl * Update sort_boundary_conditions.jl * Update sort_boundary_conditions.jl --------- Co-authored-by: Michael Schlottke-Lakemper Co-authored-by: Hendrik Ranocha --- .../sort_boundary_conditions.jl | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl index 0206cc38084..5315e695bd6 100644 --- a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl +++ b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl @@ -41,10 +41,26 @@ function initialize!(boundary_types_container::UnstructuredSortedBoundaryTypes{N unique_names = unique(cache.boundaries.name) - # TODO: This needs to be handled differently for the `ParallelP4estMesh` since the boundaries - # are distributed and thus unique_names only contains the names of boundaries on the local process - # See https://github.com/trixi-framework/Trixi.jl/issues/1047 - if !mpi_isparallel() + if mpi_isparallel() + # Exchange of boundaries names + send_buffer = Vector{UInt8}(join(unique_names, "\0")) + push!(send_buffer, 0) + if mpi_isroot() + recv_buffer_length = MPI.Gather(length(send_buffer), mpi_root(), mpi_comm()) + recv_buffer = Vector{UInt8}(undef, sum(recv_buffer_length)) + MPI.Gatherv!(send_buffer, MPI.VBuffer(recv_buffer, recv_buffer_length), mpi_root(), mpi_comm()) + all_names = 
unique(Symbol.(split(String(recv_buffer), "\0"; keepempty=false))) + for key in keys(boundary_dictionary) + if !(key in all_names) + println(stderr, "ERROR: Key $(repr(key)) is not a valid boundary name") + MPI.Abort(mpi_comm(), 1) + end + end + else + MPI.Gather(length(send_buffer), mpi_root(), mpi_comm()) + MPI.Gatherv!(send_buffer, nothing, mpi_root(), mpi_comm()) + end + else for key in keys(boundary_dictionary) if !(key in unique_names) error("Key $(repr(key)) is not a valid boundary name") From 2ddf0308beb555e14b91c100e8d14d0b8b947d96 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 15 May 2023 17:48:22 +0200 Subject: [PATCH 009/163] Add `@autoinfiltrate` for simple, ad-hoc interactive debugging (#1465) * Add `@autoinfiltrate` for simple, ad-hoc interactive debugging * Add missing escape --------- Co-authored-by: Jesse Chan <1156048+jlchan@users.noreply.github.com> Co-authored-by: Hendrik Ranocha --- docs/src/development.md | 18 ++++------ src/auxiliary/auxiliary.jl | 67 ++++++++++++++++++++++++++++++++++---- 2 files changed, 67 insertions(+), 18 deletions(-) diff --git a/docs/src/development.md b/docs/src/development.md index f7c41a398b8..f3f65057eb7 100644 --- a/docs/src/development.md +++ b/docs/src/development.md @@ -256,17 +256,13 @@ Breakpoints can be set by adding a line with the ```@infiltrate``` macro at the in the code. Use [Revise](@ref interactive-use-of-julia) if you want to set and delete breakpoints in your package without having to restart Julia. -!!! note - When running Julia inside a package environment, the ```@infiltrate``` macro only works if `Infiltrator` - has been added to the dependencies. Another work around when using Revise is to first load the - package and then add breakpoints with `Main.@infiltrate` to the code. If this is not - desired, the functional form - ```julia - if isdefined(Main, :Infiltrator) - Main.Infiltrator.infiltrate(@__MODULE__, Base.@locals, @__FILE__, @__LINE__) - end - ``` - can be used to set breakpoints when working with Trixi.jl or other packages. +!!! note "Use `@autoinfiltrate` when debugging Trixi.jl" + When running Julia inside a package environment, e.g., inside the source + code of Trixi.jl itself, the `@infiltrate` macro only works if + `Infiltrator` has been added to the package dependencies. To avoid this, + you can use the (non-exported) [`@autoinfiltrate`](@ref) macro + in Trixi.jl, which only requires Infiltrator.jl to be available in the + current environment stack and will auto-load it for you. Triggering the breakpoint starts a REPL session where it is possible to interact with the current local scope. Possible commands are: diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index 225eb8dfcd7..4958e0d1fdc 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -1,14 +1,9 @@ -# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). -# Since these FMAs can increase the performance of many numerical algorithms, -# we need to opt-in explicitly. -# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. -@muladd begin - +# The following statements below outside the `@muladd begin ... end` block, as otherwise +# Revise.jl might be broken include("containers.jl") include("math.jl") - # Enable debug timings `@trixi_timeit timer() "name" stuff...`. 
# This allows us to disable timings completely by executing # `TimerOutputs.disable_debug_timings(Trixi)` @@ -23,6 +18,13 @@ const main_timer = TimerOutput() timer() = main_timer +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin + + """ PerformanceCounter() @@ -279,4 +281,55 @@ macro trixi_timeit(timer_output, label, expr) end +""" + @autoinfiltrate + @autoinfiltrate condition::Bool + +Invoke the `@infiltrate` macro of the package Infiltrator.jl to create a breakpoint for ad-hoc +interactive debugging in the REPL. If the optional argument `condition` is given, the breakpoint is +only enabled if `condition` evaluates to `true`. + +As opposed to using `Infiltrator.@infiltrate` directly, this macro does not require Infiltrator.jl +to be added as a dependency to Trixi.jl. As a bonus, the macro will also attempt to load the +Infiltrator module if it has not yet been loaded manually. + +Note: For this macro to work, the Infiltrator.jl package needs to be installed in your current Julia +environment stack. + +See also: [Infiltrator.jl](https://github.com/JuliaDebug/Infiltrator.jl) + +!!! warning "Internal use only" + Please note that this macro is intended for internal use only. It is *not* part of the public + API of Trixi.jl, and it thus can altered (or be removed) at any time without it being considered + a breaking change. +""" +macro autoinfiltrate(condition = true) + pkgid = Base.PkgId(Base.UUID("5903a43b-9cc3-4c30-8d17-598619ec4e9b"), "Infiltrator") + if !haskey(Base.loaded_modules, pkgid) + try + Base.eval(Main, :(using Infiltrator)) + catch err + @error "Cannot load Infiltrator.jl. Make sure it is included in your environment stack." + end + end + i = get(Base.loaded_modules, pkgid, nothing) + lnn = LineNumberNode(__source__.line, __source__.file) + + if i === nothing + return Expr( + :macrocall, + Symbol("@warn"), + lnn, + "Could not load Infiltrator.") + end + + return Expr( + :macrocall, + Expr(:., i, QuoteNode(Symbol("@infiltrate"))), + lnn, + esc(condition) + ) +end + + end # @muladd From 6e5ea1390686b01c4bb345d9281ea303625681c0 Mon Sep 17 00:00:00 2001 From: Jesse Chan <1156048+jlchan@users.noreply.github.com> Date: Mon, 15 May 2023 22:41:42 -0500 Subject: [PATCH 010/163] preparation for `DGMulti` shock capturing (#1458) * some interfaces for DGMulti shock capturing * fix ambiguity * pass in volume flux instead of volume integral * dropped one * factor flux_differencing_kernel! out * consistent formatting * Update src/solvers/dgmulti/types.jl Co-authored-by: Hendrik Ranocha * add kwargs reference * add volume_flux specialization * Revert "add volume_flux specialization" This reverts commit 7e0c458aff0c94a4baffbf7ef1197f37200e57c4. 
* inlining `flux_differencing_kernel!` --------- Co-authored-by: Hendrik Ranocha --- src/Trixi.jl | 2 +- src/callbacks_step/analysis_dgmulti.jl | 2 +- src/solvers/dgmulti/flux_differencing.jl | 19 ++-- .../dgmulti/flux_differencing_gauss_sbp.jl | 87 ++++++++++--------- src/solvers/dgmulti/types.jl | 16 ++++ 5 files changed, 71 insertions(+), 55 deletions(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index cbf0380841f..c0cecf86bd4 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -237,7 +237,7 @@ export ode_norm, ode_unstable_check export convergence_test, jacobian_fd, jacobian_ad_forward, linear_structure -export DGMulti, estimate_dt, DGMultiMesh, GaussSBP +export DGMulti, DGMultiBasis, estimate_dt, DGMultiMesh, GaussSBP export ViscousFormulationBassiRebay1, ViscousFormulationLocalDG diff --git a/src/callbacks_step/analysis_dgmulti.jl b/src/callbacks_step/analysis_dgmulti.jl index 9489c8bc753..2fbd8eda87a 100644 --- a/src/callbacks_step/analysis_dgmulti.jl +++ b/src/callbacks_step/analysis_dgmulti.jl @@ -181,7 +181,7 @@ end SolutionAnalyzer(rd::RefElemData) = rd -nelements(mesh::DGMultiMesh, solver::DGMulti, cache) = mesh.md.num_elements +nelements(mesh::DGMultiMesh, ::DGMulti, other_args...) = mesh.md.num_elements function ndofsglobal(mesh::DGMultiMesh, solver::DGMulti, cache) if mpi_isparallel() error("`ndofsglobal` is not implemented for `DGMultiMesh` when used in parallel with MPI") diff --git a/src/solvers/dgmulti/flux_differencing.jl b/src/solvers/dgmulti/flux_differencing.jl index f511694c76e..51339193c71 100644 --- a/src/solvers/dgmulti/flux_differencing.jl +++ b/src/solvers/dgmulti/flux_differencing.jl @@ -413,10 +413,10 @@ end # Computes flux differencing contribution from each Cartesian direction over a single element. # For dense operators, we do not use sum factorization. @inline function local_flux_differencing!(fluxdiff_local, u_local, element_index, - has_nonconservative_terms::False, volume_integral, + has_nonconservative_terms::False, volume_flux, has_sparse_operators::False, mesh, equations, dg, cache) - @unpack volume_flux = volume_integral + for dim in eachdim(mesh) Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, cache) # True() indicates the volume flux is symmetric @@ -427,10 +427,10 @@ end end @inline function local_flux_differencing!(fluxdiff_local, u_local, element_index, - has_nonconservative_terms::True, volume_integral, + has_nonconservative_terms::True, volume_flux, has_sparse_operators::False, mesh, equations, dg, cache) - flux_conservative, flux_nonconservative = volume_integral.volume_flux + flux_conservative, flux_nonconservative = volume_flux for dim in eachdim(mesh) Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, cache) # True() indicates the flux is symmetric. @@ -450,11 +450,10 @@ end # When the operators are sparse, we use the sum-factorization approach to # computing flux differencing. @inline function local_flux_differencing!(fluxdiff_local, u_local, element_index, - has_nonconservative_terms::False, volume_integral, + has_nonconservative_terms::False, volume_flux, has_sparse_operators::True, mesh, equations, dg, cache) @unpack Qrst_skew = cache - @unpack volume_flux = volume_integral for dim in eachdim(mesh) # There are two ways to write this flux differencing discretization on affine meshes. 
# @@ -481,11 +480,11 @@ end end @inline function local_flux_differencing!(fluxdiff_local, u_local, element_index, - has_nonconservative_terms::True, volume_integral, + has_nonconservative_terms::True, volume_flux, has_sparse_operators::True, mesh, equations, dg, cache) @unpack Qrst_skew = cache - flux_conservative, flux_nonconservative = volume_integral.volume_flux + flux_conservative, flux_nonconservative = volume_flux for dim in eachdim(mesh) normal_direction = get_contravariant_vector(element_index, dim, mesh, cache) Q_skew = Qrst_skew[dim] @@ -521,7 +520,7 @@ function calc_volume_integral!(du, u, mesh::DGMultiMesh, u_local = view(entropy_projected_u_values, :, e) local_flux_differencing!(fluxdiff_local, u_local, e, - have_nonconservative_terms, volume_integral, + have_nonconservative_terms, volume_integral.volume_flux, has_sparse_operators(dg), mesh, equations, dg, cache) @@ -548,7 +547,7 @@ function calc_volume_integral!(du, u, mesh::DGMultiMesh, u_local = view(u, :, e) local_flux_differencing!(fluxdiff_local, u_local, e, - have_nonconservative_terms, volume_integral, + have_nonconservative_terms, volume_integral.volume_flux, has_sparse_operators(dg), mesh, equations, dg, cache) diff --git a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl index ca2666f218f..35de95a7ddb 100644 --- a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl +++ b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl @@ -413,55 +413,56 @@ function calc_surface_integral!(du, u, surface_integral::SurfaceIntegralWeakForm end end -function calc_volume_integral!(du, u, mesh::DGMultiMesh, - have_nonconservative_terms, equations, - volume_integral, dg::DGMultiFluxDiff{<:GaussSBP}, - cache) - - @unpack entropy_projected_u_values = cache - @unpack fluxdiff_local_threaded, rhs_local_threaded, rhs_volume_local_threaded = cache +@inline function flux_differencing_kernel!(du, u, element, mesh::DGMultiMesh, + have_nonconservative_terms, equations, + volume_flux, dg::DGMultiFluxDiff{<:GaussSBP}, + cache, alpha=true) + + fluxdiff_local = cache.fluxdiff_local_threaded[Threads.threadid()] + fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) + u_local = view(cache.entropy_projected_u_values, :, element) + + local_flux_differencing!(fluxdiff_local, u_local, element, + have_nonconservative_terms, + volume_flux, has_sparse_operators(dg), + mesh, equations, dg, cache) + + # convert `fluxdiff_local::Vector{<:SVector}` to `rhs_local::StructArray{<:SVector}` + # for faster performance when using `apply_to_each_field`. + rhs_local = cache.rhs_local_threaded[Threads.threadid()] + for i in Base.OneTo(length(fluxdiff_local)) + rhs_local[i] = fluxdiff_local[i] + end - # After computing the volume integral, the rhs values are stored at Gauss nodes. - # We transform from Gauss nodes back to Lobatto nodes in `invert_jacobian!`. - @unpack projection_matrix_gauss_to_face, inv_gauss_weights = cache + # stores rhs contributions only at Gauss volume nodes + rhs_volume_local = cache.rhs_volume_local_threaded[Threads.threadid()] - rd = dg.basis - volume_indices = Base.OneTo(rd.Nq) - face_indices = (rd.Nq + 1):(rd.Nq + rd.Nfq) + # Here, we exploit that under a Gauss nodal basis the structure of the projection + # matrix `Ph = [diagm(1 ./ wq), projection_matrix_gauss_to_face]` such that + # `Ph * [u; uf] = (u ./ wq) + projection_matrix_gauss_to_face * uf`. 
+ volume_indices = Base.OneTo(dg.basis.Nq) + face_indices = (dg.basis.Nq + 1):(dg.basis.Nq + dg.basis.Nfq) + local_volume_flux = view(rhs_local, volume_indices) + local_face_flux = view(rhs_local, face_indices) - @threaded for e in eachelement(mesh, dg, cache) - fluxdiff_local = fluxdiff_local_threaded[Threads.threadid()] - fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) - u_local = view(entropy_projected_u_values, :, e) - - local_flux_differencing!(fluxdiff_local, u_local, e, - have_nonconservative_terms, volume_integral, - has_sparse_operators(dg), - mesh, equations, dg, cache) - - # convert `fluxdiff_local::Vector{<:SVector}` to `rhs_local::StructArray{<:SVector}` - # for faster performance when using `apply_to_each_field`. - rhs_local = rhs_local_threaded[Threads.threadid()] - for i in Base.OneTo(length(fluxdiff_local)) - rhs_local[i] = fluxdiff_local[i] - end + # initialize rhs_volume_local = projection_matrix_gauss_to_face * local_face_flux + apply_to_each_field(mul_by!(cache.projection_matrix_gauss_to_face), rhs_volume_local, local_face_flux) - # stores rhs contributions only at Gauss volume nodes - rhs_volume_local = rhs_volume_local_threaded[Threads.threadid()] - - # Here, we exploit that under a Gauss nodal basis the structure of the projection - # matrix `Ph = [diagm(1 ./ wq), projection_matrix_gauss_to_face]` such that `Ph * [u; uf] = (u ./ wq) + projection_matrix_gauss_to_face * uf`. - local_volume_flux = view(rhs_local, volume_indices) - local_face_flux = view(rhs_local, face_indices) - - # initialize rhs_volume_local = projection_matrix_gauss_to_face * local_face_flux - apply_to_each_field(mul_by!(projection_matrix_gauss_to_face), rhs_volume_local, local_face_flux) + # accumulate volume contributions at Gauss nodes + for i in eachindex(rhs_volume_local) + du[i, element] = alpha * (rhs_volume_local[i] + local_volume_flux[i] * cache.inv_gauss_weights[i]) + end +end - # accumulate volume contributions at Gauss nodes - for i in eachindex(rhs_volume_local) - du[i, e] = rhs_volume_local[i] + local_volume_flux[i] * inv_gauss_weights[i] - end +function calc_volume_integral!(du, u, mesh::DGMultiMesh, + have_nonconservative_terms, equations, + volume_integral::VolumeIntegralFluxDifferencing, + dg::DGMultiFluxDiff{<:GaussSBP}, cache) + @threaded for e in eachelement(mesh, dg, cache) + flux_differencing_kernel!(du, u, e, mesh, + have_nonconservative_terms, equations, + volume_integral.volume_flux, dg, cache) end end diff --git a/src/solvers/dgmulti/types.jl b/src/solvers/dgmulti/types.jl index 9d1c378cd79..20358556d8b 100644 --- a/src/solvers/dgmulti/types.jl +++ b/src/solvers/dgmulti/types.jl @@ -78,6 +78,22 @@ function DGMulti(element_type::AbstractElemShape, return DG(rd, nothing #= mortar =#, surface_integral, volume_integral) end +DGMulti(basis::RefElemData; volume_integral, surface_integral) = + DG(basis, nothing #= mortar =#, surface_integral, volume_integral) + +""" + DGMultiBasis(element_type, polydeg; approximation_type = Polynomial(), kwargs...) + +Constructs a basis for DGMulti solvers. Returns a "StartUpDG.RefElemData" object. + The `kwargs` arguments are additional keyword arguments for `RefElemData`, such as `quad_rule_vol`. + These are the same as the `RefElemData_kwargs` used in [`DGMulti`](@ref). + For more info, see the [StartUpDG.jl docs](https://jlchan.github.io/StartUpDG.jl/dev/). + +""" +DGMultiBasis(element_type, polydeg; approximation_type = Polynomial(), kwargs...) = + RefElemData(element_type, approximation_type, polydeg; kwargs...) 
+ + ######################################## # DGMultiMesh ######################################## From db907d7dde2da2d9fe252ae1bf246a5d63e67eea Mon Sep 17 00:00:00 2001 From: Jesse Chan <1156048+jlchan@users.noreply.github.com> Date: Tue, 16 May 2023 01:23:55 -0500 Subject: [PATCH 011/163] change calc_surface_integral! signature for consistency (#1467) Co-authored-by: Jesse Chan <1156048+jlchan@users.noreply.github.com> Co-authored-by: Hendrik Ranocha --- src/solvers/dgmulti/dg.jl | 10 +++++----- src/solvers/dgmulti/flux_differencing.jl | 6 +++--- src/solvers/dgmulti/flux_differencing_gauss_sbp.jl | 8 ++++---- src/solvers/dgmulti/sbp.jl | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/solvers/dgmulti/dg.jl b/src/solvers/dgmulti/dg.jl index 5d087f0deb2..c9b7f5f021d 100644 --- a/src/solvers/dgmulti/dg.jl +++ b/src/solvers/dgmulti/dg.jl @@ -389,8 +389,8 @@ end # assumes cache.flux_face_values is computed and filled with # for polyomial discretizations, use dense LIFT matrix for surface contributions. -function calc_surface_integral!(du, u, surface_integral::SurfaceIntegralWeakForm, - mesh::DGMultiMesh, equations, +function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations, + surface_integral::SurfaceIntegralWeakForm, dg::DGMulti, cache) rd = dg.basis apply_to_each_field(mul_by_accum!(rd.LIFT), du, cache.flux_face_values) @@ -412,8 +412,8 @@ end # Specialize for nodal SBP discretizations. Uses that du = LIFT*u is equivalent to # du[Fmask,:] .= u ./ rd.wq[rd.Fmask] -function calc_surface_integral!(du, u, surface_integral::SurfaceIntegralWeakForm, - mesh::DGMultiMesh, equations, +function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations, + surface_integral::SurfaceIntegralWeakForm, dg::DGMultiSBP, cache) rd = dg.basis @unpack flux_face_values, lift_scalings = cache @@ -628,7 +628,7 @@ function rhs!(du, u, t, mesh, equations, have_nonconservative_terms(equations), equations, dg) @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, dg.surface_integral, mesh, equations, dg, cache) + du, u, mesh, equations, dg.surface_integral, dg, cache) @trixi_timeit timer() "Jacobian" invert_jacobian!( du, mesh, equations, dg, cache) diff --git a/src/solvers/dgmulti/flux_differencing.jl b/src/solvers/dgmulti/flux_differencing.jl index 51339193c71..1031c837efa 100644 --- a/src/solvers/dgmulti/flux_differencing.jl +++ b/src/solvers/dgmulti/flux_differencing.jl @@ -596,8 +596,8 @@ function rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions: @trixi_timeit timer() "boundary flux" calc_boundary_flux!(cache, t, boundary_conditions, mesh, have_nonconservative_terms(equations), equations, dg) - @trixi_timeit timer() "surface integral" calc_surface_integral!(du, u, dg.surface_integral, - mesh, equations, dg, cache) + @trixi_timeit timer() "surface integral" calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) @trixi_timeit timer() "Jacobian" invert_jacobian!(du, mesh, equations, dg, cache) @@ -633,7 +633,7 @@ function rhs!(du, u, t, mesh, equations, have_nonconservative_terms(equations), equations, dg) @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, dg.surface_integral, mesh, equations, dg, cache) + du, u, mesh, equations, dg.surface_integral, dg, cache) @trixi_timeit timer() "Jacobian" invert_jacobian!( du, mesh, equations, dg, cache) diff --git a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl index 35de95a7ddb..09ee3d7d19c 
100644 --- a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl +++ b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl @@ -394,8 +394,8 @@ end # Assumes cache.flux_face_values is already computed. # Enables tensor product evaluation of `LIFT isa TensorProductGaussFaceOperator`. -function calc_surface_integral!(du, u, surface_integral::SurfaceIntegralWeakForm, - mesh::DGMultiMesh, equations, +function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations, + surface_integral::SurfaceIntegralWeakForm, dg::DGMultiFluxDiff{<:GaussSBP}, cache) @unpack gauss_volume_local_threaded = cache @unpack interp_matrix_gauss_to_lobatto, gauss_LIFT = cache @@ -514,8 +514,8 @@ function rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions: have_nonconservative_terms(equations), equations, dg) # `du` is stored at Gauss nodes here - @trixi_timeit timer() "surface integral" calc_surface_integral!(du, u, dg.surface_integral, - mesh, equations, dg, cache) + @trixi_timeit timer() "surface integral" calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) # invert Jacobian and map `du` from Gauss to Lobatto nodes @trixi_timeit timer() "Jacobian" invert_jacobian_and_interpolate!(du, mesh, equations, dg, cache) diff --git a/src/solvers/dgmulti/sbp.jl b/src/solvers/dgmulti/sbp.jl index f0070bc8a84..18be52b7ba3 100644 --- a/src/solvers/dgmulti/sbp.jl +++ b/src/solvers/dgmulti/sbp.jl @@ -456,8 +456,8 @@ function calc_interface_flux!(cache, surface_integral::SurfaceIntegralWeakForm, nothing end -function calc_surface_integral!(du, u, surface_integral::SurfaceIntegralWeakForm, - mesh::DGMultiMesh, equations, +function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations, + surface_integral::SurfaceIntegralWeakForm, dg::DGMultiPeriodicFDSBP, cache) @assert nelements(mesh, dg, cache) == 1 nothing From bc68423342e9f4da89081ab4a141d726eb220d2b Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Tue, 16 May 2023 10:27:59 +0200 Subject: [PATCH 012/163] allow MPI.jl v0.20.* (#1472) --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 333f571e728..9796ae5e04a 100644 --- a/Project.toml +++ b/Project.toml @@ -53,7 +53,7 @@ HDF5 = "0.14, 0.15, 0.16" IfElse = "0.1" LinearMaps = "2.7, 3.0" LoopVectorization = "0.12.118" -MPI = "0.20 - 0.20.8" +MPI = "0.20" MuladdMacro = "0.2.2" Octavian = "0.3.5" OffsetArrays = "1.3" From 026d13d4dfdaf038e96039ea00117abe55314d57 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Tue, 16 May 2023 15:46:22 +0200 Subject: [PATCH 013/163] Fix broken ref in documentation (#1473) --- docs/src/development.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/development.md b/docs/src/development.md index f3f65057eb7..e6a24f0cf06 100644 --- a/docs/src/development.md +++ b/docs/src/development.md @@ -260,7 +260,7 @@ in your package without having to restart Julia. When running Julia inside a package environment, e.g., inside the source code of Trixi.jl itself, the `@infiltrate` macro only works if `Infiltrator` has been added to the package dependencies. To avoid this, - you can use the (non-exported) [`@autoinfiltrate`](@ref) macro + you can use the (non-exported) `@autoinfiltrate` macro in Trixi.jl, which only requires Infiltrator.jl to be available in the current environment stack and will auto-load it for you. 
From 5ab925a05fe1f4f9eea64ff203f8e17e7630ac0e Mon Sep 17 00:00:00 2001 From: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Date: Tue, 16 May 2023 15:53:30 +0200 Subject: [PATCH 014/163] Add function barrier to fix HG shock capturing on macOS ARM (#1462) * Add function barrier to fix HG shock capturing on macOS ARM * Implement suggestions * Add comment * rename to calc_indicator_hennemann_gassner! --------- Co-authored-by: Hendrik Ranocha Co-authored-by: Hendrik Ranocha --- src/solvers/dgsem_tree/indicators.jl | 34 +++++++ src/solvers/dgsem_tree/indicators_1d.jl | 117 ++++++++++------------- src/solvers/dgsem_tree/indicators_2d.jl | 120 +++++++++++------------- src/solvers/dgsem_tree/indicators_3d.jl | 119 ++++++++++------------- 4 files changed, 190 insertions(+), 200 deletions(-) diff --git a/src/solvers/dgsem_tree/indicators.jl b/src/solvers/dgsem_tree/indicators.jl index 0cf099d95f2..30d3b2c0448 100644 --- a/src/solvers/dgsem_tree/indicators.jl +++ b/src/solvers/dgsem_tree/indicators.jl @@ -102,6 +102,40 @@ function Base.show(io::IO, ::MIME"text/plain", indicator::IndicatorHennemannGass end +function (indicator_hg::IndicatorHennemannGassner)(u, mesh, equations, dg::DGSEM, cache; + kwargs...) + @unpack alpha_smooth = indicator_hg + @unpack alpha, alpha_tmp = indicator_hg.cache + # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? + # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` + # or just `resize!` whenever we call the relevant methods as we do now? + resize!(alpha, nelements(dg, cache)) + if alpha_smooth + resize!(alpha_tmp, nelements(dg, cache)) + end + + # magic parameters + threshold = 0.5 * 10^(-1.8 * (nnodes(dg))^0.25) + parameter_s = log((1 - 0.0001) / 0.0001) + + @threaded for element in eachelement(dg, cache) + # This is dispatched by mesh dimension. + # Use this function barrier and unpack inside to avoid passing closures to + # Polyester.jl with `@batch` (`@threaded`). + # Otherwise, `@threaded` does not work here with Julia ARM on macOS. + # See https://github.com/JuliaSIMD/Polyester.jl/issues/88. + calc_indicator_hennemann_gassner!( + indicator_hg, threshold, parameter_s, u, + element, mesh, equations, dg, cache) + end + + if alpha_smooth + apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) + end + + return alpha +end + """ IndicatorLöhner (equivalent to IndicatorLoehner) diff --git a/src/solvers/dgsem_tree/indicators_1d.jl b/src/solvers/dgsem_tree/indicators_1d.jl index b2bbb282725..c1a88161245 100644 --- a/src/solvers/dgsem_tree/indicators_1d.jl +++ b/src/solvers/dgsem_tree/indicators_1d.jl @@ -24,84 +24,69 @@ function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, equations::Abs end -function (indicator_hg::IndicatorHennemannGassner)(u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, - equations, dg::DGSEM, cache; - kwargs...) +# Use this function barrier and unpack inside to avoid passing closures to Polyester.jl +# with @batch (@threaded). +# Otherwise, @threaded does not work here with Julia ARM on macOS. +# See https://github.com/JuliaSIMD/Polyester.jl/issues/88. +@inline function calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, u, + element, mesh::AbstractMesh{1}, + equations, dg, cache) @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded = indicator_hg.cache - # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? 
- # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` - # or just `resize!` whenever we call the relevant methods as we do now? - resize!(alpha, nelements(dg, cache)) - if alpha_smooth - resize!(alpha_tmp, nelements(dg, cache)) - end - - # magic parameters - threshold = 0.5 * 10^(-1.8 * (nnodes(dg))^0.25) - parameter_s = log((1 - 0.0001)/0.0001) - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] - modal = modal_threaded[Threads.threadid()] + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] - # Calculate indicator variables at Gauss-Lobatto nodes - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, element) - indicator[i] = indicator_hg.variable(u_local, equations) - end - - # Convert to modal representation - multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator) - - # Calculate total energies for all modes, without highest, without two highest - total_energy = zero(eltype(modal)) - for i in 1:nnodes(dg) - total_energy += modal[i]^2 - end - total_energy_clip1 = zero(eltype(modal)) - for i in 1:(nnodes(dg)-1) - total_energy_clip1 += modal[i]^2 - end - total_energy_clip2 = zero(eltype(modal)) - for i in 1:(nnodes(dg)-2) - total_energy_clip2 += modal[i]^2 - end + # Calculate indicator variables at Gauss-Lobatto nodes + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, element) + indicator[i] = indicator_hg.variable(u_local, equations) + end - # Calculate energy in higher modes - if !(iszero(total_energy)) - energy_frac_1 = (total_energy - total_energy_clip1) / total_energy - else - energy_frac_1 = zero(total_energy) - end - if !(iszero(total_energy_clip1)) - energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 - else - energy_frac_2 = zero(total_energy_clip1) - end - energy = max(energy_frac_1, energy_frac_2) + # Convert to modal representation + multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator) - alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) + # Calculate total energies for all modes, without highest, without two highest + total_energy = zero(eltype(modal)) + for i in 1:nnodes(dg) + total_energy += modal[i]^2 + end + total_energy_clip1 = zero(eltype(modal)) + for i in 1:(nnodes(dg)-1) + total_energy_clip1 += modal[i]^2 + end + total_energy_clip2 = zero(eltype(modal)) + for i in 1:(nnodes(dg)-2) + total_energy_clip2 += modal[i]^2 + end - # Take care of the case close to pure DG - if alpha_element < alpha_min - alpha_element = zero(alpha_element) - end + # Calculate energy in higher modes + if !(iszero(total_energy)) + energy_frac_1 = (total_energy - total_energy_clip1) / total_energy + else + energy_frac_1 = zero(total_energy) + end + if !(iszero(total_energy_clip1)) + energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 + else + energy_frac_2 = zero(total_energy_clip1) + end + energy = max(energy_frac_1, energy_frac_2) - # Take care of the case close to pure FV - if alpha_element > 1 - alpha_min - alpha_element = one(alpha_element) - end + alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) - # Clip the maximum amount of FV allowed - alpha[element] = min(alpha_max, alpha_element) + # Take care of the case close to pure DG + if alpha_element < alpha_min + alpha_element = zero(alpha_element) end - if alpha_smooth - apply_smoothing!(mesh, alpha, alpha_tmp, 
dg, cache) + # Take care of the case close to pure FV + if alpha_element > 1 - alpha_min + alpha_element = one(alpha_element) end - return alpha + # Clip the maximum amount of FV allowed + alpha[element] = min(alpha_max, alpha_element) end # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha @@ -411,4 +396,4 @@ function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkRayHesthaven})( return alpha end -end # @muladd \ No newline at end of file +end # @muladd diff --git a/src/solvers/dgsem_tree/indicators_2d.jl b/src/solvers/dgsem_tree/indicators_2d.jl index fe1c5908152..eb08657563b 100644 --- a/src/solvers/dgsem_tree/indicators_2d.jl +++ b/src/solvers/dgsem_tree/indicators_2d.jl @@ -25,85 +25,71 @@ function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, equations::Abs end -function (indicator_hg::IndicatorHennemannGassner)(u::AbstractArray{<:Any,4}, - mesh, equations, dg::DGSEM, cache; - kwargs...) +# Use this function barrier and unpack inside to avoid passing closures to Polyester.jl +# with @batch (@threaded). +# Otherwise, @threaded does not work here with Julia ARM on macOS. +# See https://github.com/JuliaSIMD/Polyester.jl/issues/88. +@inline function calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, u, + element, mesh::AbstractMesh{2}, + equations, dg, cache) @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg - @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded = indicator_hg.cache - # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? - # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` - # or just `resize!` whenever we call the relevant methods as we do now? - resize!(alpha, nelements(dg, cache)) - if alpha_smooth - resize!(alpha_tmp, nelements(dg, cache)) - end - - # magic parameters - threshold = 0.5 * 10^(-1.8 * (nnodes(dg))^0.25) - parameter_s = log((1 - 0.0001)/0.0001) + @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, + modal_tmp1_threaded = indicator_hg.cache - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] - modal = modal_threaded[Threads.threadid()] - modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] + modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] - # Calculate indicator variables at Gauss-Lobatto nodes - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - indicator[i, j] = indicator_hg.variable(u_local, equations) - end - - # Convert to modal representation - multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator, modal_tmp1) - - # Calculate total energies for all modes, without highest, without two highest - total_energy = zero(eltype(modal)) - for j in 1:nnodes(dg), i in 1:nnodes(dg) - total_energy += modal[i, j]^2 - end - total_energy_clip1 = zero(eltype(modal)) - for j in 1:(nnodes(dg)-1), i in 1:(nnodes(dg)-1) - total_energy_clip1 += modal[i, j]^2 - end - total_energy_clip2 = zero(eltype(modal)) - for j in 1:(nnodes(dg)-2), i in 1:(nnodes(dg)-2) - total_energy_clip2 += modal[i, j]^2 - end + # Calculate indicator variables at Gauss-Lobatto nodes + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + indicator[i, j] = indicator_hg.variable(u_local, equations) + end - # Calculate energy in higher modes - if 
!(iszero(total_energy)) - energy_frac_1 = (total_energy - total_energy_clip1) / total_energy - else - energy_frac_1 = zero(total_energy) - end - if !(iszero(total_energy_clip1)) - energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 - else - energy_frac_2 = zero(total_energy_clip1) - end - energy = max(energy_frac_1, energy_frac_2) + # Convert to modal representation + multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator, modal_tmp1) - alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) + # Calculate total energies for all modes, without highest, without two highest + total_energy = zero(eltype(modal)) + for j in 1:nnodes(dg), i in 1:nnodes(dg) + total_energy += modal[i, j]^2 + end + total_energy_clip1 = zero(eltype(modal)) + for j in 1:(nnodes(dg)-1), i in 1:(nnodes(dg)-1) + total_energy_clip1 += modal[i, j]^2 + end + total_energy_clip2 = zero(eltype(modal)) + for j in 1:(nnodes(dg)-2), i in 1:(nnodes(dg)-2) + total_energy_clip2 += modal[i, j]^2 + end - # Take care of the case close to pure DG - if alpha_element < alpha_min - alpha_element = zero(alpha_element) - end + # Calculate energy in higher modes + if !(iszero(total_energy)) + energy_frac_1 = (total_energy - total_energy_clip1) / total_energy + else + energy_frac_1 = zero(total_energy) + end + if !(iszero(total_energy_clip1)) + energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 + else + energy_frac_2 = zero(total_energy_clip1) + end + energy = max(energy_frac_1, energy_frac_2) - # Take care of the case close to pure FV - if alpha_element > 1 - alpha_min - alpha_element = one(alpha_element) - end + alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) - # Clip the maximum amount of FV allowed - alpha[element] = min(alpha_max, alpha_element) + # Take care of the case close to pure DG + if alpha_element < alpha_min + alpha_element = zero(alpha_element) end - if alpha_smooth - apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) + # Take care of the case close to pure FV + if alpha_element > 1 - alpha_min + alpha_element = one(alpha_element) end - return alpha + # Clip the maximum amount of FV allowed + alpha[element] = min(alpha_max, alpha_element) end diff --git a/src/solvers/dgsem_tree/indicators_3d.jl b/src/solvers/dgsem_tree/indicators_3d.jl index abb4b061aad..c1e7aee886a 100644 --- a/src/solvers/dgsem_tree/indicators_3d.jl +++ b/src/solvers/dgsem_tree/indicators_3d.jl @@ -26,87 +26,72 @@ function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, equations::Abs end -function (indicator_hg::IndicatorHennemannGassner)(u::AbstractArray{<:Any,5}, - mesh, equations, dg::DGSEM, cache; - kwargs...) +# Use this function barrier and unpack inside to avoid passing closures to Polyester.jl +# with @batch (@threaded). +# Otherwise, @threaded does not work here with Julia ARM on macOS. +# See https://github.com/JuliaSIMD/Polyester.jl/issues/88. +@inline function calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, u, + element, mesh::AbstractMesh{3}, + equations, dg, cache) @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded, modal_tmp2_threaded = indicator_hg.cache - # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? 
- # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` - # or just `resize!` whenever we call the relevant methods as we do now? - resize!(alpha, nelements(dg, cache)) - if alpha_smooth - resize!(alpha_tmp, nelements(dg, cache)) - end - - # magic parameters - threshold = 0.5 * 10^(-1.8 * (nnodes(dg))^0.25) - parameter_s = log((1 - 0.0001)/0.0001) - - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] - modal = modal_threaded[Threads.threadid()] - modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] - modal_tmp2 = modal_tmp2_threaded[Threads.threadid()] - - # Calculate indicator variables at Gauss-Lobatto nodes - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, k, element) - indicator[i, j, k] = indicator_hg.variable(u_local, equations) - end - # Convert to modal representation - multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator, modal_tmp1, modal_tmp2) + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] + modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] + modal_tmp2 = modal_tmp2_threaded[Threads.threadid()] - # Calculate total energies for all modes, without highest, without two highest - total_energy = zero(eltype(modal)) - for k in 1:nnodes(dg), j in 1:nnodes(dg), i in 1:nnodes(dg) - total_energy += modal[i, j, k]^2 - end - total_energy_clip1 = zero(eltype(modal)) - for k in 1:(nnodes(dg)-1), j in 1:(nnodes(dg)-1), i in 1:(nnodes(dg)-1) - total_energy_clip1 += modal[i, j, k]^2 - end - total_energy_clip2 = zero(eltype(modal)) - for k in 1:(nnodes(dg)-2), j in 1:(nnodes(dg)-2), i in 1:(nnodes(dg)-2) - total_energy_clip2 += modal[i, j, k]^2 - end + # Calculate indicator variables at Gauss-Lobatto nodes + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, k, element) + indicator[i, j, k] = indicator_hg.variable(u_local, equations) + end - # Calculate energy in higher modes - if !(iszero(total_energy)) - energy_frac_1 = (total_energy - total_energy_clip1) / total_energy - else - energy_frac_1 = zero(total_energy) - end - if !(iszero(total_energy_clip1)) - energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 - else - energy_frac_2 = zero(total_energy_clip1) - end - energy = max(energy_frac_1, energy_frac_2) + # Convert to modal representation + multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator, modal_tmp1, modal_tmp2) - alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) + # Calculate total energies for all modes, without highest, without two highest + total_energy = zero(eltype(modal)) + for k in 1:nnodes(dg), j in 1:nnodes(dg), i in 1:nnodes(dg) + total_energy += modal[i, j, k]^2 + end + total_energy_clip1 = zero(eltype(modal)) + for k in 1:(nnodes(dg)-1), j in 1:(nnodes(dg)-1), i in 1:(nnodes(dg)-1) + total_energy_clip1 += modal[i, j, k]^2 + end + total_energy_clip2 = zero(eltype(modal)) + for k in 1:(nnodes(dg)-2), j in 1:(nnodes(dg)-2), i in 1:(nnodes(dg)-2) + total_energy_clip2 += modal[i, j, k]^2 + end - # Take care of the case close to pure DG - if alpha_element < alpha_min - alpha_element = zero(alpha_element) - end + # Calculate energy in higher modes + if !(iszero(total_energy)) + energy_frac_1 = (total_energy - total_energy_clip1) / total_energy + else + energy_frac_1 = zero(total_energy) + end + if 
!(iszero(total_energy_clip1)) + energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 + else + energy_frac_2 = zero(total_energy_clip1) + end + energy = max(energy_frac_1, energy_frac_2) - # Take care of the case close to pure FV - if alpha_element > 1 - alpha_min - alpha_element = one(alpha_element) - end + alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) - # Clip the maximum amount of FV allowed - alpha[element] = min(alpha_max, alpha_element) + # Take care of the case close to pure DG + if alpha_element < alpha_min + alpha_element = zero(alpha_element) end - if alpha_smooth - apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) + # Take care of the case close to pure FV + if alpha_element > 1 - alpha_min + alpha_element = one(alpha_element) end - return alpha + # Clip the maximum amount of FV allowed + alpha[element] = min(alpha_max, alpha_element) end From 2808d4ead2af58956867b77a58a0f4dffcfdc7f5 Mon Sep 17 00:00:00 2001 From: MarkLopat <122838494+MarkLopat@users.noreply.github.com> Date: Tue, 16 May 2023 13:25:24 -0500 Subject: [PATCH 015/163] Update scalar_linear_advection_1d.jl (#1477) --- docs/literate/src/files/scalar_linear_advection_1d.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/literate/src/files/scalar_linear_advection_1d.jl b/docs/literate/src/files/scalar_linear_advection_1d.jl index 0261401b9be..8a8a1b19963 100644 --- a/docs/literate/src/files/scalar_linear_advection_1d.jl +++ b/docs/literate/src/files/scalar_linear_advection_1d.jl @@ -44,10 +44,11 @@ dx = (coordinates_max - coordinates_min) / n_elements # length of one element # ``` # Here, $J$ is the Jacobian determinant of the transformation. -# Using this transformation, we can transform our equation for all elements $Q_l$. +# Using this transformation, we can transform our equation for each element $Q_l$. # ```math # \frac{dx}{2} u_t^{Q_l} + u_\xi^{Q_l} = 0 \text{, for }t\in\mathbb{R}^+,\; \xi\in[-1, 1] # ``` +# $u_t^{Q_l}$ and $u_\xi^{Q_l}$ denote the time and spatial derivatives of the solution on the element $Q_l$. # ### ii. Polynomial approach From 9f081dc5ee40cf5c5ffb6b3770b76ba7d2a95217 Mon Sep 17 00:00:00 2001 From: Jesse Chan <1156048+jlchan@users.noreply.github.com> Date: Tue, 16 May 2023 18:07:50 -0500 Subject: [PATCH 016/163] Add Hennemann-Gassner shock capturing for `DGMulti` `GaussSBP` solvers (#1466) * add volume integral cache for Gauss schemes fix * fix splatting * initial draft of indicator routines * fix typo, add TODO * draft of indicator + smoothing * some interfaces for DGMulti shock capturing * fix ambiguity * fix nelements ambiguity * add DGMultiBasis * draft of shock capturing routines * add test elixir * pass in volume flux instead of volume integral * dropped one * refactoring * factor flux_differencing_kernel! out * use flux_differencing_kernel! * factor flux_differencing_kernel! out * consistent formatting * Update src/solvers/dgmulti/types.jl Co-authored-by: Hendrik Ranocha * add kwargs reference * add volume_flux specialization * Revert "add volume_flux specialization" This reverts commit 7e0c458aff0c94a4baffbf7ef1197f37200e57c4. 
* inlining `flux_differencing_kernel!` * draft of the low order (FV) volume kernel * remove some unpacks * working shock capturing * add example elixir for shock capturing * add test * remove some @unpacks * reduce allocations * reduce more allocations * factor out rhs_local projection onto Gauss nodes * remove one more reshape to remove allocations * fix indicator computations * comments * update l2, linf for tests * Update src/solvers/dgmulti/shock_capturing.jl Co-authored-by: Hendrik Ranocha * add Base.ReshapedArray comments * improve clarity of variable names fix FToF naming * removing normalization * use adjoint --------- Co-authored-by: Hendrik Ranocha --- .../dgmulti_2d/elixir_euler_shockcapturing.jl | 51 +++ src/solvers/dgmulti.jl | 3 + .../dgmulti/flux_differencing_gauss_sbp.jl | 51 +-- src/solvers/dgmulti/shock_capturing.jl | 290 ++++++++++++++++++ src/solvers/dgmulti/types.jl | 14 +- test/test_dgmulti_2d.jl | 8 + 6 files changed, 395 insertions(+), 22 deletions(-) create mode 100644 examples/dgmulti_2d/elixir_euler_shockcapturing.jl create mode 100644 src/solvers/dgmulti/shock_capturing.jl diff --git a/examples/dgmulti_2d/elixir_euler_shockcapturing.jl b/examples/dgmulti_2d/elixir_euler_shockcapturing.jl new file mode 100644 index 00000000000..4b2a408c757 --- /dev/null +++ b/examples/dgmulti_2d/elixir_euler_shockcapturing.jl @@ -0,0 +1,51 @@ + +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the compressible Euler equations + +equations = CompressibleEulerEquations2D(1.4) + +initial_condition = initial_condition_weak_blast_wave + +surface_flux = flux_lax_friedrichs +volume_flux = flux_ranocha + +polydeg = 3 +basis = DGMultiBasis(Quad(), polydeg, approximation_type=GaussSBP()) + +indicator_sc = IndicatorHennemannGassner(equations, basis, + alpha_max=0.5, + alpha_min=0.001, + alpha_smooth=true, + variable=density_pressure) +volume_integral = VolumeIntegralShockCapturingHG(indicator_sc; + volume_flux_dg=volume_flux, + volume_flux_fv=surface_flux) +dg = DGMulti(basis, + surface_integral = SurfaceIntegralWeakForm(surface_flux), + volume_integral = volume_integral) + +cells_per_dimension = (8, 8) +mesh = DGMultiMesh(dg, cells_per_dimension, periodicity=true) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, dg) + +tspan = (0.0, 0.15) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() +alive_callback = AliveCallback(alive_interval=10) +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval, uEltype=real(dg)) +callbacks = CallbackSet(summary_callback, alive_callback, analysis_callback) + +############################################################################### +# run the simulation + +sol = solve(ode, RDPK3SpFSAL49(); abstol=1.0e-6, reltol=1.0e-6, + ode_default_options()..., callback=callbacks); + +summary_callback() # print the timer summary + diff --git a/src/solvers/dgmulti.jl b/src/solvers/dgmulti.jl index 318a11b678e..8ff27db0cd9 100644 --- a/src/solvers/dgmulti.jl +++ b/src/solvers/dgmulti.jl @@ -10,5 +10,8 @@ include("dgmulti/sbp.jl") # specialization of DGMulti to specific equations include("dgmulti/flux_differencing_compressible_euler.jl") +# shock capturing +include("dgmulti/shock_capturing.jl") + # parabolic terms for DGMulti solvers include("dgmulti/dg_parabolic.jl") \ No newline at end of file diff --git a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl 
b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl index 09ee3d7d19c..95a471fa71b 100644 --- a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl +++ b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl @@ -133,15 +133,15 @@ end # Interpolates values from volume Gauss nodes to face nodes on one element. @inline function tensor_product_gauss_face_operator!(out::AbstractVector, A::TensorProductGaussFaceOperator{2, Interpolation}, - x::AbstractVector) + x_in::AbstractVector) - @unpack interp_matrix_gauss_to_face_1d, face_indices_tensor_product = A - @unpack nnodes_1d, nfaces = A + (; interp_matrix_gauss_to_face_1d, face_indices_tensor_product) = A + (; nnodes_1d) = A fill!(out, zero(eltype(out))) # for 2D GaussSBP nodes, the indexing is first in x, then in y - x = reshape(x, nnodes_1d, nnodes_1d) + x = reshape(x_in, nnodes_1d, nnodes_1d) # interpolation in the x-direction @turbo for i in Base.OneTo(nnodes_1d) # loop over nodes in a face @@ -169,8 +169,8 @@ end A::TensorProductGaussFaceOperator{3, Interpolation}, x::AbstractVector) - @unpack interp_matrix_gauss_to_face_1d, face_indices_tensor_product = A - @unpack nnodes_1d, nfaces = A + (; interp_matrix_gauss_to_face_1d, face_indices_tensor_product) = A + (; nnodes_1d) = A fill!(out, zero(eltype(out))) @@ -215,13 +215,16 @@ end A::TensorProductGaussFaceOperator{2, Projection{ApplyFaceWeights}}, x::AbstractVector) where {ApplyFaceWeights} - @unpack interp_matrix_gauss_to_face_1d, face_indices_tensor_product = A - @unpack inv_volume_weights_1d, nnodes_1d, nfaces = A + (; interp_matrix_gauss_to_face_1d, face_indices_tensor_product) = A + (; inv_volume_weights_1d, nnodes_1d) = A fill!(out_vec, zero(eltype(out_vec))) - # for 2D GaussSBP nodes, the indexing is first in x, then y - out = reshape(out_vec, nnodes_1d, nnodes_1d) + # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. + # Thus, Base.ReshapedArray should be used if you are setting values in the array. + # `reshape` is fine if you are only accessing values. + # Note that, for 2D GaussSBP nodes, the indexing is first in x, then y + out = Base.ReshapedArray(out_vec, (nnodes_1d, nnodes_1d), ()) if ApplyFaceWeights == true @turbo for i in eachindex(x) @@ -266,8 +269,11 @@ end fill!(out_vec, zero(eltype(out_vec))) - # for 3D GaussSBP nodes, the indexing is first in y, then x, then z. - out = reshape(out_vec, nnodes_1d, nnodes_1d, nnodes_1d) + # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. + # Thus, Base.ReshapedArray should be used if you are setting values in the array. + # `reshape` is fine if you are only accessing values. + # Note that, for 3D GaussSBP nodes, the indexing is first in y, then x, then z. + out = Base.ReshapedArray(out_vec, (nnodes_1d, nnodes_1d, nnodes_1d), ()) if ApplyFaceWeights == true @turbo for i in eachindex(x) @@ -351,9 +357,12 @@ function create_cache(mesh::DGMultiMesh, equations, return (; cache..., projection_matrix_gauss_to_face, gauss_LIFT, inv_gauss_weights, rhs_volume_local_threaded, gauss_volume_local_threaded, interp_matrix_lobatto_to_gauss, interp_matrix_gauss_to_lobatto, - interp_matrix_gauss_to_face) + interp_matrix_gauss_to_face, + create_cache(mesh, equations, dg.volume_integral, dg, RealT, uEltype)...) # add cache specialized on the volume integral end +# by default, return an empty tuple for volume integral caches +create_cache(mesh, equations, volume_integral, dg, RealT, uEltype) = NamedTuple() # TODO: DGMulti. 
Address hard-coding of `entropy2cons!` and `cons2entropy!` for this function. function entropy_projection!(cache, u, mesh::DGMultiMesh, equations, dg::DGMultiFluxDiff{<:GaussSBP}) @@ -397,8 +406,8 @@ end function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGMultiFluxDiff{<:GaussSBP}, cache) - @unpack gauss_volume_local_threaded = cache - @unpack interp_matrix_gauss_to_lobatto, gauss_LIFT = cache + + (; gauss_LIFT, gauss_volume_local_threaded) = cache @threaded for e in eachelement(mesh, dg, cache) @@ -434,8 +443,12 @@ end rhs_local[i] = fluxdiff_local[i] end - # stores rhs contributions only at Gauss volume nodes - rhs_volume_local = cache.rhs_volume_local_threaded[Threads.threadid()] + project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) + +end + +function project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh::DGMultiMesh, + dg::DGMulti, cache, alpha=true) # Here, we exploit that under a Gauss nodal basis the structure of the projection # matrix `Ph = [diagm(1 ./ wq), projection_matrix_gauss_to_face]` such that @@ -446,11 +459,13 @@ end local_face_flux = view(rhs_local, face_indices) # initialize rhs_volume_local = projection_matrix_gauss_to_face * local_face_flux + rhs_volume_local = cache.rhs_volume_local_threaded[Threads.threadid()] apply_to_each_field(mul_by!(cache.projection_matrix_gauss_to_face), rhs_volume_local, local_face_flux) # accumulate volume contributions at Gauss nodes for i in eachindex(rhs_volume_local) - du[i, element] = alpha * (rhs_volume_local[i] + local_volume_flux[i] * cache.inv_gauss_weights[i]) + du_local = rhs_volume_local[i] + local_volume_flux[i] * cache.inv_gauss_weights[i] + du[i, element] = du[i, element] + alpha * du_local end end diff --git a/src/solvers/dgmulti/shock_capturing.jl b/src/solvers/dgmulti/shock_capturing.jl new file mode 100644 index 00000000000..12408bcf9ba --- /dev/null +++ b/src/solvers/dgmulti/shock_capturing.jl @@ -0,0 +1,290 @@ +# by default, return an empty tuple for volume integral caches +function create_cache(mesh::DGMultiMesh{NDIMS}, equations, + volume_integral::VolumeIntegralShockCapturingHG, + dg::DGMultiFluxDiff{<:GaussSBP}, RealT, uEltype) where {NDIMS} + element_ids_dg = Int[] + element_ids_dgfv = Int[] + + # build element to element (element_to_element_connectivity) connectivity for smoothing of + # shock capturing parameters. + face_to_face_connectivity = mesh.md.FToF # num_faces x num_elements matrix + element_to_element_connectivity = similar(face_to_face_connectivity) + for e in axes(face_to_face_connectivity, 2) + for f in axes(face_to_face_connectivity, 1) + neighbor_face_index = face_to_face_connectivity[f, e] + + # reverse-engineer element index from face. Assumes all elements + # have the same number of faces. + neighbor_element_index = ((neighbor_face_index - 1) ÷ dg.basis.num_faces) + 1 + element_to_element_connectivity[f, e] = neighbor_element_index + end + end + + # create sparse hybridized operators for low order scheme + Qrst, E = StartUpDG.sparse_low_order_SBP_operators(dg.basis) + Brst = map(n -> Diagonal(n .* dg.basis.wf), dg.basis.nrstJ) + sparse_hybridized_SBP_operators = map((Q, B) -> 0.5 * [Q-Q' E'*B; -B*E zeros(size(B))], Qrst, Brst) + + # Find the joint sparsity pattern of the entire matrix. We store the sparsity pattern as + # an adjoint for faster iteration through the rows. 
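+  # The adjoint of a `SparseMatrixCSC` behaves like a CSR matrix: the column-oriented
+  # primitives `rowvals` and `nzrange`, applied to `parent(sparsity_pattern)`, then
+  # traverse the *rows* of the pattern, which is what the low order kernel below needs.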
+ sparsity_pattern = sum(map(A -> abs.(A)', sparse_hybridized_SBP_operators)) .> 100 * eps() + + return (; element_ids_dg, element_ids_dgfv, + sparse_hybridized_SBP_operators, sparsity_pattern, + element_to_element_connectivity) +end + + +# this method is used when the indicator is constructed as for shock-capturing volume integrals +function create_cache(::Type{IndicatorHennemannGassner}, equations::AbstractEquations, + basis::RefElemData{NDIMS}) where NDIMS + + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) + + A = Vector{real(basis)} + indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] + modal_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] + + # initialize inverse Vandermonde matrices at Gauss-Legendre nodes + (; N) = basis + lobatto_node_coordinates_1D, _ = StartUpDG.gauss_lobatto_quad(0, 0, N) + VDM_1D = StartUpDG.vandermonde(Line(), N, lobatto_node_coordinates_1D) + inverse_vandermonde = SimpleKronecker(NDIMS, inv(VDM_1D)) + + return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, inverse_vandermonde) +end + + +function (indicator_hg::IndicatorHennemannGassner)(u, mesh::DGMultiMesh, + equations, dg::DGMulti{NDIMS}, cache; + kwargs...) where {NDIMS} + (; alpha_max, alpha_min, alpha_smooth, variable) = indicator_hg + (; alpha, alpha_tmp, indicator_threaded, modal_threaded, inverse_vandermonde) = indicator_hg.cache + + resize!(alpha, nelements(mesh, dg)) + if alpha_smooth + resize!(alpha_tmp, nelements(mesh, dg)) + end + + # magic parameters + threshold = 0.5 * 10^(-1.8 * (dg.basis.N + 1)^0.25) + parameter_s = log((1 - 0.0001) / 0.0001) + + @threaded for element in eachelement(mesh, dg) + indicator = indicator_threaded[Threads.threadid()] + modal_ = modal_threaded[Threads.threadid()] + + # Calculate indicator variable at interpolation (Lobatto) nodes. + # TODO: calculate indicator variables at Gauss nodes or using `cache.entropy_projected_u_values` + for i in eachnode(dg) + indicator[i] = indicator_hg.variable(u[i, element], equations) + end + + # multiply by invVDM::SimpleKronecker + LinearAlgebra.mul!(modal_, inverse_vandermonde, indicator) + + # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. + # Thus, Base.ReshapedArray should be used if you are setting values in the array. + # `reshape` is fine if you are only accessing values. + # Here, we reshape modal coefficients to expose the tensor product structure. + modal = Base.ReshapedArray(modal_, ntuple(_ -> dg.basis.N + 1, NDIMS), ()) + + # Calculate total energies for all modes, all modes minus the highest mode, and + # all modes without the two highest modes + total_energy = sum(x -> x^2, modal) + clip_1_ranges = ntuple(_ -> Base.OneTo(dg.basis.N), NDIMS) + clip_2_ranges = ntuple(_ -> Base.OneTo(dg.basis.N - 1), NDIMS) + # These splattings do not seem to allocate as of Julia 1.9.0? 
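+    # `clip_1_ranges` selects the modal coefficients of degree at most N - 1 in each
+    # coordinate direction, `clip_2_ranges` those of degree at most N - 2.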
+    total_energy_clip1 = sum(x -> x^2, view(modal, clip_1_ranges...))
+    total_energy_clip2 = sum(x -> x^2, view(modal, clip_2_ranges...))
+
+    # Calculate energy in higher modes
+    if !(iszero(total_energy))
+      energy_frac_1 = (total_energy - total_energy_clip1) / total_energy
+    else
+      energy_frac_1 = zero(total_energy)
+    end
+    if !(iszero(total_energy_clip1))
+      energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1
+    else
+      energy_frac_2 = zero(total_energy_clip1)
+    end
+    energy = max(energy_frac_1, energy_frac_2)
+
+    alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold)))
+
+    # Take care of the case close to pure DG
+    if alpha_element < alpha_min
+      alpha_element = zero(alpha_element)
+    end
+
+    # Take care of the case close to pure FV
+    if alpha_element > 1 - alpha_min
+      alpha_element = one(alpha_element)
+    end
+
+    # Clip the maximum amount of FV allowed
+    alpha[element] = min(alpha_max, alpha_element)
+  end
+
+  # smooth element indices after they're all computed
+  if alpha_smooth
+    apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache)
+  end
+
+  return alpha
+end
+
+# Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
+function apply_smoothing!(mesh::DGMultiMesh, alpha, alpha_tmp, dg::DGMulti, cache)
+
+  # Copy alpha values such that smoothing is independent of the element access order
+  alpha_tmp .= alpha
+
+  # smooth alpha with its neighboring value
+  for element in eachelement(mesh, dg)
+    for face in Base.OneTo(StartUpDG.num_faces(dg.basis.element_type))
+      neighboring_element = cache.element_to_element_connectivity[face, element]
+      alpha_neighbor = alpha_tmp[neighboring_element]
+      alpha[element] = max(alpha[element], 0.5 * alpha_neighbor)
+    end
+  end
+
+end
+
+# pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache)
+#
+# Given blending factors `alpha` and the solver `dg`, fill
+# `element_ids_dg` with the IDs of elements using a pure DG scheme and
+# `element_ids_dgfv` with the IDs of elements using a blended DG-FV scheme.
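+# This mirrors the helper of the same name used by the DGSEM shock capturing volume
+# integrals, specialized here to the `DGMultiMesh` and `DGMulti` types.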
+function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, + mesh::DGMultiMesh, dg::DGMulti) + empty!(element_ids_dg) + empty!(element_ids_dgfv) + + for element in eachelement(mesh, dg) + # Clip blending factor for values close to zero (-> pure DG) + dg_only = isapprox(alpha[element], 0, atol=1e-12) + if dg_only + push!(element_ids_dg, element) + else + push!(element_ids_dgfv, element) + end + end + + return nothing +end + +function calc_volume_integral!(du, u, + mesh::DGMultiMesh, + have_nonconservative_terms, equations, + volume_integral::VolumeIntegralShockCapturingHG, + dg::DGMultiFluxDiff, cache) + + (; element_ids_dg, element_ids_dgfv) = cache + (; volume_flux_dg, volume_flux_fv, indicator) = volume_integral + + # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α + alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, cache) + + # Determine element ids for DG-only and blended DG-FV volume integral + pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, mesh, dg) + + # Loop over pure DG elements + @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) + element = element_ids_dg[idx_element] + flux_differencing_kernel!(du, u, element, mesh, have_nonconservative_terms, + equations, volume_flux_dg, dg, cache) + end + + # Loop over blended DG-FV elements, blend the high and low order RHS contributions + # via `rhs_high * (1 - alpha) + rhs_low * (alpha)`. + @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) + element = element_ids_dgfv[idx_element] + alpha_element = alpha[element] + + # Calculate DG volume integral contribution + flux_differencing_kernel!(du, u, element, mesh, + have_nonconservative_terms, equations, + volume_flux_dg, dg, cache, 1 - alpha_element) + + # Calculate "FV" low order volume integral contribution + low_order_flux_differencing_kernel!(du, u, element, mesh, + have_nonconservative_terms, equations, + volume_flux_fv, dg, cache, alpha_element) + end + + return nothing +end + +get_sparse_operator_entries(i, j, mesh::DGMultiMesh{1}, cache) = + SVector(cache.sparse_hybridized_SBP_operators[1][i, j]) + +function get_sparse_operator_entries(i, j, mesh::DGMultiMesh{2}, cache) + Qr, Qs = cache.sparse_hybridized_SBP_operators + return SVector(Qr[i, j], Qs[i, j]) +end + +function get_sparse_operator_entries(i, j, mesh::DGMultiMesh{3}, cache) + Qr, Qs, Qt = cache.sparse_hybridized_SBP_operators + return SVector(Qr[i, j], Qs[i, j], Qt[i, j]) +end + +get_contravariant_matrix(element, mesh::DGMultiMesh{1}, cache) = + SMatrix{1, 1}(cache.dxidxhatj[1, 1][1, element]) + +function get_contravariant_matrix(element, mesh::DGMultiMesh{2, <:Affine}, cache) + (; dxidxhatj) = cache + return SMatrix{2, 2}(dxidxhatj[1, 1][1, element], dxidxhatj[2, 1][1, element], + dxidxhatj[1, 2][1, element], dxidxhatj[2, 2][1, element]) +end + +function get_contravariant_matrix(element, mesh::DGMultiMesh{3, <:Affine}, cache) + (; dxidxhatj) = cache + return SMatrix{3, 3}(dxidxhatj[1, 1][1, element], dxidxhatj[2, 1][1, element], dxidxhatj[3, 1][1, element], + dxidxhatj[1, 2][1, element], dxidxhatj[2, 2][1, element], dxidxhatj[3, 2][1, element], + dxidxhatj[1, 3][1, element], dxidxhatj[2, 3][1, element], dxidxhatj[3, 3][1, element]) +end + +# computes an algebraic low order method with internal dissipation. 
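+# Each degree of freedom i receives an update of the form du_i = ∑_j 2 * f(u_i, u_j, n_ij),
+# where the direction n_ij combines the sparse SBP operator entries with the contravariant
+# geometric terms, mimicking a first order finite volume flux loop.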
+# TODO: implement for curved meshes +function low_order_flux_differencing_kernel!(du, u, element, mesh::DGMultiMesh, + have_nonconservative_terms::False, equations, + volume_flux_fv, dg::DGMultiFluxDiff{<:GaussSBP}, + cache, alpha=true) + + # accumulates output from flux differencing + rhs_local = cache.rhs_local_threaded[Threads.threadid()] + fill!(rhs_local, zero(eltype(rhs_local))) + + u_local = view(cache.entropy_projected_u_values, :, element) + + (; sparsity_pattern) = cache + A_base = parent(sparsity_pattern) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR + row_ids, rows = axes(sparsity_pattern, 2), rowvals(A_base) + for i in row_ids + u_i = u_local[i] + du_i = zero(u_i) + for id in nzrange(A_base, i) + j = rows[id] + u_j = u_local[j] + + # compute (Q_1[i,j], Q_2[i,j], ...) where Q_i = ∑_j dxidxhatj * Q̂_j + geometric_matrix = get_contravariant_matrix(element, mesh, cache) + reference_operator_entries = get_sparse_operator_entries(i, j, mesh, cache) + normal_direction_ij = geometric_matrix * reference_operator_entries + + # note that we do not need to normalize `normal_direction_ij` since + # it is typically normalized within the flux computation. + f_ij = volume_flux_fv(u_i, u_j, normal_direction_ij, equations) + du_i = du_i + 2 * f_ij + end + rhs_local[i] = du_i + end + + # TODO: factor this out to avoid calling it twice during calc_volume_integral! + project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) + +end + diff --git a/src/solvers/dgmulti/types.jl b/src/solvers/dgmulti/types.jl index 20358556d8b..c452ed67b2e 100644 --- a/src/solvers/dgmulti/types.jl +++ b/src/solvers/dgmulti/types.jl @@ -12,10 +12,10 @@ const DGMultiWeakForm{ApproxType, ElemType} = DGMulti{NDIMS, ElemType, ApproxType, <:SurfaceIntegralWeakForm, <:VolumeIntegralWeakForm} where {NDIMS} const DGMultiFluxDiff{ApproxType, ElemType} = - DGMulti{NDIMS, ElemType, ApproxType, <:SurfaceIntegralWeakForm, <:VolumeIntegralFluxDifferencing} where {NDIMS} + DGMulti{NDIMS, ElemType, ApproxType, <:SurfaceIntegralWeakForm, <:Union{VolumeIntegralFluxDifferencing, VolumeIntegralShockCapturingHG}} where {NDIMS} const DGMultiFluxDiffSBP{ApproxType, ElemType} = - DGMulti{NDIMS, ElemType, ApproxType, <:SurfaceIntegralWeakForm, <:VolumeIntegralFluxDifferencing} where {NDIMS, ApproxType<:Union{SBP, AbstractDerivativeOperator}} + DGMulti{NDIMS, ElemType, ApproxType, <:SurfaceIntegralWeakForm, <:Union{VolumeIntegralFluxDifferencing, VolumeIntegralShockCapturingHG}} where {NDIMS, ApproxType<:Union{SBP, AbstractDerivativeOperator}} const DGMultiSBP{ApproxType, ElemType} = DGMulti{NDIMS, ElemType, ApproxType, SurfaceIntegral, VolumeIntegral} where {NDIMS, ElemType, ApproxType<:Union{SBP, AbstractDerivativeOperator}, SurfaceIntegral, VolumeIntegral} @@ -303,7 +303,10 @@ function LinearAlgebra.mul!(b_in, A_kronecker::SimpleKronecker{2}, x_in) tmp_storage[i] = x_in[i] end x = reshape(tmp_storage, n, n) - b = reshape(b_in, n, n) + # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. + # Thus, Base.ReshapedArray should be used if you are setting values in the array. + # `reshape` is fine if you are only accessing values. 
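+  # For example (hypothetical sizes): with `n = 3`, `b_in` has length 9 and `b` is a
+  # non-allocating 3×3 view of it, so assignments `b[i, j] = ...` write into `b_in`.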
+ b = Base.ReshapedArray(b_in, (n, n), ()) @turbo thread=true for j in 1:n, i in 1:n tmp = zero(eltype(x)) @@ -340,7 +343,10 @@ function LinearAlgebra.mul!(b_in, A_kronecker::SimpleKronecker{3}, x_in) tmp_storage[i] = x_in[i] end x = reshape(tmp_storage, n, n, n) - b = reshape(b_in, n, n, n) + # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. + # Thus, Base.ReshapedArray should be used if you are setting values in the array. + # `reshape` is fine if you are only accessing values. + b = Base.ReshapedArray(b_in, (n, n, n), ()) @turbo thread=true for k in 1:n, j in 1:n, i in 1:n tmp = zero(eltype(x)) diff --git a/test/test_dgmulti_2d.jl b/test/test_dgmulti_2d.jl index 0c10a176420..3959a4d7c79 100644 --- a/test/test_dgmulti_2d.jl +++ b/test/test_dgmulti_2d.jl @@ -169,6 +169,14 @@ isdir(outdir) && rm(outdir, recursive=true) ) end + @trixi_testset "elixir_euler_shockcapturing.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_shockcapturing.jl"), + cells_per_dimension = 4, tspan = (0.0, 0.1), + l2 = [0.05685148333985476, 0.04308122135907089, 0.043081221359070915, 0.21098131003847664], + linf = [0.2360672306096051, 0.16684417686971842, 0.1668441768697189, 0.8572572782118661] + ) + end + @trixi_testset "elixir_euler_weakform.jl (FD SBP)" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_weakform.jl"), cells_per_dimension = (2, 2), From 30fa9c90482db4cd7af1248d71c9569f723ef50d Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Wed, 17 May 2023 05:10:26 +0200 Subject: [PATCH 017/163] set version to v0.5.23 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 9796ae5e04a..52a00f15e29 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.23-pre" +version = "0.5.23" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From fa0412089dce9a90ae6eff2704f4a87e65ee0635 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Wed, 17 May 2023 05:10:41 +0200 Subject: [PATCH 018/163] set development version to v0.5.24-pre --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 52a00f15e29..5ee043b3232 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.23" +version = "0.5.24-pre" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From ecec28dc20ee00876f034fa7dd47050e9e4470bb Mon Sep 17 00:00:00 2001 From: Jesse Chan <1156048+jlchan@users.noreply.github.com> Date: Wed, 17 May 2023 01:54:45 -0500 Subject: [PATCH 019/163] Support `GaussSBP` shock capturing on curved meshes (#1478) * add volume integral cache for Gauss schemes fix * fix splatting * initial draft of indicator routines * fix typo, add TODO * draft of indicator + smoothing * some interfaces for DGMulti shock capturing * fix ambiguity * fix nelements ambiguity * add DGMultiBasis * draft of shock capturing routines * add test elixir * pass in volume flux instead of volume integral * dropped one * refactoring * factor flux_differencing_kernel! out * use flux_differencing_kernel! * factor flux_differencing_kernel! 
out * consistent formatting * Update src/solvers/dgmulti/types.jl Co-authored-by: Hendrik Ranocha * add kwargs reference * add volume_flux specialization * Revert "add volume_flux specialization" This reverts commit 7e0c458aff0c94a4baffbf7ef1197f37200e57c4. * inlining `flux_differencing_kernel!` * draft of the low order (FV) volume kernel * remove some unpacks * working shock capturing * add example elixir for shock capturing * add test * remove some @unpacks * reduce allocations * reduce more allocations * factor out rhs_local projection onto Gauss nodes * remove one more reshape to remove allocations * fix indicator computations * comments * update l2, linf for tests * Update src/solvers/dgmulti/shock_capturing.jl Co-authored-by: Hendrik Ranocha * add Base.ReshapedArray comments * improve clarity of variable names fix FToF naming * removing normalization * use adjoint * adding curved shock capturing * adding a test * Update src/solvers/dgmulti/shock_capturing.jl Co-authored-by: Hendrik Ranocha * Update src/solvers/dgmulti/shock_capturing.jl Co-authored-by: Hendrik Ranocha --------- Co-authored-by: Hendrik Ranocha --- .../elixir_euler_shockcapturing_curved.jl | 56 +++++++++++++++++ src/solvers/dgmulti/shock_capturing.jl | 61 ++++++++++++++++++- test/test_dgmulti_2d.jl | 9 +++ 3 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 examples/dgmulti_2d/elixir_euler_shockcapturing_curved.jl diff --git a/examples/dgmulti_2d/elixir_euler_shockcapturing_curved.jl b/examples/dgmulti_2d/elixir_euler_shockcapturing_curved.jl new file mode 100644 index 00000000000..dad898b99b6 --- /dev/null +++ b/examples/dgmulti_2d/elixir_euler_shockcapturing_curved.jl @@ -0,0 +1,56 @@ + +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the compressible Euler equations + +equations = CompressibleEulerEquations2D(1.4) + +initial_condition = initial_condition_weak_blast_wave + +surface_flux = flux_lax_friedrichs +volume_flux = flux_ranocha + +polydeg = 3 +basis = DGMultiBasis(Quad(), polydeg, approximation_type=GaussSBP()) + +indicator_sc = IndicatorHennemannGassner(equations, basis, + alpha_max=0.5, + alpha_min=0.001, + alpha_smooth=true, + variable=density_pressure) +volume_integral = VolumeIntegralShockCapturingHG(indicator_sc; + volume_flux_dg=volume_flux, + volume_flux_fv=surface_flux) +dg = DGMulti(basis, + surface_integral = SurfaceIntegralWeakForm(surface_flux), + volume_integral = volume_integral) + +function mapping(xi, eta) + x = xi + 0.1 * sin(pi * xi) * sin(pi * eta) + y = eta + 0.1 * sin(pi * xi) * sin(pi * eta) + return SVector(x, y) +end +cells_per_dimension = (16, 16) +mesh = DGMultiMesh(dg, cells_per_dimension, mapping) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, dg) + +tspan = (0.0, 0.15) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() +alive_callback = AliveCallback(alive_interval=10) +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval, uEltype=real(dg)) +callbacks = CallbackSet(summary_callback, alive_callback, analysis_callback) + +############################################################################### +# run the simulation + +sol = solve(ode, RDPK3SpFSAL49(); abstol=1.0e-6, reltol=1.0e-6, + ode_default_options()..., callback=callbacks); + +summary_callback() # print the timer summary + diff --git a/src/solvers/dgmulti/shock_capturing.jl b/src/solvers/dgmulti/shock_capturing.jl 
index 12408bcf9ba..bbda089ee58 100644 --- a/src/solvers/dgmulti/shock_capturing.jl +++ b/src/solvers/dgmulti/shock_capturing.jl @@ -247,8 +247,24 @@ function get_contravariant_matrix(element, mesh::DGMultiMesh{3, <:Affine}, cache dxidxhatj[1, 3][1, element], dxidxhatj[2, 3][1, element], dxidxhatj[3, 3][1, element]) end +function get_contravariant_matrix(i, element, mesh::DGMultiMesh{2}, cache) + (; dxidxhatj) = cache + return SMatrix{2, 2}(dxidxhatj[1, 1][i, element], dxidxhatj[2, 1][i, element], + dxidxhatj[1, 2][i, element], dxidxhatj[2, 2][i, element]) +end + +function get_contravariant_matrix(i, element, mesh::DGMultiMesh{3}, cache) + (; dxidxhatj) = cache + return SMatrix{3, 3}(dxidxhatj[1, 1][i, element], dxidxhatj[2, 1][i, element], dxidxhatj[3, 1][i, element], + dxidxhatj[1, 2][i, element], dxidxhatj[2, 2][i, element], dxidxhatj[3, 2][i, element], + dxidxhatj[1, 3][i, element], dxidxhatj[2, 3][i, element], dxidxhatj[3, 3][i, element]) +end + +get_avg_contravariant_matrix(i, j, element, mesh::DGMultiMesh, cache) = + 0.5 * (get_contravariant_matrix(i, element, mesh, cache) + get_contravariant_matrix(j, element, mesh, cache)) + # computes an algebraic low order method with internal dissipation. -# TODO: implement for curved meshes +# This method is for affine/Cartesian meshes function low_order_flux_differencing_kernel!(du, u, element, mesh::DGMultiMesh, have_nonconservative_terms::False, equations, volume_flux_fv, dg::DGMultiFluxDiff{<:GaussSBP}, @@ -260,6 +276,47 @@ function low_order_flux_differencing_kernel!(du, u, element, mesh::DGMultiMesh, u_local = view(cache.entropy_projected_u_values, :, element) + # constant over each element + geometric_matrix = get_contravariant_matrix(element, mesh, cache) + + (; sparsity_pattern) = cache + A_base = parent(sparsity_pattern) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR + row_ids, rows = axes(sparsity_pattern, 2), rowvals(A_base) + for i in row_ids + u_i = u_local[i] + du_i = zero(u_i) + for id in nzrange(A_base, i) + j = rows[id] + u_j = u_local[j] + + # compute (Q_1[i,j], Q_2[i,j], ...) where Q_i = ∑_j dxidxhatj * Q̂_j + reference_operator_entries = get_sparse_operator_entries(i, j, mesh, cache) + normal_direction_ij = geometric_matrix * reference_operator_entries + + # note that we do not need to normalize `normal_direction_ij` since + # it is typically normalized within the flux computation. + f_ij = volume_flux_fv(u_i, u_j, normal_direction_ij, equations) + du_i = du_i + 2 * f_ij + end + rhs_local[i] = du_i + end + + # TODO: factor this out to avoid calling it twice during calc_volume_integral! + project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) + +end + +function low_order_flux_differencing_kernel!(du, u, element, mesh::DGMultiMesh{NDIMS, <:NonAffine}, + have_nonconservative_terms::False, equations, + volume_flux_fv, dg::DGMultiFluxDiff{<:GaussSBP}, + cache, alpha=true) where {NDIMS} + + # accumulates output from flux differencing + rhs_local = cache.rhs_local_threaded[Threads.threadid()] + fill!(rhs_local, zero(eltype(rhs_local))) + + u_local = view(cache.entropy_projected_u_values, :, element) + (; sparsity_pattern) = cache A_base = parent(sparsity_pattern) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR row_ids, rows = axes(sparsity_pattern, 2), rowvals(A_base) @@ -271,7 +328,7 @@ function low_order_flux_differencing_kernel!(du, u, element, mesh::DGMultiMesh, u_j = u_local[j] # compute (Q_1[i,j], Q_2[i,j], ...) 
where Q_i = ∑_j dxidxhatj * Q̂_j - geometric_matrix = get_contravariant_matrix(element, mesh, cache) + geometric_matrix = get_avg_contravariant_matrix(i, j, element, mesh, cache) reference_operator_entries = get_sparse_operator_entries(i, j, mesh, cache) normal_direction_ij = geometric_matrix * reference_operator_entries diff --git a/test/test_dgmulti_2d.jl b/test/test_dgmulti_2d.jl index 3959a4d7c79..302dbebc8ff 100644 --- a/test/test_dgmulti_2d.jl +++ b/test/test_dgmulti_2d.jl @@ -177,6 +177,15 @@ isdir(outdir) && rm(outdir, recursive=true) ) end + @trixi_testset "elixir_euler_shockcapturing_curved.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_shockcapturing_curved.jl"), + cells_per_dimension = 4, tspan = (0.0, 0.1), + l2 = [0.05565849298766252, 0.042322816017256494, 0.042322816017256466, 0.2064212098324083], + linf = [0.23633287875008924, 0.16930148707515683, 0.16930148707515688, 0.8587706761131937] + ) + end + + @trixi_testset "elixir_euler_weakform.jl (FD SBP)" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_weakform.jl"), cells_per_dimension = (2, 2), From 286b47d5a6b9932b4eb2f665745ca10308302b6a Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Wed, 17 May 2023 16:42:30 +0200 Subject: [PATCH 020/163] remove unused function mpi_isparallel (#1481) --- src/auxiliary/mpi.jl | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/auxiliary/mpi.jl b/src/auxiliary/mpi.jl index 3816b42c337..ab1b13d49da 100644 --- a/src/auxiliary/mpi.jl +++ b/src/auxiliary/mpi.jl @@ -45,18 +45,6 @@ const MPI_IS_ROOT = Ref(true) @inline mpi_isparallel() = MPI_IS_PARALLEL[] -# This is not type-stable but that's okay since we want to get rid of it anyway -# and it's not used in performance-critical parts. The alternative we used before, -# calling something like `eval(:(mpi_parallel() = True()))` in `init_mpi()`, -# causes invalidations and slows down the first call to Trixi.jl. -function mpi_parallel() - if mpi_isparallel() - return True() - else - return False() - end -end - @inline mpi_isroot() = MPI_IS_ROOT[] @inline mpi_root() = 0 From 5feb5da96da9467f39c37d825dc0dbd00600e1cb Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Thu, 18 May 2023 09:44:55 +0200 Subject: [PATCH 021/163] More FDSBP tests and fixes (#1482) * specialize rhs! on source_terms to avoid spurious allocations * use PtrArray when wrapping with FDSBP to fix allocations This has not been a problem when we created the FDSBP solvers. However, some updates in the package ecosystem lead to spurious allocations that are hard to track down. A simple fix is to use PtrArrays in Trixi.wrap_array. On Julia v1.8.5, this leads to a performance improvement of ca. - 1.12s to 27.4ms for volume integral of examples/tree_1d_fdsbp/elixir_burgers_basic.jl - 657ms to 112ms for volume integral of examples/tree_2d_fdsbp/elixir_advection_extended.jl - 1.14s to 44.8ms for volume integral of examples/tree_3d_fdsbp/elixir_advection_extended.jl With Julia v1.9, the previous situation was unbearable since there was a performance hit of roughly a factor of 100. 
* test 3D FDSBP for scalar eq * add tests for allocations of FDSBP --- .../elixir_advection_extended.jl | 56 +++++++++++++++++++ src/solvers/dg.jl | 46 ++++++++++----- src/solvers/dgsem_p4est/dg_3d_parallel.jl | 4 +- src/solvers/dgsem_structured/dg_1d.jl | 4 +- src/solvers/dgsem_structured/dg_2d.jl | 4 +- src/solvers/dgsem_structured/dg_3d.jl | 4 +- src/solvers/dgsem_tree/dg_1d.jl | 4 +- src/solvers/dgsem_tree/dg_2d.jl | 4 +- src/solvers/dgsem_tree/dg_2d_parallel.jl | 4 +- src/solvers/dgsem_tree/dg_3d.jl | 4 +- src/solvers/dgsem_unstructured/dg_2d.jl | 4 +- test/test_tree_1d_fdsbp.jl | 18 ++++++ test/test_tree_2d_fdsbp.jl | 18 ++++++ test/test_tree_3d_fdsbp.jl | 24 ++++++++ 14 files changed, 166 insertions(+), 32 deletions(-) create mode 100644 examples/tree_3d_fdsbp/elixir_advection_extended.jl diff --git a/examples/tree_3d_fdsbp/elixir_advection_extended.jl b/examples/tree_3d_fdsbp/elixir_advection_extended.jl new file mode 100644 index 00000000000..241e0698649 --- /dev/null +++ b/examples/tree_3d_fdsbp/elixir_advection_extended.jl @@ -0,0 +1,56 @@ +# !!! warning "Experimental implementation (upwind SBP)" +# This is an experimental feature and may change in future releases. + +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the linear advection equation + +advection_velocity = (0.2, -0.7, 0.5) +equations = LinearScalarAdvectionEquation3D(advection_velocity) + +initial_condition = initial_condition_convergence_test + +D_SBP = derivative_operator(SummationByPartsOperators.MattssonNordström2004(), + derivative_order=1, accuracy_order=4, + xmin=0.0, xmax=1.0, N=10) +solver = FDSBP(D_SBP, + surface_integral=SurfaceIntegralStrongForm(flux_lax_friedrichs), + volume_integral=VolumeIntegralStrongForm()) + +coordinates_min = (-1.0, -1.0, -1.0) +coordinates_max = ( 1.0, 1.0, 1.0) +mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level=1, + n_cells_max=30_000, + periodicity=true) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) + + +############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan); + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval, + extra_analysis_integrals=(energy_total,)) + +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +callbacks = CallbackSet(summary_callback, + analysis_callback, + alive_callback) + + +############################################################################### +# run the simulation + +sol = solve(ode, RDPK3SpFSAL49(); abstol=1.0e-9, reltol=1.0e-9, + ode_default_options()..., callback=callbacks) +summary_callback() diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 29de151f143..fc6420791bb 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -375,7 +375,7 @@ const MeshesDGSEM = Union{TreeMesh, StructuredMesh, UnstructuredMesh2D, P4estMes eachnode(dg::DG) Return an iterator over the indices that specify the location in relevant data structures -for the nodes in `dg`. +for the nodes in `dg`. In particular, not the nodes themselves are returned. """ @inline eachnode(dg::DG) = Base.OneTo(nnodes(dg)) @@ -390,7 +390,7 @@ In particular, not the nodes themselves are returned. 
eachelement(dg::DG, cache) Return an iterator over the indices that specify the location in relevant data structures -for the elements in `cache`. +for the elements in `cache`. In particular, not the elements themselves are returned. """ @inline eachelement(dg::DG, cache) = Base.OneTo(nelements(dg, cache)) @@ -399,7 +399,7 @@ In particular, not the elements themselves are returned. eachinterface(dg::DG, cache) Return an iterator over the indices that specify the location in relevant data structures -for the interfaces in `cache`. +for the interfaces in `cache`. In particular, not the interfaces themselves are returned. """ @inline eachinterface(dg::DG, cache) = Base.OneTo(ninterfaces(dg, cache)) @@ -408,7 +408,7 @@ In particular, not the interfaces themselves are returned. eachboundary(dg::DG, cache) Return an iterator over the indices that specify the location in relevant data structures -for the boundaries in `cache`. +for the boundaries in `cache`. In particular, not the boundaries themselves are returned. """ @inline eachboundary(dg::DG, cache) = Base.OneTo(nboundaries(dg, cache)) @@ -417,7 +417,7 @@ In particular, not the boundaries themselves are returned. eachmortar(dg::DG, cache) Return an iterator over the indices that specify the location in relevant data structures -for the mortars in `cache`. +for the mortars in `cache`. In particular, not the mortars themselves are returned. """ @inline eachmortar(dg::DG, cache) = Base.OneTo(nmortars(dg, cache)) @@ -426,7 +426,7 @@ In particular, not the mortars themselves are returned. eachmpiinterface(dg::DG, cache) Return an iterator over the indices that specify the location in relevant data structures -for the MPI interfaces in `cache`. +for the MPI interfaces in `cache`. In particular, not the interfaces themselves are returned. """ @inline eachmpiinterface(dg::DG, cache) = Base.OneTo(nmpiinterfaces(dg, cache)) @@ -435,7 +435,7 @@ In particular, not the interfaces themselves are returned. eachmpimortar(dg::DG, cache) Return an iterator over the indices that specify the location in relevant data structures -for the MPI mortars in `cache`. +for the MPI mortars in `cache`. In particular, not the mortars themselves are returned. """ @inline eachmpimortar(dg::DG, cache) = Base.OneTo(nmpimortars(dg, cache)) @@ -520,6 +520,12 @@ AdaptorAMR(mesh, dg::DG) = AdaptorL2(dg.basis) # DGSEM (discontinuous Galerkin spectral element method) include("dgsem/dgsem.jl") +# Finite difference methods using summation by parts (SBP) operators +# These methods are very similar to DG methods since they also impose interface +# and boundary conditions weakly. Thus, these methods can re-use a lot of +# functionality implemented for DGSEM. 
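+# Note: this `include` has to come before the `wrap_array` specialization further down,
+# which dispatches on the `FDSBP` type defined in the included file.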
+include("fdsbp_tree/fdsbp.jl") + function allocate_coefficients(mesh::AbstractMesh, equations, dg::DG, cache) @@ -569,6 +575,25 @@ end end end +# Finite difference summation by parts (FDSBP) methods +@inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations, dg::FDSBP, cache) + @boundscheck begin + @assert length(u_ode) == nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache) + end + # See comments on the DGSEM version above + if LoopVectorization.check_args(u_ode) + # Here, we do not specialize on the number of nodes using `StaticInt` since + # - it will not be type stable (SBP operators just store it as a runtime value) + # - FD methods tend to use high node counts + PtrArray(pointer(u_ode), + (StaticInt(nvariables(equations)), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) + else + # The following version is reasonably fast and allows us to `resize!(u_ode, ...)`. + unsafe_wrap(Array{eltype(u_ode), ndims(mesh)+2}, pointer(u_ode), + (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache))) + end +end + # General fallback @inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations, dg::DG, cache) wrap_array_native(u_ode, mesh, equations, dg, cache) @@ -634,11 +659,4 @@ include("dgsem_unstructured/dg.jl") include("dgsem_p4est/dg.jl") -# Finite difference methods using summation by parts (SBP) operators -# These methods are very similar to DG methods since they also impose interface -# and boundary conditions weakly. Thus, these methods can re-use a lot of -# functionality implemented for DGSEM. -include("fdsbp_tree/fdsbp.jl") - - end # @muladd diff --git a/src/solvers/dgsem_p4est/dg_3d_parallel.jl b/src/solvers/dgsem_p4est/dg_3d_parallel.jl index e0e7268d8ee..5c77247ac6d 100644 --- a/src/solvers/dgsem_p4est/dg_3d_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_3d_parallel.jl @@ -7,8 +7,8 @@ function rhs!(du, u, t, mesh::ParallelP4estMesh{3}, equations, - initial_condition, boundary_conditions, source_terms, - dg::DG, cache) + initial_condition, boundary_conditions, source_terms::Source, + dg::DG, cache) where {Source} # Start to receive MPI data @trixi_timeit timer() "start MPI receive" start_mpi_receive!(cache.mpi_cache) diff --git a/src/solvers/dgsem_structured/dg_1d.jl b/src/solvers/dgsem_structured/dg_1d.jl index 504bbe39259..e33328a8204 100644 --- a/src/solvers/dgsem_structured/dg_1d.jl +++ b/src/solvers/dgsem_structured/dg_1d.jl @@ -7,8 +7,8 @@ function rhs!(du, u, t, mesh::StructuredMesh{1}, equations, - initial_condition, boundary_conditions, source_terms, - dg::DG, cache) + initial_condition, boundary_conditions, source_terms::Source, + dg::DG, cache) where {Source} # Reset du @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index 3a68bced409..a8972dfe766 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -7,8 +7,8 @@ function rhs!(du, u, t, mesh::StructuredMesh{2}, equations, - initial_condition, boundary_conditions, source_terms, - dg::DG, cache) + initial_condition, boundary_conditions, source_terms::Source, + dg::DG, cache) where {Source} # Reset du @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index 2c823042c7d..6c27e206321 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ 
-7,8 +7,8 @@ function rhs!(du, u, t, mesh::StructuredMesh{3}, equations, - initial_condition, boundary_conditions, source_terms, - dg::DG, cache) + initial_condition, boundary_conditions, source_terms::Source, + dg::DG, cache) where {Source} # Reset du @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index df882befaba..a3346a4f15c 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -71,8 +71,8 @@ end function rhs!(du, u, t, mesh::TreeMesh{1}, equations, - initial_condition, boundary_conditions, source_terms, - dg::DG, cache) + initial_condition, boundary_conditions, source_terms::Source, + dg::DG, cache) where {Source} # Reset du @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 445a8082ce7..3ce9f611a8b 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -99,8 +99,8 @@ end function rhs!(du, u, t, mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations, - initial_condition, boundary_conditions, source_terms, - dg::DG, cache) + initial_condition, boundary_conditions, source_terms::Source, + dg::DG, cache) where {Source} # Reset du @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) diff --git a/src/solvers/dgsem_tree/dg_2d_parallel.jl b/src/solvers/dgsem_tree/dg_2d_parallel.jl index d605a848aad..a7c6a8b4746 100644 --- a/src/solvers/dgsem_tree/dg_2d_parallel.jl +++ b/src/solvers/dgsem_tree/dg_2d_parallel.jl @@ -429,8 +429,8 @@ end function rhs!(du, u, t, mesh::Union{ParallelTreeMesh{2}, ParallelP4estMesh{2}}, equations, - initial_condition, boundary_conditions, source_terms, - dg::DG, cache) + initial_condition, boundary_conditions, source_terms::Source, + dg::DG, cache) where {Source} # Start to receive MPI data @trixi_timeit timer() "start MPI receive" start_mpi_receive!(cache.mpi_cache) diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index fd8f76d168f..aef86e3de7d 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -122,8 +122,8 @@ end function rhs!(du, u, t, mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations, - initial_condition, boundary_conditions, source_terms, - dg::DG, cache) + initial_condition, boundary_conditions, source_terms::Source, + dg::DG, cache) where {Source} # Reset du @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl b/src/solvers/dgsem_unstructured/dg_2d.jl index acb89bc4526..283f8bdc74e 100644 --- a/src/solvers/dgsem_unstructured/dg_2d.jl +++ b/src/solvers/dgsem_unstructured/dg_2d.jl @@ -34,8 +34,8 @@ end function rhs!(du, u, t, mesh::UnstructuredMesh2D, equations, - initial_condition, boundary_conditions, source_terms, - dg::DG, cache) + initial_condition, boundary_conditions, source_terms::Source, + dg::DG, cache) where {Source} # Reset du @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) diff --git a/test/test_tree_1d_fdsbp.jl b/test/test_tree_1d_fdsbp.jl index 44be4438154..a966b3836f3 100644 --- a/test/test_tree_1d_fdsbp.jl +++ b/test/test_tree_1d_fdsbp.jl @@ -13,6 +13,15 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_fdsbp") l2 = [8.316190308678742e-7], linf = [7.1087263324720595e-6], tspan = (0.0, 0.5)) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) 
+ @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end end # same tolerances as above since the methods should be identical (up to @@ -39,6 +48,15 @@ end l2 = [4.1370344463620254e-6, 4.297052451817826e-6, 9.857382045003056e-6], linf = [1.675305070092392e-5, 1.3448113863834266e-5, 3.8185336878271414e-5], tspan = (0.0, 0.5)) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end end @trixi_testset "elixir_euler_convergence.jl with splitting_vanleer_haenel" begin diff --git a/test/test_tree_2d_fdsbp.jl b/test/test_tree_2d_fdsbp.jl index f75fedcf2a3..7c58ef89a6c 100644 --- a/test/test_tree_2d_fdsbp.jl +++ b/test/test_tree_2d_fdsbp.jl @@ -13,6 +13,15 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_2d_fdsbp") l2 = [2.898644263922225e-6], linf = [8.491517930142578e-6], rtol = 1.0e-7) # These results change a little bit and depend on the CI system + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end end end @@ -22,6 +31,15 @@ end l2 = [1.7088389997042244e-6, 1.7437997855125774e-6, 1.7437997855350776e-6, 5.457223460127621e-6], linf = [9.796504903736292e-6, 9.614745892783105e-6, 9.614745892783105e-6, 4.026107182575345e-5], tspan = (0.0, 0.1)) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end end @trixi_testset "elixir_euler_convergence.jl with Lax-Friedrichs splitting" begin diff --git a/test/test_tree_3d_fdsbp.jl b/test/test_tree_3d_fdsbp.jl index 22d45abc762..9dceab38031 100644 --- a/test/test_tree_3d_fdsbp.jl +++ b/test/test_tree_3d_fdsbp.jl @@ -8,11 +8,35 @@ include("test_trixi.jl") EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_3d_fdsbp") @testset "Compressible Euler" begin + @trixi_testset "elixir_advection_extended.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), + l2 = [0.005355755365412444], + linf = [0.01856044696350767]) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end + end + @trixi_testset "elixir_euler_convergence.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_convergence.jl"), l2 = [2.247522803543667e-5, 2.2499169224681058e-5, 2.24991692246826e-5, 2.2499169224684707e-5, 5.814121361417382e-5], linf = [9.579357410749445e-5, 9.544871933409027e-5, 9.54487193367548e-5, 9.544871933453436e-5, 0.0004192294529472562], tspan = (0.0, 0.2)) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end end @trixi_testset "elixir_euler_convergence.jl with VolumeIntegralStrongForm" begin From f739a2067439507b8b321b588bab7a3b4e57935e Mon Sep 17 00:00:00 2001 From: Jesse Chan <1156048+jlchan@users.noreply.github.com> Date: Fri, 19 May 2023 00:15:35 -0500 Subject: [PATCH 022/163] Update scalar_linear_advection_1d.jl (#1483) 
Co-authored-by: Hendrik Ranocha
---
 docs/literate/src/files/scalar_linear_advection_1d.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/literate/src/files/scalar_linear_advection_1d.jl b/docs/literate/src/files/scalar_linear_advection_1d.jl
index 8a8a1b19963..42c831c98ba 100644
--- a/docs/literate/src/files/scalar_linear_advection_1d.jl
+++ b/docs/literate/src/files/scalar_linear_advection_1d.jl
@@ -48,7 +48,7 @@ dx = (coordinates_max - coordinates_min) / n_elements # length of one element
 # ```math
 # \frac{dx}{2} u_t^{Q_l} + u_\xi^{Q_l} = 0 \text{, for }t\in\mathbb{R}^+,\; \xi\in[-1, 1]
 # ```
-# $u_t^{Q_l}$ and $u_\xi^{Q_l}$ denote the time and spatial derivatives of the solution on the element $Q_l$.
+# Here, $u_t^{Q_l}$ and $u_\xi^{Q_l}$ denote the time and spatial derivatives of the solution on the element $Q_l$.


 # ### ii. Polynomial approach

From cdf53c72bfd3d14feab4eeaa6c9f1cbb39dddc5c Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Fri, 19 May 2023 08:12:21 +0200
Subject: [PATCH 023/163] set version to v0.5.24

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 5ee043b3232..b95f05b74a8 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "]
-version = "0.5.24-pre"
+version = "0.5.24"

 [deps]
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"

From 153ef659f8e678ad0acab743c43399131f3a4885 Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Fri, 19 May 2023 08:12:35 +0200
Subject: [PATCH 024/163] set development version to v0.5.25-pre

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index b95f05b74a8..cb27c828cf4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "]
-version = "0.5.24"
+version = "0.5.25-pre"

 [deps]
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"

From 9bd246fba7263b0581c15edc9a713cdcb67d5c7c Mon Sep 17 00:00:00 2001
From: jmbender <129838210+jmbender@users.noreply.github.com>
Date: Thu, 25 May 2023 11:04:59 +0200
Subject: [PATCH 025/163] improve docstring of analysis callback (#1487)

* improve docstring of analysis callback

* add docstrings to common energy_* functions

---------

Co-authored-by: Hendrik Ranocha
---
 src/callbacks_step/analysis.jl | 13 ++++++++----
 src/equations/equations.jl     | 37 ++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl
index 79a442f06c3..c6a2ee6fb95 100644
--- a/src/callbacks_step/analysis.jl
+++ b/src/callbacks_step/analysis.jl
@@ -19,12 +19,17 @@ Analyze a numerical solution every `interval` time steps and print the results
 to the screen. If `save_analysis`, the results are also saved in
 `joinpath(output_directory, analysis_filename)`.

-Additional errors can be computed, e.g. by passing `extra_analysis_errors = [:primitive]`.
+Additional errors can be computed, e.g. by passing
+`extra_analysis_errors = (:l2_error_primitive, :linf_error_primitive)`
+or `extra_analysis_errors = (:conservation_error,)`.

Further scalar functions `func` in `extra_analysis_integrals` are applied to the numerical -solution and integrated over the computational domain. -See `Trixi.analyze`, `Trixi.pretty_form_utf`, `Trixi.pretty_form_ascii` for further -information on how to create custom analysis quantities. +solution and integrated over the computational domain. Some examples for this are +[`entropy`](@ref), [`energy_kinetic`](@ref), [`energy_internal`](@ref), and [`energy_total`](@ref). +You can also write your own function with the same signature as the examples listed above and +pass it via `extra_analysis_integrals`. +See the developer comments about `Trixi.analyze`, `Trixi.pretty_form_utf`, and +`Trixi.pretty_form_ascii` for further information on how to create custom analysis quantities. In addition, the analysis callback records and outputs a number of quantities that are useful for evaluating the computational performance, such as the total runtime, the performance index diff --git a/src/equations/equations.jl b/src/equations/equations.jl index e44270737e8..6640ee7cfc7 100644 --- a/src/equations/equations.jl +++ b/src/equations/equations.jl @@ -262,6 +262,9 @@ function prim2cons end Return the chosen entropy of the conserved variables `u` for a given set of `equations`. + +`u` is a vector of the conserved variables at a single node, i.e., a vector +of the correct length `nvariables(equations)`. """ function entropy end @@ -270,6 +273,7 @@ function entropy end Convert the conserved variables `u` to the entropy variables for a given set of `equations` with chosen standard [`entropy`](@ref). + `u` is a vector type of the correct length `nvariables(equations)`. Notice the function doesn't include any error checks for the purpose of efficiency, so please make sure your input is correct. @@ -289,6 +293,39 @@ The inverse conversion is performed by [`cons2entropy`](@ref). """ function entropy2cons end +""" + energy_total(u, equations) + +Return the total energy of the conserved variables `u` for a given set of +`equations`, e.g., the [`CompressibleEulerEquations2D`](@ref). + +`u` is a vector of the conserved variables at a single node, i.e., a vector +of the correct length `nvariables(equations)`. +""" +function energy_total end + +""" + energy_kinetic(u, equations) + +Return the kinetic energy of the conserved variables `u` for a given set of +`equations`, e.g., the [`CompressibleEulerEquations2D`](@ref). + +`u` is a vector of the conserved variables at a single node, i.e., a vector +of the correct length `nvariables(equations)`. +""" +function energy_kinetic end + +""" + energy_internal(u, equations) + +Return the internal energy of the conserved variables `u` for a given set of +`equations`, e.g., the [`CompressibleEulerEquations2D`](@ref). + +`u` is a vector of the conserved variables at a single node, i.e., a vector +of the correct length `nvariables(equations)`. +""" +function energy_internal end + #################################################################################################### # Include files with actual implementations for different systems of equations. 
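
To make the docstrings added in the patch above concrete, here is a minimal sketch of how
the documented analysis quantities fit together. It assumes a semidiscretization `semi` of
the 2D compressible Euler equations has already been constructed as in the elixirs shown
earlier in this series; `my_density` is a hypothetical user-defined quantity that follows
the `(u, equations)` signature documented for the `energy_*` functions.

```julia
using Trixi

# Hypothetical custom analysis quantity: like `energy_total` etc., it receives the
# conserved variables `u` at a single node together with the `equations` and returns
# a scalar. For the compressible Euler equations, the first conserved variable is
# the density.
my_density(u, equations::CompressibleEulerEquations2D) = u[1]

analysis_callback = AnalysisCallback(semi, interval=100,
                                     # additional error norms, e.g., conservation errors
                                     extra_analysis_errors=(:conservation_error,),
                                     # scalar functions integrated over the whole domain
                                     extra_analysis_integrals=(entropy, energy_total,
                                                               energy_kinetic, energy_internal,
                                                               my_density))
```

Each function in `extra_analysis_integrals` is then integrated over the computational
domain and reported alongside the standard errors every `interval` time steps.
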
From 711d4d8c272c69c5db170b8c0ce1ee0a58d89db4 Mon Sep 17 00:00:00 2001
From: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com>
Date: Fri, 26 May 2023 10:20:47 +0200
Subject: [PATCH 026/163] Parallel I/O (#1399)

* add parallel IO

* use parallel HDF5 only when enabled

* add documentation

* prefix has_parallel with HDF5

* put duplicated code in main function

* fix computation of slices

* fix typo

* write and read restart files in parallel

* put filename in main function

* Apply suggestions from code review

Co-authored-by: Hendrik Ranocha

* format

* fix typo in comment

* write nelementsglobal, not nelements

* update filtered warning

* Update docs/src/parallelization.md

Co-authored-by: Michael Schlottke-Lakemper

---------

Co-authored-by: Hendrik Ranocha
Co-authored-by: Michael Schlottke-Lakemper
---
 .gitignore                             |   4 +-
 docs/src/parallelization.md            |  10 ++
 src/Trixi.jl                           |   7 +-
 src/callbacks_step/save_restart_dg.jl  | 124 ++++++++++++++++++++++++-
 src/callbacks_step/save_solution_dg.jl |  75 ++++++++++++++-
 test/test_trixi.jl                     |   4 +-
 6 files changed, 214 insertions(+), 10 deletions(-)

diff --git a/.gitignore b/.gitignore
index a23cdb3f326..3132b9af38b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,4 +28,6 @@ coverage_report/
 .DS_Store

 run
-run/*
\ No newline at end of file
+run/*
+
+LocalPreferences.toml

diff --git a/docs/src/parallelization.md b/docs/src/parallelization.md
index 27c0b50189c..08470fd064a 100644
--- a/docs/src/parallelization.md
+++ b/docs/src/parallelization.md
@@ -162,3 +162,13 @@ If you use error-based step size control (see also the section on [error-based a
 together with MPI you need to pass `internalnorm=ode_norm` and you should pass
 `unstable_check=ode_unstable_check` to OrdinaryDiffEq's [`solve`](https://docs.sciml.ai/DiffEqDocs/latest/basics/common_solver_opts/),
 which are both included in [`ode_default_options`](@ref).
+
+### Using parallel input and output
+Trixi.jl allows parallel I/O using MPI by leveraging parallel HDF5.jl. To enable this, you first need
+to use a system-provided MPI library, see also [here](@ref parallel_system_MPI), and you need to tell
+[HDF5.jl](https://github.com/JuliaIO/HDF5.jl) to use this library.
+To do so, set the environment variable `JULIA_HDF5_PATH` to the local path
+that contains the `libhdf5.so` shared object file and build HDF5.jl by executing `using Pkg; Pkg.build("HDF5")`.
+For more information see also the [documentation of HDF5.jl](https://juliaio.github.io/HDF5.jl/stable/mpi/).
+
+If you do not perform these steps to use parallel HDF5 or if HDF5 is not MPI-enabled, Trixi.jl will fall back on a less efficient I/O mechanism. In that case, all disk I/O is performed only on rank zero and data is distributed to/gathered from the other ranks using regular MPI communication.

diff --git a/src/Trixi.jl b/src/Trixi.jl
index c0cecf86bd4..a8a2c0ad128 100644
--- a/src/Trixi.jl
+++ b/src/Trixi.jl
@@ -25,6 +25,10 @@ using SparseArrays: AbstractSparseMatrix, AbstractSparseMatrixCSC, sparse, dropt
 # import @reexport now to make it available for further imports/exports
 using Reexport: @reexport

+# MPI needs to be imported before HDF5 to be able to use parallel HDF5
+# as long as HDF5.jl uses Requires.jl to enable parallel HDF5 with MPI
+using MPI: MPI
+
 using SciMLBase: CallbackSet, DiscreteCallback,
                  ODEProblem, ODESolution, ODEFunction,
                  SplitODEProblem
@@ -38,12 +42,11 @@ using DiffEqCallbacks: PeriodicCallback, PeriodicCallbackAffect
 @reexport using EllipsisNotation # ..
using FillArrays: Ones, Zeros using ForwardDiff: ForwardDiff -using HDF5: h5open, attributes +using HDF5: HDF5, h5open, attributes, create_dataset, datatype, dataspace using IfElse: ifelse using LinearMaps: LinearMap using LoopVectorization: LoopVectorization, @turbo, indices using StaticArrayInterface: static_length # used by LoopVectorization -using MPI: MPI using MuladdMacro: @muladd using Octavian: Octavian, matmul! using Polyester: @batch # You know, the cheapest threads you can find... diff --git a/src/callbacks_step/save_restart_dg.jl b/src/callbacks_step/save_restart_dg.jl index 47487124ccb..a46a8bc856b 100644 --- a/src/callbacks_step/save_restart_dg.jl +++ b/src/callbacks_step/save_restart_dg.jl @@ -9,9 +9,10 @@ function save_restart_file(u, time, dt, timestep, mesh::Union{SerialTreeMesh, StructuredMesh, UnstructuredMesh2D, SerialP4estMesh}, equations, dg::DG, cache, restart_callback) + @unpack output_directory = restart_callback - # Filename without extension based on current time step + # Filename based on current time step filename = joinpath(output_directory, @sprintf("restart_%06d.h5", timestep)) # Restart files always store conservative variables @@ -88,11 +89,68 @@ end function save_restart_file(u, time, dt, timestep, mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, restart_callback) - @unpack output_directory = restart_callback - # Filename without extension based on current time step + @unpack output_directory = restart_callback + # Filename based on current time step filename = joinpath(output_directory, @sprintf("restart_%06d.h5", timestep)) + if HDF5.has_parallel() + save_restart_file_parallel(u, time, dt, timestep, mesh, equations, dg, cache, filename) + else + save_restart_file_on_root(u, time, dt, timestep, mesh, equations, dg, cache, filename) + end +end + + +function save_restart_file_parallel(u, time, dt, timestep, + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, + filename) + + # Restart files always store conservative variables + data = u + + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(mesh) + element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) + node_counts = element_counts * Cint(element_size) + # Cumulative sum of nodes per rank starting with an additional 0 + cum_node_counts = append!(zeros(eltype(node_counts), 1), cumsum(node_counts)) + + # Open file (clobber existing content) + h5open(filename, "w", mpi_comm()) do file + # Add context information as attributes + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = nvariables(equations) + attributes(file)["n_elements"] = nelementsglobal(dg, cache) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar + attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar + attributes(file)["timestep"] = timestep + + # Store each variable of the solution + for v in eachvariable(equations) + # Need to create dataset explicitly in parallel case + var = create_dataset(file, "/variables_$v", datatype(eltype(data)), dataspace((ndofsglobal(mesh, dg, cache),))) + # Write data of each process in slices (ranks start with 0) + slice = (cum_node_counts[mpi_rank() + 1] + 
1):cum_node_counts[mpi_rank() + 2] + # Convert to 1D array + var[slice] = vec(data[v, .., :]) + # Add variable name as attribute + attributes(var)["name"] = varnames(cons2cons, equations)[v] + end + end + + return filename +end + + +function save_restart_file_on_root(u, time, dt, timestep, + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, + filename) + # Restart files always store conservative variables data = u @@ -144,6 +202,66 @@ end function load_restart_file(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, restart_file) + if HDF5.has_parallel() + load_restart_file_parallel(mesh, equations, dg, cache, restart_file) + else + load_restart_file_on_root(mesh, equations, dg, cache, restart_file) + end +end + + +function load_restart_file_parallel(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, restart_file) + + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(mesh) + element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) + node_counts = element_counts * Cint(element_size) + # Cumulative sum of nodes per rank starting with an additional 0 + cum_node_counts = append!(zeros(eltype(node_counts), 1), cumsum(node_counts)) + + # allocate memory + u_ode = allocate_coefficients(mesh, equations, dg, cache) + u = wrap_array_native(u_ode, mesh, equations, dg, cache) + + # read in parallel + h5open(restart_file, "r", mpi_comm()) do file + # Read attributes to perform some sanity checks + if read(attributes(file)["ndims"]) != ndims(mesh) + error("restart mismatch: ndims differs from value in restart file") + end + if read(attributes(file)["equations"]) != get_name(equations) + error("restart mismatch: equations differ from value in restart file") + end + if read(attributes(file)["polydeg"]) != polydeg(dg) + error("restart mismatch: polynomial degree in solver differs from value in restart file") + end + if read(attributes(file)["n_elements"]) != nelementsglobal(dg, cache) + error("restart mismatch: number of elements in solver differs from value in restart file") + end + + # Read data + for v in eachvariable(equations) + # Check if variable name matches + var = file["variables_$v"] + if (name = read(attributes(var)["name"])) != varnames(cons2cons, equations)[v] + error("mismatch: variables_$v should be '$(varnames(cons2cons, equations)[v])', but found '$name'") + end + + # Read variable + mpi_println("Reading variables_$v ($name)...") + # Read data of each process in slices (ranks start with 0) + slice = (cum_node_counts[mpi_rank() + 1] + 1):cum_node_counts[mpi_rank() + 2] + # Convert 1D array back to actual size of `u` + u[v, .., :] = reshape(read(var)[slice], size(@view u[v, .., :])) + end + end + + return u_ode +end + + +function load_restart_file_on_root(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, restart_file) + # Calculate element and node counts by MPI rank element_size = nnodes(dg)^ndims(mesh) element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) diff --git a/src/callbacks_step/save_solution_dg.jl b/src/callbacks_step/save_solution_dg.jl index c1708ca6820..6d1cdf0151b 100644 --- a/src/callbacks_step/save_solution_dg.jl +++ b/src/callbacks_step/save_solution_dg.jl @@ -10,9 +10,10 @@ function save_solution_file(u, time, dt, timestep, equations, dg::DG, cache, solution_callback, element_variables=Dict{Symbol,Any}(); system="") + @unpack output_directory, solution_variables = solution_callback - # 
Filename without extension based on current time step + # Filename based on current time step if isempty(system) filename = joinpath(output_directory, @sprintf("solution_%06d.h5", timestep)) else @@ -78,9 +79,10 @@ function save_solution_file(u, time, dt, timestep, mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, solution_callback, element_variables=Dict{Symbol,Any}(); system="") + @unpack output_directory, solution_variables = solution_callback - # Filename without extension based on current time step + # Filename based on current time step if isempty(system) filename = joinpath(output_directory, @sprintf("solution_%06d.h5", timestep)) else @@ -103,6 +105,75 @@ function save_solution_file(u, time, dt, timestep, n_vars = size(data, 1) end + if HDF5.has_parallel() + save_solution_file_parallel(data, time, dt, timestep, n_vars, mesh, equations, dg, cache, solution_variables, filename, element_variables) + else + save_solution_file_on_root(data, time, dt, timestep, n_vars, mesh, equations, dg, cache, solution_variables, filename, element_variables) + end +end + + +function save_solution_file_parallel(data, time, dt, timestep, n_vars, + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, + solution_variables, filename, element_variables=Dict{Symbol,Any}()) + + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(mesh) + element_counts = cache.mpi_cache.n_elements_by_rank + node_counts = element_counts * element_size + # Cumulative sum of elements per rank starting with an additional 0 + cum_element_counts = append!(zeros(eltype(element_counts), 1), cumsum(element_counts)) + # Cumulative sum of nodes per rank starting with an additional 0 + cum_node_counts = append!(zeros(eltype(node_counts), 1), cumsum(node_counts)) + + # Open file using parallel HDF5 (clobber existing content) + h5open(filename, "w", mpi_comm()) do file + # Add context information as attributes + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = n_vars + attributes(file)["n_elements"] = nelementsglobal(dg, cache) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar + attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar + attributes(file)["timestep"] = timestep + + # Store each variable of the solution data + for v in 1:n_vars + # Need to create dataset explicitly in parallel case + var = create_dataset(file, "/variables_$v", datatype(eltype(data)), dataspace((ndofsglobal(mesh, dg, cache),))) + # Write data of each process in slices (ranks start with 0) + slice = (cum_node_counts[mpi_rank() + 1] + 1):cum_node_counts[mpi_rank() + 2] + # Convert to 1D array + var[slice] = vec(data[v, .., :]) + # Add variable name as attribute + attributes(var)["name"] = varnames(solution_variables, equations)[v] + end + + # Store element variables + for (v, (key, element_variable)) in enumerate(element_variables) + # Need to create dataset explicitly in parallel case + var = create_dataset(file, "/element_variables_$v", datatype(eltype(element_variable)), dataspace((nelementsglobal(dg, cache),))) + + # Write data of each process in slices (ranks start with 0) + slice = (cum_element_counts[mpi_rank() + 1] + 
1):cum_element_counts[mpi_rank() + 2] + # Add to file + var[slice] = element_variable + # Add variable name as attribute + attributes(var)["name"] = string(key) + end + end + + return filename +end + + +function save_solution_file_on_root(data, time, dt, timestep, n_vars, + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, + solution_variables, filename, element_variables=Dict{Symbol,Any}()) + # Calculate element and node counts by MPI rank element_size = nnodes(dg)^ndims(mesh) element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) diff --git a/test/test_trixi.jl b/test/test_trixi.jl index f9d0ef42ee4..ddace6b4fbe 100644 --- a/test/test_trixi.jl +++ b/test/test_trixi.jl @@ -149,8 +149,8 @@ macro test_nowarn_mod(expr, additional_ignore_content=String[]) "[ Info: You just called `trixi_include`. Julia may now compile the code, please be patient.\n", # TODO: Upstream (PlotUtils). This should be removed again once the # deprecated stuff is fixed upstream. - "WARNING: importing deprecated binding Colors.RGB1 into PlotUtils.\n", - "WARNING: importing deprecated binding Colors.RGB4 into PlotUtils.\n", + "WARNING: importing deprecated binding Colors.RGB1 into Plots.\n", + "WARNING: importing deprecated binding Colors.RGB4 into Plots.\n", r"┌ Warning: Keyword argument letter not supported with Plots.+\n└ @ Plots.+\n", r"┌ Warning: `parse\(::Type, ::Coloarant\)` is deprecated.+\n│.+\n│.+\n└ @ Plots.+\n", # TODO: Silence warning introduced by Flux v0.13.13. Should be properly fixed. From f6c5bcebbe963a23cb388f83b1c7e369a3bb2c1b Mon Sep 17 00:00:00 2001 From: jmbender <129838210+jmbender@users.noreply.github.com> Date: Fri, 26 May 2023 13:27:39 +0200 Subject: [PATCH 027/163] First order finite volume 1D (#1489) * added polynomial degree zero for lobatto legendre basis * fix reference_offset for FV approximations * added cell boundaries to fix visualization for FV * add 1D test for finite volume * fix whitespace * Update src/visualization/types.jl Co-authored-by: Michael Schlottke-Lakemper * added comment to polydeg=0 to docstring of LGL Basis --------- Co-authored-by: Hendrik Ranocha Co-authored-by: Michael Schlottke-Lakemper Co-authored-by: Hendrik Ranocha --- .../elixir_advection_finite_volume.jl | 56 +++++++++++++++++++ src/solvers/dgsem/basis_lobatto_legendre.jl | 10 ++++ src/solvers/dgsem_tree/containers_1d.jl | 2 +- src/solvers/dgsem_tree/containers_2d.jl | 2 +- src/solvers/dgsem_tree/containers_3d.jl | 2 +- src/visualization/types.jl | 23 ++++++++ test/test_tree_1d_advection.jl | 6 ++ 7 files changed, 98 insertions(+), 3 deletions(-) create mode 100644 examples/tree_1d_dgsem/elixir_advection_finite_volume.jl diff --git a/examples/tree_1d_dgsem/elixir_advection_finite_volume.jl b/examples/tree_1d_dgsem/elixir_advection_finite_volume.jl new file mode 100644 index 00000000000..28518e7276a --- /dev/null +++ b/examples/tree_1d_dgsem/elixir_advection_finite_volume.jl @@ -0,0 +1,56 @@ + +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the linear advection equation + +advection_velocity = 1.0 +equations = LinearScalarAdvectionEquation1D(advection_velocity) + +# Create DG solver with polynomial degree = 0, i.e., a first order finite volume solver, +# with (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg=0, surface_flux=flux_lax_friedrichs) + +coordinates_min = -1.0 # minimum coordinate +coordinates_max = 1.0 # maximum coordinate + 
+# Create a uniformly refined mesh with periodic boundaries +mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level=5, + n_cells_max=30_000) # set maximum capacity of tree data structure + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, solver) + + +############################################################################### +# ODE solvers, callbacks etc. + +# Create ODE problem with time span from 0.0 to 1.0 +ode = semidiscretize(semi, (0.0, 1.0)); + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_callback = AnalysisCallback(semi, interval=100) + +# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step +stepsize_callback = StepsizeCallback(cfl=0.9) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, stepsize_callback) + + +############################################################################### +# run the simulation + +# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +sol = solve(ode, Euler(), + dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); + +# Print the timer summary +summary_callback() diff --git a/src/solvers/dgsem/basis_lobatto_legendre.jl b/src/solvers/dgsem/basis_lobatto_legendre.jl index d34b0275da1..a3a7fb6dd31 100644 --- a/src/solvers/dgsem/basis_lobatto_legendre.jl +++ b/src/solvers/dgsem/basis_lobatto_legendre.jl @@ -9,6 +9,9 @@ LobattoLegendreBasis([RealT=Float64,] polydeg::Integer) Create a nodal Lobatto-Legendre basis for polynomials of degree `polydeg`. + +For the special case `polydeg=0` the DG method reduces to a finite volume method. +Therefore, this function sets the center point of the cell as single node. 
""" struct LobattoLegendreBasis{RealT<:Real, NNODES, VectorT<:AbstractVector{RealT}, @@ -504,6 +507,13 @@ function gauss_lobatto_nodes_weights(n_nodes::Integer) nodes = zeros(n_nodes) weights = zeros(n_nodes) + # Special case for polynomial degree zero (first order finite volume) + if n_nodes == 1 + nodes[1] = 0 + weights[1] = 2 + return nodes, weights + end + # Get polynomial degree for convenience N = n_nodes - 1 diff --git a/src/solvers/dgsem_tree/containers_1d.jl b/src/solvers/dgsem_tree/containers_1d.jl index f3255c6c953..10718fb2e55 100644 --- a/src/solvers/dgsem_tree/containers_1d.jl +++ b/src/solvers/dgsem_tree/containers_1d.jl @@ -105,7 +105,7 @@ function init_elements!(elements, cell_ids, mesh::TreeMesh1D, basis) reference_length = integrate(one ∘ eltype, nodes, basis) # Compute the offset of the midpoint of the 1D reference interval # (its difference from zero) - reference_offset = first(nodes) + reference_length / 2 + reference_offset = (first(nodes) + last(nodes)) / 2 # Store cell ids elements.cell_ids .= cell_ids diff --git a/src/solvers/dgsem_tree/containers_2d.jl b/src/solvers/dgsem_tree/containers_2d.jl index 9f874d50464..c0ece1f8c1a 100644 --- a/src/solvers/dgsem_tree/containers_2d.jl +++ b/src/solvers/dgsem_tree/containers_2d.jl @@ -106,7 +106,7 @@ function init_elements!(elements, cell_ids, mesh::TreeMesh2D, basis) reference_length = integrate(one ∘ eltype, nodes, basis) # Compute the offset of the midpoint of the 1D reference interval # (its difference from zero) - reference_offset = first(nodes) + reference_length / 2 + reference_offset = (first(nodes) + last(nodes)) / 2 # Store cell ids elements.cell_ids .= cell_ids diff --git a/src/solvers/dgsem_tree/containers_3d.jl b/src/solvers/dgsem_tree/containers_3d.jl index 2cf371e3612..bc88e931b31 100644 --- a/src/solvers/dgsem_tree/containers_3d.jl +++ b/src/solvers/dgsem_tree/containers_3d.jl @@ -106,7 +106,7 @@ function init_elements!(elements, cell_ids, mesh::TreeMesh3D, basis) reference_length = integrate(one ∘ eltype, nodes, basis) # Compute the offset of the midpoint of the 1D reference interval # (its difference from zero) - reference_offset = first(nodes) + reference_length / 2 + reference_offset = (first(nodes) + last(nodes)) / 2 # Store cell ids elements.cell_ids .= cell_ids diff --git a/src/visualization/types.jl b/src/visualization/types.jl index a83b5bc92c6..62cfe93038d 100644 --- a/src/visualization/types.jl +++ b/src/visualization/types.jl @@ -529,6 +529,29 @@ function PlotData1D(u, mesh::TreeMesh, equations, solver, cache; if ndims(mesh) == 1 x, data, mesh_vertices_x = get_data_1d(original_nodes, unstructured_data, nvisnodes) orientation_x = 1 + + # Special care is required for first-order FV approximations since the nodes are the + # cell centers and do not contain the boundaries + n_nodes = size(unstructured_data, 1) + if n_nodes == 1 + n_visnodes = length(x) ÷ nelements(solver, cache) + if n_visnodes != 2 + throw(ArgumentError("This number of visualization nodes is currently not supported for finite volume approximations.")) + end + left_boundary = mesh.tree.center_level_0[1] - mesh.tree.length_level_0 / 2 + dx_2 = zero(left_boundary) + for i in 1:div(length(x), 2) + # Adjust plot nodes so that they are at the boundaries of each element + dx_2 = x[2 * i - 1] - left_boundary + x[2 * i - 1] -= dx_2 + x[2 * i ] += dx_2 + left_boundary = left_boundary+ 2 * dx_2 + + # Adjust mesh plot nodes + mesh_vertices_x[i] -= dx_2 + end + mesh_vertices_x[end] += dx_2 + end elseif ndims(mesh) == 2 if curve !== nothing x, 
data, mesh_vertices_x = unstructured_2d_to_1d_curve(original_nodes, unstructured_data, nvisnodes, curve, mesh, solver, cache) diff --git a/test/test_tree_1d_advection.jl b/test/test_tree_1d_advection.jl index d8ece1d055e..0cf0f2c1170 100644 --- a/test/test_tree_1d_advection.jl +++ b/test/test_tree_1d_advection.jl @@ -27,6 +27,12 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") linf = [3.235356127918171e-5], coverage_override = (maxiters=6,)) end + + @trixi_testset "elixir_advection_finite_volume.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_finite_volume.jl"), + l2 = [0.011662300515980219], + linf = [0.01647256923710194]) + end end end # module From 72c32e9b872b169ab2e841175da3fcaf88a876dd Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Fri, 26 May 2023 17:52:32 +0200 Subject: [PATCH 028/163] set version to v0.5.25 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index cb27c828cf4..a97ea2b3cd3 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.25-pre" +version = "0.5.25" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From ee5a3a31bc3bedc3aef649ed6bde13f449598e4f Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Fri, 26 May 2023 17:52:47 +0200 Subject: [PATCH 029/163] set development version to v0.5.26-pre --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index a97ea2b3cd3..b30521e5196 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. 
Winters ", "Jesse Chan "] -version = "0.5.25" +version = "0.5.26-pre" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From 1d8752012720bef31c92d885fba5eac4362feeec Mon Sep 17 00:00:00 2001 From: Jesse Chan <1156048+jlchan@users.noreply.github.com> Date: Sat, 27 May 2023 00:22:18 -0500 Subject: [PATCH 030/163] Change parabolic `apply_jacobian!` to `apply_jacobian_parabolic!` (#1492) * `apply_jacobian!` -> `apply_jacobian_parabolic!` * switch to `apply_jacobian_parabolic!` * Update src/solvers/dgsem_tree/dg_1d_parabolic.jl Co-authored-by: Hendrik Ranocha * missed one --------- Co-authored-by: Hendrik Ranocha --- src/solvers/dgsem_tree/dg_1d_parabolic.jl | 8 ++++---- src/solvers/dgsem_tree/dg_2d_parabolic.jl | 10 +++++----- src/solvers/dgsem_tree/dg_3d_parabolic.jl | 12 ++++++------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/solvers/dgsem_tree/dg_1d_parabolic.jl b/src/solvers/dgsem_tree/dg_1d_parabolic.jl index be4235c627b..1bec34568d8 100644 --- a/src/solvers/dgsem_tree/dg_1d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_1d_parabolic.jl @@ -73,7 +73,7 @@ function rhs_parabolic!(du, u, t, mesh::TreeMesh{1}, equations_parabolic::Abstra du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache_parabolic) # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( + @trixi_timeit timer() "Jacobian" apply_jacobian_parabolic!( du, mesh, equations_parabolic, dg, cache_parabolic) return nothing @@ -444,7 +444,7 @@ function calc_gradient!(gradients, u_transformed, t, # Apply Jacobian from mapping to reference element @trixi_timeit timer() "Jacobian" begin - apply_jacobian!(gradients, mesh, equations_parabolic, dg, cache_parabolic) + apply_jacobian_parabolic!(gradients, mesh, equations_parabolic, dg, cache_parabolic) end return nothing @@ -483,8 +483,8 @@ end # This is because the parabolic fluxes are assumed to be of the form # `du/dt + df/dx = dg/dx + source(x,t)`, # where f(u) is the inviscid flux and g(u) is the viscous flux. 
-function apply_jacobian!(du, mesh::TreeMesh{1}, - equations::AbstractEquationsParabolic, dg::DG, cache) +function apply_jacobian_parabolic!(du, mesh::TreeMesh{1}, + equations::AbstractEquationsParabolic, dg::DG, cache) @threaded for element in eachelement(dg, cache) factor = cache.elements.inverse_jacobian[element] diff --git a/src/solvers/dgsem_tree/dg_2d_parabolic.jl b/src/solvers/dgsem_tree/dg_2d_parabolic.jl index ca6394172ad..97bbc7e2633 100644 --- a/src/solvers/dgsem_tree/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_2d_parabolic.jl @@ -76,7 +76,7 @@ function rhs_parabolic!(du, u, t, mesh::TreeMesh{2}, equations_parabolic::Abstra du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache_parabolic) # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( + @trixi_timeit timer() "Jacobian" apply_jacobian_parabolic!( du, mesh, equations_parabolic, dg, cache_parabolic) return nothing @@ -540,8 +540,8 @@ function calc_gradient!(gradients, u_transformed, t, # Apply Jacobian from mapping to reference element @trixi_timeit timer() "Jacobian" begin - apply_jacobian!(gradients_x, mesh, equations_parabolic, dg, cache_parabolic) - apply_jacobian!(gradients_y, mesh, equations_parabolic, dg, cache_parabolic) + apply_jacobian_parabolic!(gradients_x, mesh, equations_parabolic, dg, cache_parabolic) + apply_jacobian_parabolic!(gradients_y, mesh, equations_parabolic, dg, cache_parabolic) end return nothing @@ -586,8 +586,8 @@ end # This is because the parabolic fluxes are assumed to be of the form # `du/dt + df/dx = dg/dx + source(x,t)`, # where f(u) is the inviscid flux and g(u) is the viscous flux. -function apply_jacobian!(du, mesh::TreeMesh{2}, - equations::AbstractEquationsParabolic, dg::DG, cache) +function apply_jacobian_parabolic!(du, mesh::TreeMesh{2}, + equations::AbstractEquationsParabolic, dg::DG, cache) @threaded for element in eachelement(dg, cache) factor = cache.elements.inverse_jacobian[element] diff --git a/src/solvers/dgsem_tree/dg_3d_parabolic.jl b/src/solvers/dgsem_tree/dg_3d_parabolic.jl index d3a47cb06be..d4a197de172 100644 --- a/src/solvers/dgsem_tree/dg_3d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_3d_parabolic.jl @@ -76,7 +76,7 @@ function rhs_parabolic!(du, u, t, mesh::TreeMesh{3}, equations_parabolic::Abstra du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache_parabolic) # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( + @trixi_timeit timer() "Jacobian" apply_jacobian_parabolic!( du, mesh, equations_parabolic, dg, cache_parabolic) return nothing @@ -601,9 +601,9 @@ function calc_gradient!(gradients, u_transformed, t, # Apply Jacobian from mapping to reference element @trixi_timeit timer() "Jacobian" begin - apply_jacobian!(gradients_x, mesh, equations_parabolic, dg, cache_parabolic) - apply_jacobian!(gradients_y, mesh, equations_parabolic, dg, cache_parabolic) - apply_jacobian!(gradients_z, mesh, equations_parabolic, dg, cache_parabolic) + apply_jacobian_parabolic!(gradients_x, mesh, equations_parabolic, dg, cache_parabolic) + apply_jacobian_parabolic!(gradients_y, mesh, equations_parabolic, dg, cache_parabolic) + apply_jacobian_parabolic!(gradients_z, mesh, equations_parabolic, dg, cache_parabolic) end return nothing @@ -648,8 +648,8 @@ end # This is because the parabolic fluxes are assumed to be of the form # `du/dt + df/dx = dg/dx + source(x,t)`, # where f(u) is the inviscid flux and g(u) is the viscous flux. 
-function apply_jacobian!(du, mesh::TreeMesh{3}, - equations::AbstractEquationsParabolic, dg::DG, cache) +function apply_jacobian_parabolic!(du, mesh::TreeMesh{3}, + equations::AbstractEquationsParabolic, dg::DG, cache) @threaded for element in eachelement(dg, cache) factor = cache.elements.inverse_jacobian[element] From 5197fcbcfcdab19ecd770c58af37028c904a0d33 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Sun, 28 May 2023 17:31:20 +0200 Subject: [PATCH 031/163] fix docstring of iplot (#1496) The docstring did not have a connection to the function `iplot` before. --- src/visualization/recipes_makie.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/visualization/recipes_makie.jl b/src/visualization/recipes_makie.jl index 3fd3850c34d..aef713d5d9c 100644 --- a/src/visualization/recipes_makie.jl +++ b/src/visualization/recipes_makie.jl @@ -148,6 +148,7 @@ Keywords: !!! warning "Experimental implementation" This is an experimental feature and may change in future releases. """ +function iplot end # Enables `iplot(PlotData2D(sol))`. function iplot(pd::PlotData2DTriangulated; From da457e4471d586a9cdf716a917ffac15aecfafd7 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Mon, 29 May 2023 19:27:52 +0200 Subject: [PATCH 032/163] move Linux MPI to separate CI job (#1497) --- .github/workflows/ci.yml | 2 +- .github/workflows/ci_problematic.yml | 80 ++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/ci_problematic.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 32420c458ba..9e0472d556b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -75,7 +75,7 @@ jobs: - misc_part2 - performance_specializations_part1 - performance_specializations_part2 - - mpi + # - mpi # moved to ci_problematic.yml - threaded include: - version: '1.8' diff --git a/.github/workflows/ci_problematic.yml b/.github/workflows/ci_problematic.yml new file mode 100644 index 00000000000..8c324c51a66 --- /dev/null +++ b/.github/workflows/ci_problematic.yml @@ -0,0 +1,80 @@ +name: CI-Problematic + +on: + push: + branches: + - main + paths-ignore: + - 'AUTHORS.md' + - 'CITATION.bib' + - 'CONTRIBUTING.md' + - 'LICENSE.md' + - 'NEWS.md' + - 'README.md' + - '.zenodo.json' + - '.github/workflows/benchmark.yml' + - '.github/workflows/CompatHelper.yml' + - '.github/workflows/TagBot.yml' + - 'benchmark/**' + - 'docs/**' + - 'utils/**' + pull_request: + paths-ignore: + - 'AUTHORS.md' + - 'CITATION.bib' + - 'CONTRIBUTING.md' + - 'LICENSE.md' + - 'NEWS.md' + - 'README.md' + - '.zenodo.json' + - '.github/workflows/benchmark.yml' + - '.github/workflows/CompatHelper.yml' + - '.github/workflows/TagBot.yml' + - 'benchmark/**' + - 'docs/**' + - 'utils/**' + workflow_dispatch: + +# Cancel redundant CI tests automatically +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + test_problematic: + if: "!contains(github.event.head_commit.message, 'skip ci')" + # We could also include the Julia version as in + # name: ${{ matrix.trixi_test }} - ${{ matrix.os }} - Julia ${{ matrix.version }} - ${{ matrix.arch }} - ${{ github.event_name }} + # to be more specific. However, that requires us updating the required CI tests whenever we update Julia. 
+ name: ${{ matrix.trixi_test }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + version: + - '1.8' + # - 'nightly' + os: + - ubuntu-latest + arch: + - x64 + trixi_test: + - mpi + steps: + - uses: actions/checkout@v3 + - uses: julia-actions/setup-julia@v1 + with: + version: ${{ matrix.version }} + arch: ${{ matrix.arch }} + - run: julia -e 'using InteractiveUtils; versioninfo(verbose=true)' + - uses: julia-actions/cache@v1 + - uses: julia-actions/julia-buildpkg@v1 + env: + PYTHON: "" + - name: Run tests without coverage + uses: julia-actions/julia-runtest@v1 + with: + coverage: false + env: + PYTHON: "" + TRIXI_TEST: ${{ matrix.trixi_test }} From eca75cf14cdf76e000787587c70baf7cf1aecb90 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Mon, 29 May 2023 21:26:39 +0200 Subject: [PATCH 033/163] Add package extension for Makie (#1494) * Add package extension for Makie * Add Makie to the [extras] section to make it work for Julia v1.8 * Fix typo * Apply suggestions from code review Co-authored-by: Hendrik Ranocha * Update Project.toml * Move Makie-specific code to package extension file * Unconditionally export `iplot`, `iplot!` * fix export list * Add missing `using` * Use more stuff from Trixi * Extend coverage tracking to package extensions --------- Co-authored-by: Hendrik Ranocha --- .github/workflows/ci.yml | 2 +- Project.toml | 10 ++++++++ .../recipes_makie.jl => ext/TrixiMakieExt.jl | 25 +++++++++++++++++++ src/Trixi.jl | 15 +++++++---- src/visualization/visualization.jl | 5 ++++ 5 files changed, 51 insertions(+), 6 deletions(-) rename src/visualization/recipes_makie.jl => ext/TrixiMakieExt.jl (95%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9e0472d556b..a9b5239797c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -121,7 +121,7 @@ jobs: TRIXI_TEST: ${{ matrix.trixi_test }} - uses: julia-actions/julia-processcoverage@v1 with: - directories: src,examples + directories: src,examples,ext - uses: codecov/codecov-action@v3 with: file: ./lcov.info diff --git a/Project.toml b/Project.toml index b30521e5196..466b8065d2c 100644 --- a/Project.toml +++ b/Project.toml @@ -42,6 +42,12 @@ Triangulate = "f7e6ffb2-c36d-4f8f-a77e-16e897189344" TriplotBase = "981d1d27-644d-49a2-9326-4793e63143c3" TriplotRecipes = "808ab39a-a642-4abf-81ff-4cb34ebbffa3" +[weakdeps] +Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" + +[extensions] +TrixiMakieExt = "Makie" + [compat] CodeTracking = "1.0.5" ConstructionBase = "1.3" @@ -53,6 +59,7 @@ HDF5 = "0.14, 0.15, 0.16" IfElse = "0.1" LinearMaps = "2.7, 3.0" LoopVectorization = "0.12.118" +Makie = "0.19" MPI = "0.20" MuladdMacro = "0.2.2" Octavian = "0.3.5" @@ -78,3 +85,6 @@ Triangulate = "2.0" TriplotBase = "0.1" TriplotRecipes = "0.1" julia = "1.8" + +[extras] +Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" diff --git a/src/visualization/recipes_makie.jl b/ext/TrixiMakieExt.jl similarity index 95% rename from src/visualization/recipes_makie.jl rename to ext/TrixiMakieExt.jl index aef713d5d9c..c4c5380cfaf 100644 --- a/src/visualization/recipes_makie.jl +++ b/ext/TrixiMakieExt.jl @@ -1,3 +1,26 @@ +# Package extension for adding Makie-based features to Trixi.jl +module TrixiMakieExt + +# Required for visualization code +# We do not check `isdefined(Base, :get_extension)` since Julia v1.9.0 +# does not load package extensions when their dependency is loaded from +# the main environment +if VERSION >= v"1.9.1" + using Makie: 
Makie, GeometryBasics +else + using ..Makie: Makie, GeometryBasics +end + +# Use all exported symbols to avoid having to rewrite `recipes_makie.jl` +using Trixi + +# Use additional symbols that are not exported +using Trixi: PlotData2DTriangulated, TrixiODESolution, PlotDataSeries, ScalarData, @muladd, + wrap_array_native, mesh_equations_solver_cache + +# Import functions such that they can be extended with new methods +import Trixi: iplot, iplot! + # By default, Julia/LLVM does not use fused multiply-add operations (FMAs). # Since these FMAs can increase the performance of many numerical algorithms, # we need to opt-in explicitly. @@ -381,3 +404,5 @@ end end # @muladd + +end diff --git a/src/Trixi.jl b/src/Trixi.jl index a8a2c0ad128..ea09705b5ff 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -245,7 +245,8 @@ export DGMulti, DGMultiBasis, estimate_dt, DGMultiMesh, GaussSBP export ViscousFormulationBassiRebay1, ViscousFormulationLocalDG # Visualization-related exports -export PlotData1D, PlotData2D, ScalarPlotData2D, getmesh, adapt_to_mesh_level!, adapt_to_mesh_level +export PlotData1D, PlotData2D, ScalarPlotData2D, getmesh, adapt_to_mesh_level!, adapt_to_mesh_level, + iplot, iplot! function __init__() init_mpi() @@ -257,10 +258,14 @@ function __init__() using .Plots: Plots end - @require Makie="ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" begin - include("visualization/recipes_makie.jl") - using .Makie: Makie, GeometryBasics - export iplot, iplot! # interactive plot + # Until Julia v1.9 is the minimum required version for Trixi.jl, we still support Requires.jl + # We do not check `isdefined(Base, :get_extension)` since Julia v1.9.0 + # does not load package extensions when their dependency is loaded from + # the main environment + @static if !(VERSION >= v"1.9.1") + @require Makie="ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" begin + include("../ext/TrixiMakieExt.jl") + end end @require Flux="587475ba-b771-5e3f-ad9e-33799f191a9c" begin diff --git a/src/visualization/visualization.jl b/src/visualization/visualization.jl index c0091efebd5..5d7795571fa 100644 --- a/src/visualization/visualization.jl +++ b/src/visualization/visualization.jl @@ -8,4 +8,9 @@ include("types.jl") include("utilities.jl") include("recipes_plots.jl") +# Add function definitions here such that they can be exported from Trixi.jl and extended in the +# TrixiMakieExt package extension or by the Makie-specific code loaded by Requires.jl +function iplot end +function iplot! end + end # @muladd From 1cdd6cf91ce5238bd1408a6036db28a267819b03 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 May 2023 21:27:47 +0200 Subject: [PATCH 034/163] Bump crate-ci/typos from 1.14.9 to 1.14.11 (#1498) Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.14.9 to 1.14.11. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.14.9...v1.14.11) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/SpellCheck.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml
index 09612788f59..9e27ce20d06 100644
--- a/.github/workflows/SpellCheck.yml
+++ b/.github/workflows/SpellCheck.yml
@@ -10,4 +10,4 @@ jobs:
       - name: Checkout Actions Repository
         uses: actions/checkout@v3
       - name: Check spelling
-        uses: crate-ci/typos@v1.14.9
+        uses: crate-ci/typos@v1.14.11

From 9ab2171145f0234f2c237222640f85821fd51f61 Mon Sep 17 00:00:00 2001
From: Michael Schlottke-Lakemper
Date: Tue, 30 May 2023 05:51:32 +0200
Subject: [PATCH 035/163] Bump version number to v0.5.26

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 466b8065d2c..93ca79bba8b 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "]
-version = "0.5.26-pre"
+version = "0.5.26"

 [deps]
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"

From b014374dcd7e586014916f59eebaf6726a1a537e Mon Sep 17 00:00:00 2001
From: Michael Schlottke-Lakemper
Date: Tue, 30 May 2023 05:52:02 +0200
Subject: [PATCH 036/163] Set development version v0.5.27-pre

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 93ca79bba8b..688f8c89cf1 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "]
-version = "0.5.26"
+version = "0.5.27-pre"

 [deps]
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"

From e73b68de76d362936823243e1a170472a860a735 Mon Sep 17 00:00:00 2001
From: Michael Schlottke-Lakemper
Date: Tue, 30 May 2023 12:04:24 +0200
Subject: [PATCH 037/163] Help user when trying to run functions from
 not-yet-loaded extension (#1499)

---
 src/Trixi.jl               |  2 ++
 src/auxiliary/auxiliary.jl | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/src/Trixi.jl b/src/Trixi.jl
index ea09705b5ff..bb4074a1b5e 100644
--- a/src/Trixi.jl
+++ b/src/Trixi.jl
@@ -253,6 +253,8 @@ function __init__()
   init_p4est()

+  register_error_hints()
+
   # Enable features that depend on the availability of the Plots package
   @require Plots="91a5bcdd-55d7-5caf-9e0b-520d859cae80" begin
     using .Plots: Plots

diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl
index 4958e0d1fdc..4de743e93fe 100644
--- a/src/auxiliary/auxiliary.jl
+++ b/src/auxiliary/auxiliary.jl
@@ -332,4 +332,29 @@ macro autoinfiltrate(condition = true)
 end


+# Use the *experimental* feature in `Base` to add error hints for specific errors. We use it to
+# warn users in case they try to execute functions that are extended in package extensions which
+# have not yet been loaded.
+#
+# Reference: https://docs.julialang.org/en/v1/base/base/#Base.Experimental.register_error_hint
+function register_error_hints()
+  # We follow the advice in the docs and gracefully exit without doing anything if the experimental
+  # feature gets silently removed.
+ if !isdefined(Base.Experimental, :register_error_hint) + return nothing + end + + Base.Experimental.register_error_hint(MethodError) do io, exc, argtypes, kwargs + if exc.f in [iplot, iplot!] && isempty(methods(exc.f)) + print(io, "\n$(exc.f) has no methods yet. It is part of a plotting extension of Trixi.jl " * + "that relies on Makie being loaded.\n" * + "To activate the extension, execute `using Makie`, `using CairoMakie`, " * + "`using GLMakie`, or load any other package that also uses Makie.") + end + end + + return nothing +end + + end # @muladd From 7d7e975c0a161f11eb450ebfaa7ec512e9c123e8 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Tue, 30 May 2023 14:23:53 +0200 Subject: [PATCH 038/163] move CI to Julia 1.9 (#1316) * check CI on Julia 1.9 * use the v1.9 stable release * update dgmulti test (roundoff level changes) (#1449) * use --heap-size-hint=1G for MPI runs in CI * adjust tolerance for one DGMulti test * Revert "move Linux MPI to separate CI job (#1497)" This reverts commit da457e4471d586a9cdf716a917ffac15aecfafd7. * update to Julia v1.9 * use package extension on Julia v1.9.0 * Apply suggestions from code review Co-authored-by: Michael Schlottke-Lakemper --------- Co-authored-by: Michael Schlottke-Lakemper Co-authored-by: Jesse Chan <1156048+jlchan@users.noreply.github.com> --- .github/workflows/Documenter.yml | 2 +- .github/workflows/benchmark.yml | 2 +- .github/workflows/ci.yml | 15 ++++-- .github/workflows/ci_problematic.yml | 80 ---------------------------- README.md | 2 +- docs/src/development.md | 8 +-- docs/src/index.md | 2 +- ext/TrixiMakieExt.jl | 6 +-- src/Trixi.jl | 5 +- test/runtests.jl | 5 +- test/test_dgmulti_2d.jl | 5 +- 11 files changed, 27 insertions(+), 105 deletions(-) delete mode 100644 .github/workflows/ci_problematic.yml diff --git a/.github/workflows/Documenter.yml b/.github/workflows/Documenter.yml index 5d9632b1653..6b557960c89 100644 --- a/.github/workflows/Documenter.yml +++ b/.github/workflows/Documenter.yml @@ -36,7 +36,7 @@ jobs: - uses: actions/checkout@v3 - uses: julia-actions/setup-julia@v1 with: - version: '1.8' + version: '1.9' show-versioninfo: true - uses: julia-actions/julia-buildpkg@v1 env: diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 01a6c99e843..c5c95558c8c 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -11,7 +11,7 @@ jobs: os: - ubuntu-latest version: - - '1.8' + - '1.9' arch: - x64 steps: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a9b5239797c..b0a2c93db3c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,8 @@ jobs: fail-fast: false matrix: version: - - '1.8' + - '1.9' + # - '~1.9.0-0' # including development versions # - 'nightly' os: - ubuntu-latest @@ -75,22 +76,26 @@ jobs: - misc_part2 - performance_specializations_part1 - performance_specializations_part2 - # - mpi # moved to ci_problematic.yml + - mpi - threaded include: - version: '1.8' + os: ubuntu-latest + arch: x64 + trixi_test: threaded_legacy + - version: '1.9' os: macOS-latest arch: x64 trixi_test: mpi - - version: '1.8' + - version: '1.9' os: macOS-latest arch: x64 trixi_test: threaded - - version: '1.8' + - version: '1.9' os: windows-latest arch: x64 trixi_test: mpi - - version: '1.8' + - version: '1.9' os: windows-latest arch: x64 trixi_test: threaded diff --git a/.github/workflows/ci_problematic.yml b/.github/workflows/ci_problematic.yml deleted file mode 100644 index 8c324c51a66..00000000000 --- 
a/.github/workflows/ci_problematic.yml +++ /dev/null @@ -1,80 +0,0 @@ -name: CI-Problematic - -on: - push: - branches: - - main - paths-ignore: - - 'AUTHORS.md' - - 'CITATION.bib' - - 'CONTRIBUTING.md' - - 'LICENSE.md' - - 'NEWS.md' - - 'README.md' - - '.zenodo.json' - - '.github/workflows/benchmark.yml' - - '.github/workflows/CompatHelper.yml' - - '.github/workflows/TagBot.yml' - - 'benchmark/**' - - 'docs/**' - - 'utils/**' - pull_request: - paths-ignore: - - 'AUTHORS.md' - - 'CITATION.bib' - - 'CONTRIBUTING.md' - - 'LICENSE.md' - - 'NEWS.md' - - 'README.md' - - '.zenodo.json' - - '.github/workflows/benchmark.yml' - - '.github/workflows/CompatHelper.yml' - - '.github/workflows/TagBot.yml' - - 'benchmark/**' - - 'docs/**' - - 'utils/**' - workflow_dispatch: - -# Cancel redundant CI tests automatically -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - test_problematic: - if: "!contains(github.event.head_commit.message, 'skip ci')" - # We could also include the Julia version as in - # name: ${{ matrix.trixi_test }} - ${{ matrix.os }} - Julia ${{ matrix.version }} - ${{ matrix.arch }} - ${{ github.event_name }} - # to be more specific. However, that requires us updating the required CI tests whenever we update Julia. - name: ${{ matrix.trixi_test }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - version: - - '1.8' - # - 'nightly' - os: - - ubuntu-latest - arch: - - x64 - trixi_test: - - mpi - steps: - - uses: actions/checkout@v3 - - uses: julia-actions/setup-julia@v1 - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - run: julia -e 'using InteractiveUtils; versioninfo(verbose=true)' - - uses: julia-actions/cache@v1 - - uses: julia-actions/julia-buildpkg@v1 - env: - PYTHON: "" - - name: Run tests without coverage - uses: julia-actions/julia-runtest@v1 - with: - coverage: false - env: - PYTHON: "" - TRIXI_TEST: ${{ matrix.trixi_test }} diff --git a/README.md b/README.md index 509435923a4..ccd70b6daf8 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ installation and postprocessing procedures. Its features include: ## Installation If you have not yet installed Julia, please [follow the instructions for your operating system](https://julialang.org/downloads/platform/). Trixi.jl works -with Julia v1.8. +with Julia v1.8 and newer. We recommend using the latest stable release of Julia. ### For users Trixi.jl and its related tools are registered Julia packages. Hence, you diff --git a/docs/src/development.md b/docs/src/development.md index e6a24f0cf06..cead713d0d1 100644 --- a/docs/src/development.md +++ b/docs/src/development.md @@ -18,7 +18,7 @@ package, which tracks changed files and re-loads them automatically. Therefore, it is *highly recommended* to first install Revise with the following command in Julia: To enter the package REPL mode, press `]` in the standard Julia REPL mode. Then, execute ```julia-repl -(@v1.8) pkg> add Revise +(@v1.9) pkg> add Revise ``` Now you are able to run Trixi.jl from the REPL, change Trixi.jl code between runs, **and** enjoy the advantages of the compilation cache! Before you start using @@ -28,7 +28,7 @@ Another recommended package for working from the REPL is [OhMyREPL.jl](https://github.com/KristofferC/OhMyREPL.jl). 
It can be installed by running ```julia-repl -(@v1.8) pkg> add OhMyREPL +(@v1.9) pkg> add OhMyREPL ``` and adds syntax highlighting, bracket highlighting, and other helpful improvements for using Julia interactively. To automatically use OhMyREPL when @@ -244,7 +244,7 @@ see the call stack, and execute statements. The package can be installed in the Julia REPL by executing ```julia-repl -(@v1.8) pkg> add Infiltrator +(@v1.9) pkg> add Infiltrator ``` To load the package in the Julia REPL execute @@ -328,5 +328,5 @@ in Trixi2Vtk. To use a locally modified Trixi.jl clone instead of a Trixi.jl release, one can tell Pkg to use the local source code of Trixi.jl instead of a registered version by running ```julia-repl -(@v1.8) pkg> develop path/to/Trixi.jl +(@v1.9) pkg> develop path/to/Trixi.jl ``` diff --git a/docs/src/index.md b/docs/src/index.md index 1ee05860b67..3af785bc681 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -58,7 +58,7 @@ installation and postprocessing procedures. Its features include: ## Installation If you have not yet installed Julia, please [follow the instructions for your operating system](https://julialang.org/downloads/platform/). Trixi.jl works -with Julia v1.8. +with Julia v1.8 and newer. We recommend using the latest stable release of Julia. ### For users Trixi.jl and its related tools are registered Julia packages. Hence, you diff --git a/ext/TrixiMakieExt.jl b/ext/TrixiMakieExt.jl index c4c5380cfaf..4618048556b 100644 --- a/ext/TrixiMakieExt.jl +++ b/ext/TrixiMakieExt.jl @@ -2,12 +2,10 @@ module TrixiMakieExt # Required for visualization code -# We do not check `isdefined(Base, :get_extension)` since Julia v1.9.0 -# does not load package extensions when their dependency is loaded from -# the main environment -if VERSION >= v"1.9.1" +if isdefined(Base, :get_extension) using Makie: Makie, GeometryBasics else + # Until Julia v1.9 is the minimum required version for Trixi.jl, we still support Requires.jl using ..Makie: Makie, GeometryBasics end diff --git a/src/Trixi.jl b/src/Trixi.jl index bb4074a1b5e..76cd74edcaf 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -261,10 +261,7 @@ function __init__() end # Until Julia v1.9 is the minimum required version for Trixi.jl, we still support Requires.jl - # We do not check `isdefined(Base, :get_extension)` since Julia v1.9.0 - # does not load package extensions when their dependency is loaded from - # the main environment - @static if !(VERSION >= v"1.9.1") + @static if !isdefined(Base, :get_extension) @require Makie="ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" begin include("../ext/TrixiMakieExt.jl") end diff --git a/test/runtests.jl b/test/runtests.jl index e56c4d56d0c..f76811dddbf 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -25,13 +25,14 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) cmd = string(Base.julia_cmd()) coverage = occursin("--code-coverage", cmd) && !occursin("--code-coverage=none", cmd) if !(coverage && Sys.iswindows()) && !(coverage && Sys.islinux()) + # We provide a `--heap-size-hint` to avoid/reduce out-of-memory errors during CI testing mpiexec() do cmd - run(`$cmd -n $TRIXI_MPI_NPROCS $(Base.julia_cmd()) --threads=1 --check-bounds=yes $(abspath("test_mpi.jl"))`) + run(`$cmd -n $TRIXI_MPI_NPROCS $(Base.julia_cmd()) --threads=1 --check-bounds=yes --heap-size-hint=1G $(abspath("test_mpi.jl"))`) end end end - @time if TRIXI_TEST == "all" || TRIXI_TEST == "threaded" + @time if TRIXI_TEST == "all" || TRIXI_TEST == "threaded" || TRIXI_TEST == "threaded_legacy" # Do a dummy `@test true`: # 
If the process errors out the testset would error out as well, # cf. https://github.com/JuliaParallel/MPI.jl/pull/391 diff --git a/test/test_dgmulti_2d.jl b/test/test_dgmulti_2d.jl index 302dbebc8ff..861e30045ce 100644 --- a/test/test_dgmulti_2d.jl +++ b/test/test_dgmulti_2d.jl @@ -95,8 +95,9 @@ isdir(outdir) && rm(outdir, recursive=true) @trixi_testset "elixir_euler_curved.jl (Quadrilateral elements, GaussSBP, flux differencing)" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_curved.jl"), approximation_type = GaussSBP(), - l2 = [3.4666312082010235e-6, 3.439277448411873e-6, 3.439277448308561e-6, 1.0965598425655705e-5], - linf = [1.1327280369899384e-5, 1.1343911921146699e-5, 1.1343911907157889e-5, 3.6795826181545976e-5] + l2 = [3.4666312079259457e-6, 3.4392774480368986e-6, 3.439277447953705e-6, 1.0965598424665836e-5], + linf = [1.1327280377004811e-5, 1.1343911926253725e-5, 1.1343911906935844e-5, 3.679582619220412e-5], + rtol = 2 * sqrt(eps()) ) end From 390a2fce97cba23491bb84ad173f2dd35d66633b Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Tue, 30 May 2023 16:55:05 +0200 Subject: [PATCH 039/163] set version to v0.5.27 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 688f8c89cf1..72393919290 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.27-pre" +version = "0.5.27" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From 29e0ba5f82918276b90a8f2eda537070358c6842 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Tue, 30 May 2023 16:55:25 +0200 Subject: [PATCH 040/163] set development version to v0.5.28-pre --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 72393919290..e8922d26391 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. 
Winters ", "Jesse Chan "] -version = "0.5.27" +version = "0.5.28-pre" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From 4f358017f76edc261cc28eff2a6947b036c0a89e Mon Sep 17 00:00:00 2001 From: ArseniyKholod <119304909+ArseniyKholod@users.noreply.github.com> Date: Thu, 1 Jun 2023 19:52:29 +0200 Subject: [PATCH 041/163] TreeMesh3D error message (#1491) Co-authored-by: Hendrik Ranocha --- src/meshes/tree_mesh.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/meshes/tree_mesh.jl b/src/meshes/tree_mesh.jl index 37ab3879e3e..9872d1a590a 100644 --- a/src/meshes/tree_mesh.jl +++ b/src/meshes/tree_mesh.jl @@ -111,6 +111,10 @@ function TreeMesh(coordinates_min::NTuple{NDIMS,Real}, coordinates_max::NTuple{N # TODO: MPI, create nice interface for a parallel tree/mesh if mpi_isparallel() + if mpi_isroot() && NDIMS == 3 + println(stderr, "ERROR: TreeMesh3D does not support parallel execution with MPI") + MPI.Abort(mpi_comm(), 1) + end TreeType = ParallelTree{NDIMS} else TreeType = SerialTree{NDIMS} From 6bb298dd5491387afc8319c42835c77b663a548a Mon Sep 17 00:00:00 2001 From: Jesse Chan <1156048+jlchan@users.noreply.github.com> Date: Thu, 1 Jun 2023 22:50:20 -0500 Subject: [PATCH 042/163] Add parabolic terms for `P4estMesh{2}` (#1490) * generalize function signatures to P4estMesh * add specializations for P4estMesh d * add normals * add surface integrals * fix type ambiguity * generalizing `apply_jacobian!` to P4estMesh * resolving type ambiguity with apply_jacobian! d * `apply_jacobian!` -> `apply_jacobian_parabolic!` * `apply_jacobian!` -> `apply_jacobian_parabolic!` * switch to `apply_jacobian_parabolic!` * Update src/solvers/dgsem_tree/dg_1d_parabolic.jl Co-authored-by: Hendrik Ranocha * missed one * draft of prolong2interfaces and calc_interface_flux * cache -> cache_parabolic * adding prolong2boundaries! and calc_boundary_flux_gradients! back * remove todo * variable renaming * extending TreeMesh parabolic functions to P4estMesh * adding elixir * comments * add prolong2boundaries! 
(untested) * update test * fix CI f * Update src/solvers/dgsem_p4est/dg_2d_parabolic.jl Co-authored-by: Hendrik Ranocha * Update src/solvers/dgsem_p4est/dg_2d_parabolic.jl Co-authored-by: Hendrik Ranocha * add "no mortars" check * add curved elixir * fix gradient bug * add curved test * Apply suggestions from code review Co-authored-by: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Co-authored-by: Michael Schlottke-Lakemper * add comment on mapping * reuse P4estMesh{2} code * fix += for muladd * Update examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic_curved.jl Co-authored-by: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> * comment --------- Co-authored-by: Hendrik Ranocha Co-authored-by: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Co-authored-by: Michael Schlottke-Lakemper --- .../elixir_advection_diffusion_periodic.jl | 83 ++++ ...xir_advection_diffusion_periodic_curved.jl | 88 ++++ src/solvers/dgsem_p4est/dg.jl | 3 + src/solvers/dgsem_p4est/dg_2d_parabolic.jl | 411 ++++++++++++++++++ src/solvers/dgsem_tree/dg_2d_parabolic.jl | 20 +- test/test_parabolic_2d.jl | 16 + 6 files changed, 612 insertions(+), 9 deletions(-) create mode 100644 examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic.jl create mode 100644 examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic_curved.jl create mode 100644 src/solvers/dgsem_p4est/dg_2d_parabolic.jl diff --git a/examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic.jl b/examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic.jl new file mode 100644 index 00000000000..1cd075e84ea --- /dev/null +++ b/examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic.jl @@ -0,0 +1,83 @@ +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the linear advection-diffusion equation + +diffusivity() = 5.0e-2 +advection_velocity = (1.0, 0.0) +equations = LinearScalarAdvectionEquation2D(advection_velocity) +equations_parabolic = LaplaceDiffusion2D(diffusivity(), equations) + +function x_trans_periodic(x, domain_length=SVector(2 * pi), center=SVector(0.0)) + x_normalized = x .- center + x_shifted = x_normalized .% domain_length + x_offset = ((x_shifted .< -0.5 * domain_length) - (x_shifted .> 0.5 * domain_length)) .* domain_length + return center + x_shifted + x_offset +end + +# Define initial condition (copied from "examples/tree_1d_dgsem/elixir_advection_diffusion.jl") +function initial_condition_diffusive_convergence_test(x, t, equation::LinearScalarAdvectionEquation2D) + # Store translated coordinate for easy use of exact solution + # Assumes that advection_velocity[2] = 0 (effectively that we are solving a 1D equation) + x_trans = x_trans_periodic(x[1] - equation.advection_velocity[1] * t) + + nu = diffusivity() + c = 0.0 + A = 1.0 + omega = 1.0 + scalar = c + A * sin(omega * sum(x_trans)) * exp(-nu * omega^2 * t) + return SVector(scalar) +end +initial_condition = initial_condition_diffusive_convergence_test + +# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) + +coordinates_min = (-pi, -pi) # minimum coordinates (min(x), min(y)) +coordinates_max = ( pi, pi) # maximum coordinates (max(x), max(y)) + +trees_per_dimension = (4, 4) +mesh = P4estMesh(trees_per_dimension, + polydeg=3, initial_refinement_level=2, + coordinates_min=coordinates_min, coordinates_max=coordinates_max, + 
periodicity=true) + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolicParabolic(mesh, + (equations, equations_parabolic), + initial_condition, solver) + + +############################################################################### +# ODE solvers, callbacks etc. + +# Create ODE problem with time span `tspan` +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan); + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + +# The AliveCallback prints short status information in regular intervals +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) + + +############################################################################### +# run the simulation + +# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +time_int_tol = 1.0e-11 +sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, + ode_default_options()..., callback=callbacks) + +# Print the timer summary +summary_callback() diff --git a/examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic_curved.jl b/examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic_curved.jl new file mode 100644 index 00000000000..b438fb8a29c --- /dev/null +++ b/examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic_curved.jl @@ -0,0 +1,88 @@ +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the linear advection-diffusion equation + +diffusivity() = 5.0e-2 +advection_velocity = (1.0, 0.0) +equations = LinearScalarAdvectionEquation2D(advection_velocity) +equations_parabolic = LaplaceDiffusion2D(diffusivity(), equations) + +function x_trans_periodic(x, domain_length=SVector(2 * pi), center=SVector(0.0)) + x_normalized = x .- center + x_shifted = x_normalized .% domain_length + x_offset = ((x_shifted .< -0.5 * domain_length) - (x_shifted .> 0.5 * domain_length)) .* domain_length + return center + x_shifted + x_offset +end + +# Define initial condition (copied from "examples/tree_1d_dgsem/elixir_advection_diffusion.jl") +function initial_condition_diffusive_convergence_test(x, t, equation::LinearScalarAdvectionEquation2D) + # Store translated coordinate for easy use of exact solution + # Assumes that advection_velocity[2] = 0 (effectively that we are solving a 1D equation) + x_trans = x_trans_periodic(x[1] - equation.advection_velocity[1] * t) + + nu = diffusivity() + c = 0.0 + A = 1.0 + omega = 1.0 + scalar = c + A * sin(omega * sum(x_trans)) * exp(-nu * omega^2 * t) + return SVector(scalar) +end +initial_condition = initial_condition_diffusive_convergence_test + +# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) + +# This maps the domain [-1, 1]^2 to [-pi, pi]^2 while also +# introducing a curved warping to interior nodes. 
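+# The warping term 0.1 * sin(pi * xi) * sin(pi * eta) vanishes whenever xi or eta
+# equals -1, 0, or 1, so the domain boundary is left straight and only interior
+# nodes are displaced; e.g., (xi, eta) = (0.5, 0.5) is mapped to pi * (0.6, 0.6).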
+function mapping(xi, eta) + x = xi + 0.1 * sin(pi * xi) * sin(pi * eta) + y = eta + 0.1 * sin(pi * xi) * sin(pi * eta) + return pi * SVector(x, y) +end + +trees_per_dimension = (4, 4) +mesh = P4estMesh(trees_per_dimension, + polydeg=3, initial_refinement_level=2, + mapping=mapping, + periodicity=true) + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolicParabolic(mesh, + (equations, equations_parabolic), + initial_condition, solver) + + +############################################################################### +# ODE solvers, callbacks etc. + +# Create ODE problem with time span `tspan` +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan); + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + +# The AliveCallback prints short status information in regular intervals +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) + + +############################################################################### +# run the simulation + +# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +time_int_tol = 1.0e-11 +sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, + ode_default_options()..., callback=callbacks) + +# Print the timer summary +summary_callback() diff --git a/src/solvers/dgsem_p4est/dg.jl b/src/solvers/dgsem_p4est/dg.jl index 22f847dbf3e..dabaa896fbf 100644 --- a/src/solvers/dgsem_p4est/dg.jl +++ b/src/solvers/dgsem_p4est/dg.jl @@ -46,7 +46,10 @@ end include("containers.jl") + include("dg_2d.jl") +include("dg_2d_parabolic.jl") + include("dg_3d.jl") include("dg_parallel.jl") diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl new file mode 100644 index 00000000000..7ddb83f97db --- /dev/null +++ b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl @@ -0,0 +1,411 @@ +# This method is called when a SemidiscretizationHyperbolicParabolic is constructed. +# It constructs the basic `cache` used throughout the simulation to compute +# the RHS etc. 
+function create_cache_parabolic(mesh::P4estMesh, equations_hyperbolic::AbstractEquations, + equations_parabolic::AbstractEquationsParabolic, + dg::DG, parabolic_scheme, RealT, uEltype) + + balance!(mesh) + + elements = init_elements(mesh, equations_hyperbolic, dg.basis, uEltype) + interfaces = init_interfaces(mesh, equations_hyperbolic, dg.basis, elements) + boundaries = init_boundaries(mesh, equations_hyperbolic, dg.basis, elements) + + n_vars = nvariables(equations_hyperbolic) + n_elements = nelements(elements) + n_nodes = nnodes(dg.basis) # nodes in one direction + u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_nodes, n_elements) + gradients = ntuple(_ -> similar(u_transformed), ndims(mesh)) + flux_viscous = ntuple(_ -> similar(u_transformed), ndims(mesh)) + + cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) + + return cache +end + +function calc_gradient!(gradients, u_transformed, t, + mesh::P4estMesh{2}, equations_parabolic, + boundary_conditions_parabolic, dg::DG, + cache, cache_parabolic) + + gradients_x, gradients_y = gradients + + # Reset du + @trixi_timeit timer() "reset gradients" begin + reset_du!(gradients_x, dg, cache) + reset_du!(gradients_y, dg, cache) + end + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + (; derivative_dhat) = dg.basis + (; contravariant_vectors) = cache.elements + + @threaded for element in eachelement(dg, cache) + + # Calculate gradients with respect to reference coordinates in one element + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i], u_node, equations_parabolic, dg, ii, j, element) + end + + for jj in eachnode(dg) + multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j], u_node, equations_parabolic, dg, i, jj, element) + end + end + + # now that the reference coordinate gradients are computed, transform them node-by-node to physical gradients + # using the contravariant vectors + for j in eachnode(dg), i in eachnode(dg) + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) + + gradients_reference_1 = get_node_vars(gradients_x, equations_parabolic, dg, i, j, element) + gradients_reference_2 = get_node_vars(gradients_y, equations_parabolic, dg, i, j, element) + + # note that the contravariant vectors are transposed compared with computations of flux + # divergences in `calc_volume_integral!`. See + # https://github.com/trixi-framework/Trixi.jl/pull/1490#discussion_r1213345190 + # for a more detailed discussion. + gradient_x_node = Ja11 * gradients_reference_1 + Ja21 * gradients_reference_2 + gradient_y_node = Ja12 * gradients_reference_1 + Ja22 * gradients_reference_2 + + set_node_vars!(gradients_x, gradient_x_node, equations_parabolic, dg, i, j, element) + set_node_vars!(gradients_y, gradient_y_node, equations_parabolic, dg, i, j, element) + end + + end + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( + cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) + + # Calculate interface fluxes for the gradient. This reuses P4est `calc_interface_flux!` along with a + # specialization for AbstractEquationsParabolic. 
+ @trixi_timeit timer() "interface flux" calc_interface_flux!(cache_parabolic.elements.surface_flux_values, + mesh, False(), # False() = no nonconservative terms + equations_parabolic, dg.surface_integral, dg, + cache_parabolic) + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( + cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" calc_boundary_flux_gradients!( + cache_parabolic, t, boundary_conditions_parabolic, mesh, equations_parabolic, + dg.surface_integral, dg) + + # TODO: parabolic; mortars + @assert nmortars(dg, cache) == 0 + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + (; boundary_interpolation) = dg.basis + (; surface_flux_values) = cache_parabolic.elements + (; contravariant_vectors) = cache.elements + + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). + factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for l in eachnode(dg) + for v in eachvariable(equations_parabolic) + + # Compute x-component of gradients + + # surface at -x + normal_direction_x, _ = get_normal_direction(1, contravariant_vectors, 1, l, element) + gradients_x[v, 1, l, element] = ( + gradients_x[v, 1, l, element] + surface_flux_values[v, l, 1, element] * factor_1 * normal_direction_x) + + # surface at +x + normal_direction_x, _ = get_normal_direction(2, contravariant_vectors, nnodes(dg), l, element) + gradients_x[v, nnodes(dg), l, element] = ( + gradients_x[v, nnodes(dg), l, element] + surface_flux_values[v, l, 2, element] * factor_2 * normal_direction_x) + + # surface at -y + normal_direction_x, _ = get_normal_direction(3, contravariant_vectors, l, 1, element) + gradients_x[v, l, 1, element] = ( + gradients_x[v, l, 1, element] + surface_flux_values[v, l, 3, element] * factor_1 * normal_direction_x) + + # surface at +y + normal_direction_x, _ = get_normal_direction(4, contravariant_vectors, l, nnodes(dg), element) + gradients_x[v, l, nnodes(dg), element] = ( + gradients_x[v, l, nnodes(dg), element] + surface_flux_values[v, l, 4, element] * factor_2 * normal_direction_x) + + # Compute y-component of gradients + + # surface at -x + _, normal_direction_y = get_normal_direction(1, contravariant_vectors, 1, l, element) + gradients_y[v, 1, l, element] = ( + gradients_y[v, 1, l, element] + surface_flux_values[v, l, 1, element] * factor_1 * normal_direction_y) + + # surface at +x + _, normal_direction_y = get_normal_direction(2, contravariant_vectors, nnodes(dg), l, element) + gradients_y[v, nnodes(dg), l, element] = ( + gradients_y[v, nnodes(dg), l, element] + surface_flux_values[v, l, 2, element] * factor_2 * normal_direction_y) + + # surface at -y + _, normal_direction_y = get_normal_direction(3, contravariant_vectors, l, 1, element) + gradients_y[v, l, 1, element] = ( + gradients_y[v, l, 1, element] + surface_flux_values[v, l, 3, element] * factor_1 * normal_direction_y) + + # surface at +y + _, normal_direction_y = get_normal_direction(4, contravariant_vectors, l, nnodes(dg), element) + gradients_y[v, l, nnodes(dg), element] = ( + gradients_y[v, l, nnodes(dg), element] + surface_flux_values[v, l, 4, element] * factor_2 * normal_direction_y) + end + end + end + end + + # 
Apply Jacobian from mapping to reference element
+  @trixi_timeit timer() "Jacobian" begin
+    apply_jacobian_parabolic!(gradients_x, mesh, equations_parabolic, dg, cache_parabolic)
+    apply_jacobian_parabolic!(gradients_y, mesh, equations_parabolic, dg, cache_parabolic)
+  end
+
+  return nothing
+end
+
+# This version is used for parabolic gradient computations
+@inline function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{2},
+                                      nonconservative_terms::False,
+                                      equations::AbstractEquationsParabolic,
+                                      surface_integral, dg::DG, cache,
+                                      interface_index, normal_direction,
+                                      primary_node_index, primary_direction_index, primary_element_index,
+                                      secondary_node_index, secondary_direction_index, secondary_element_index)
+  @unpack u = cache.interfaces
+  @unpack surface_flux = surface_integral
+
+  u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, interface_index)
+
+  flux_ = 0.5 * (u_ll + u_rr) # we assume that the gradient computations utilize a central flux
+
+  # Note that we don't flip the sign on the secondary flux. This is because for parabolic terms,
+  # the normals are not embedded in `flux_` for the parabolic gradient computations.
+  for v in eachvariable(equations)
+    surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = flux_[v]
+    surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = flux_[v]
+  end
+end
+
+# This is the version used when calculating the divergence of the viscous fluxes
+function calc_volume_integral!(du, flux_viscous,
+                               mesh::P4estMesh{2}, equations_parabolic::AbstractEquationsParabolic,
+                               dg::DGSEM, cache)
+  (; derivative_dhat) = dg.basis
+  (; contravariant_vectors) = cache.elements
+  flux_viscous_x, flux_viscous_y = flux_viscous
+
+  @threaded for element in eachelement(dg, cache)
+    # Calculate volume terms in one element
+    for j in eachnode(dg), i in eachnode(dg)
+      flux1 = get_node_vars(flux_viscous_x, equations_parabolic, dg, i, j, element)
+      flux2 = get_node_vars(flux_viscous_y, equations_parabolic, dg, i, j, element)
+
+      # Compute the contravariant flux by taking the scalar product of the
+      # first contravariant vector Ja^1 and the flux vector
+      Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element)
+      contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2
+      for ii in eachnode(dg)
+        multiply_add_to_node_vars!(du, derivative_dhat[ii, i], contravariant_flux1, equations_parabolic, dg, ii, j, element)
+      end
+
+      # Compute the contravariant flux by taking the scalar product of the
+      # second contravariant vector Ja^2 and the flux vector
+      Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element)
+      contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2
+      for jj in eachnode(dg)
+        multiply_add_to_node_vars!(du, derivative_dhat[jj, j], contravariant_flux2, equations_parabolic, dg, i, jj, element)
+      end
+    end
+  end
+
+  return nothing
+end
+
+
+# This is the version used when calculating the divergence of the viscous fluxes
+# We pass the `surface_integral` argument solely for dispatch
+function prolong2interfaces!(cache_parabolic, flux_viscous,
+                             mesh::P4estMesh{2}, equations_parabolic::AbstractEquationsParabolic,
+                             surface_integral, dg::DG, cache)
+  (; interfaces) = cache_parabolic
+  (; contravariant_vectors) = cache_parabolic.elements
+  index_range = eachnode(dg)
+  flux_viscous_x, flux_viscous_y = flux_viscous
+
+  @threaded for interface in eachinterface(dg, cache)
+    # Copy solution data from the primary element using "delayed 
indexing" with + # a start value and a step size to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the index of the primary side + # will always run forwards. + primary_element = interfaces.neighbor_ids[1, interface] + primary_indices = interfaces.node_indices[1, interface] + primary_direction = indices2direction(primary_indices) + + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], index_range) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + for i in eachnode(dg) + + # this is the outward normal direction on the primary element + normal_direction = get_normal_direction(primary_direction, contravariant_vectors, + i_primary, j_primary, primary_element) + + for v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! + flux_viscous = SVector(flux_viscous_x[v, i_primary, j_primary, primary_element], + flux_viscous_y[v, i_primary, j_primary, primary_element]) + + interfaces.u[1, v, i, interface] = dot(flux_viscous, normal_direction) + end + i_primary += i_primary_step + j_primary += j_primary_step + end + + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + secondary_element = interfaces.neighbor_ids[2, interface] + secondary_indices = interfaces.node_indices[2, interface] + secondary_direction = indices2direction(secondary_indices) + + i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1], index_range) + j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2], index_range) + + i_secondary = i_secondary_start + j_secondary = j_secondary_start + for i in eachnode(dg) + # This is the outward normal direction on the secondary element. + # Here, we assume that normal_direction on the secondary element is + # the negative of normal_direction on the primary element. + normal_direction = get_normal_direction(secondary_direction, contravariant_vectors, + i_secondary, j_secondary, secondary_element) + + for v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! 
+ flux_viscous = SVector(flux_viscous_x[v, i_secondary, j_secondary, secondary_element], + flux_viscous_y[v, i_secondary, j_secondary, secondary_element]) + # store the normal flux with respect to the primary normal direction + interfaces.u[2, v, i, interface] = -dot(flux_viscous, normal_direction) + end + i_secondary += i_secondary_step + j_secondary += j_secondary_step + end + end + + return nothing +end + +function calc_interface_flux!(surface_flux_values, + mesh::P4estMesh{2}, equations_parabolic, + dg::DG, cache_parabolic) + + (; neighbor_ids, node_indices) = cache_parabolic.interfaces + (; contravariant_vectors) = cache_parabolic.elements + index_range = eachnode(dg) + index_end = last(index_range) + + @threaded for interface in eachinterface(dg, cache_parabolic) + # Get element and side index information on the primary element + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + primary_direction_index = indices2direction(primary_indices) + + # Create the local i,j indexing on the primary element used to pull normal direction information + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], index_range) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + + # Get element and side index information on the secondary element + secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + secondary_direction_index = indices2direction(secondary_indices) + + # Initiate the secondary index to be used in the surface for loop. + # This index on the primary side will always run forward but + # the secondary index might need to run backwards for flipped sides. + if :i_backward in secondary_indices + node_secondary = index_end + node_secondary_step = -1 + else + node_secondary = 1 + node_secondary_step = 1 + end + + for node in eachnode(dg) + # We prolong the viscous flux dotted with respect the outward normal on the + # primary element. We assume a BR-1 type of flux. + viscous_flux_normal_ll, viscous_flux_normal_rr = + get_surface_node_vars(cache_parabolic.interfaces.u, equations_parabolic, dg, node, interface) + + flux = 0.5 * (viscous_flux_normal_ll + viscous_flux_normal_rr) + + for v in eachvariable(equations_parabolic) + surface_flux_values[v, node, primary_direction_index, primary_element] = flux[v] + surface_flux_values[v, node_secondary, secondary_direction_index, secondary_element] = -flux[v] + end + + # Increment primary element indices to pull the normal direction + i_primary += i_primary_step + j_primary += j_primary_step + # Increment the surface node index along the secondary element + node_secondary += node_secondary_step + end + end + + return nothing +end + +# TODO: parabolic, finish implementing `calc_boundary_flux_gradients!` and `calc_boundary_flux_divergence!` +function prolong2boundaries!(cache_parabolic, flux_viscous, + mesh::P4estMesh{2}, equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG, cache) + (; boundaries) = cache_parabolic + (; contravariant_vectors) = cache_parabolic.elements + index_range = eachnode(dg) + + flux_viscous_x, flux_viscous_y = flux_viscous + + @threaded for boundary in eachboundary(dg, cache_parabolic) + # Copy solution data from the element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. 
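+    # A boundary face has only a single adjacent element, so this mirrors
+    # the primary-element logic of `prolong2interfaces!` above.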
+    element = boundaries.neighbor_ids[boundary]
+    node_indices = boundaries.node_indices[boundary]
+    direction = indices2direction(node_indices)
+
+    i_node_start, i_node_step = index_to_start_step_2d(node_indices[1], index_range)
+    j_node_start, j_node_step = index_to_start_step_2d(node_indices[2], index_range)
+
+    i_node = i_node_start
+    j_node = j_node_start
+    for i in eachnode(dg)
+      # this is the outward normal direction on the boundary element
+      normal_direction = get_normal_direction(direction, contravariant_vectors,
+                                              i_node, j_node, element)
+
+      for v in eachvariable(equations_parabolic)
+        flux_viscous = SVector(flux_viscous_x[v, i_node, j_node, element],
+                               flux_viscous_y[v, i_node, j_node, element])
+
+        boundaries.u[v, i, boundary] = dot(flux_viscous, normal_direction)
+      end
+      i_node += i_node_step
+      j_node += j_node_step
+    end
+  end
+
+  return nothing
+end
diff --git a/src/solvers/dgsem_tree/dg_2d_parabolic.jl b/src/solvers/dgsem_tree/dg_2d_parabolic.jl
index 97bbc7e2633..07146c8d79e 100644
--- a/src/solvers/dgsem_tree/dg_2d_parabolic.jl
+++ b/src/solvers/dgsem_tree/dg_2d_parabolic.jl
@@ -12,10 +12,11 @@
 # 2. compute f(u, grad(u))
 # 3. compute div(f(u, grad(u))) (i.e., the "regular" rhs! call)
 # boundary conditions will be applied to both grad(u) and div(f(u, grad(u))).
-function rhs_parabolic!(du, u, t, mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic,
+function rhs_parabolic!(du, u, t, mesh::Union{TreeMesh{2}, P4estMesh{2}},
+                        equations_parabolic::AbstractEquationsParabolic,
                         initial_condition, boundary_conditions_parabolic, source_terms,
                         dg::DG, parabolic_scheme, cache, cache_parabolic)
-  @unpack u_transformed, gradients, flux_viscous = cache_parabolic
+  (; u_transformed, gradients, flux_viscous) = cache_parabolic

   # Convert conservative variables to a form more suitable for viscous flux calculations
   @trixi_timeit timer() "transform variables" transform_variables!(
@@ -85,7 +86,7 @@ end
 # Transform solution variables prior to taking the gradient
 # (e.g., conservative to primitive variables). Defaults to doing nothing.
 # TODO: can we avoid copying data? 
-function transform_variables!(u_transformed, u, mesh::TreeMesh{2}, +function transform_variables!(u_transformed, u, mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, cache, cache_parabolic) @threaded for element in eachelement(dg, cache) @@ -245,7 +246,8 @@ function prolong2boundaries!(cache_parabolic, flux_viscous, end -function calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, mesh::TreeMesh{2}, +function calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, + mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache, cache_parabolic) gradients_x, gradients_y = gradients @@ -281,20 +283,20 @@ function get_unsigned_normal_vector_2d(direction) end function calc_boundary_flux_gradients!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, + mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) return nothing end function calc_boundary_flux_divergence!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, + mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) return nothing end function calc_boundary_flux_gradients!(cache, t, boundary_conditions_parabolic::NamedTuple, - mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, - surface_integral, dg::DG) + mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG) @unpack surface_flux_values = cache.elements @unpack n_boundaries_per_direction = cache.boundaries @@ -586,7 +588,7 @@ end # This is because the parabolic fluxes are assumed to be of the form # `du/dt + df/dx = dg/dx + source(x,t)`, # where f(u) is the inviscid flux and g(u) is the viscous flux. 
-function apply_jacobian_parabolic!(du, mesh::TreeMesh{2}, +function apply_jacobian_parabolic!(du, mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations::AbstractEquationsParabolic, dg::DG, cache) @threaded for element in eachelement(dg, cache) diff --git a/test/test_parabolic_2d.jl b/test/test_parabolic_2d.jl index 588f43e4543..b0ac63d4ce9 100644 --- a/test/test_parabolic_2d.jl +++ b/test/test_parabolic_2d.jl @@ -184,6 +184,22 @@ isdir(outdir) && rm(outdir, recursive=true) ) end + @trixi_testset "P4estMesh2D: elixir_advection_diffusion_periodic.jl" begin + @test_trixi_include(joinpath(examples_dir(), "p4est_2d_dgsem", "elixir_advection_diffusion_periodic.jl"), + trees_per_dimension = (1, 1), initial_refinement_level = 2, tspan=(0.0, 0.5), + l2 = [0.0023754695605828443], + linf = [0.008154128363741964] + ) + end + + @trixi_testset "P4estMesh2D: elixir_advection_diffusion_periodic_curved.jl" begin + @test_trixi_include(joinpath(examples_dir(), "p4est_2d_dgsem", "elixir_advection_diffusion_periodic_curved.jl"), + trees_per_dimension = (1, 1), initial_refinement_level = 2, tspan=(0.0, 0.5), + l2 = [0.012380458938507371], + linf = [0.10860506906472567] + ) + end + end # Clean up afterwards: delete Trixi.jl output directory From 3fcd8d54a4c4d8ee425a2dbcee8fd90996ab42ad Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Fri, 2 Jun 2023 08:10:57 +0200 Subject: [PATCH 043/163] Re-enable MPI tests (#1500) * Re-enable MPI tests * Enable p4est 3D tests * Re-enable all tests * Revert "Re-enable all tests" This reverts commit a161c4282eba08f45f65761499f261dc40bd9789. * Disable 3D p4est tests again :-/ * Update test/test_mpi.jl --- test/test_mpi.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_mpi.jl b/test/test_mpi.jl index 10895665f23..34febf7e268 100644 --- a/test/test_mpi.jl +++ b/test/test_mpi.jl @@ -20,8 +20,8 @@ CI_ON_WINDOWS = (get(ENV, "GITHUB_ACTIONS", false) == "true") && Sys.iswindows() include("test_mpi_tree.jl") # P4estMesh tests + include("test_mpi_p4est_2d.jl") if !CI_ON_WINDOWS # see comment on `CI_ON_WINDOWS` above - include("test_mpi_p4est_2d.jl") include("test_mpi_p4est_3d.jl") end end # MPI From 5676ec0e155de9890fa46d337d744f6adc9e5126 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 6 Jun 2023 05:47:50 +0200 Subject: [PATCH 044/163] Bump crate-ci/typos from 1.14.11 to 1.14.12 (#1509) Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.14.11 to 1.14.12. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.14.11...v1.14.12) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/SpellCheck.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml index 9e27ce20d06..c4ab3a98557 100644 --- a/.github/workflows/SpellCheck.yml +++ b/.github/workflows/SpellCheck.yml @@ -10,4 +10,4 @@ jobs: - name: Checkout Actions Repository uses: actions/checkout@v3 - name: Check spelling - uses: crate-ci/typos@v1.14.11 + uses: crate-ci/typos@v1.14.12 From c47b6f6ae038535d04318c3294ff3f4a4cc41d11 Mon Sep 17 00:00:00 2001 From: Jesse Chan <1156048+jlchan@users.noreply.github.com> Date: Fri, 9 Jun 2023 03:20:59 -0500 Subject: [PATCH 045/163] update to StartUpDG.jl v0.17 (#1515) --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index e8922d26391..273a062322b 100644 --- a/Project.toml +++ b/Project.toml @@ -73,7 +73,7 @@ Requires = "1.1" SciMLBase = "1.90" Setfield = "0.8, 1" SimpleUnPack = "1.1" -StartUpDG = "0.16" +StartUpDG = "0.17" Static = "0.3, 0.4, 0.5, 0.6, 0.7, 0.8" StaticArrayInterface = "1.4" StaticArrays = "1" From 5af2de0149749ce0471dedc6d476a8c35c48afc1 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Sat, 10 Jun 2023 08:33:35 +0200 Subject: [PATCH 046/163] fix some problems found using JET.jl (#1519) * fix typo * add ! to mul_by * fix typo --- src/auxiliary/precompile.jl | 2 +- src/callbacks_step/analysis_dgmulti.jl | 8 ++++---- src/solvers/dgsem_tree/indicators_1d.jl | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/auxiliary/precompile.jl b/src/auxiliary/precompile.jl index 0695e72efac..3b1cb58e147 100644 --- a/src/auxiliary/precompile.jl +++ b/src/auxiliary/precompile.jl @@ -146,7 +146,7 @@ function _precompile_manual_() function equations_types_1d(RealT) ( LinearScalarAdvectionEquation1D{RealT}, - HyperbolicDiffusionEquation1D{RealT}, + HyperbolicDiffusionEquations1D{RealT}, CompressibleEulerEquations1D{RealT}, IdealGlmMhdEquations1D{RealT}, ) diff --git a/src/callbacks_step/analysis_dgmulti.jl b/src/callbacks_step/analysis_dgmulti.jl index 2fbd8eda87a..18640c9379f 100644 --- a/src/callbacks_step/analysis_dgmulti.jl +++ b/src/callbacks_step/analysis_dgmulti.jl @@ -155,10 +155,10 @@ function integrate(func::typeof(enstrophy), u, gradient_z_quadrature_values = local_gradient_quadrature_values[3][Threads.threadid()] # interpolate to quadrature on each element - apply_to_each_field(mul_by(dg.basis.Vq), u_quadrature_values, view(u, :, e)) - apply_to_each_field(mul_by(dg.basis.Vq), gradient_x_quadrature_values, view(gradients_x, :, e)) - apply_to_each_field(mul_by(dg.basis.Vq), gradient_y_quadrature_values, view(gradients_y, :, e)) - apply_to_each_field(mul_by(dg.basis.Vq), gradient_z_quadrature_values, view(gradients_z, :, e)) + apply_to_each_field(mul_by!(dg.basis.Vq), u_quadrature_values, view(u, :, e)) + apply_to_each_field(mul_by!(dg.basis.Vq), gradient_x_quadrature_values, view(gradients_x, :, e)) + apply_to_each_field(mul_by!(dg.basis.Vq), gradient_y_quadrature_values, view(gradients_y, :, e)) + apply_to_each_field(mul_by!(dg.basis.Vq), gradient_z_quadrature_values, view(gradients_z, :, e)) # integrate over the element for i in eachindex(u_quadrature_values) diff --git a/src/solvers/dgsem_tree/indicators_1d.jl b/src/solvers/dgsem_tree/indicators_1d.jl index c1a88161245..7086d77a1a3 100644 --- a/src/solvers/dgsem_tree/indicators_1d.jl +++ 
b/src/solvers/dgsem_tree/indicators_1d.jl
@@ -332,7 +332,7 @@ function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkRayHesthaven})(
 
   for direction in eachdirection(mesh.tree)
     if !has_any_neighbor(mesh.tree, cell_id, direction)
-      neighbor_ids[direction] = element_id
+      neighbor_ids[direction] = element
       continue
     end
     if has_neighbor(mesh.tree, cell_id, direction)

From dac5da1ded3d4159bd263007c317c6553cf05038 Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Mon, 12 Jun 2023 07:32:45 +0200
Subject: [PATCH 047/163] set version to v0.5.28

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 273a062322b..ea316f9810e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "]
-version = "0.5.28-pre"
+version = "0.5.28"

 [deps]
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"

From 4357fe9baf88b34531d46eb5bf85b45a4c10955d Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Mon, 12 Jun 2023 07:33:01 +0200
Subject: [PATCH 048/163] set development version to v0.5.29-pre

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index ea316f9810e..9d51e4dcffc 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "]
-version = "0.5.28"
+version = "0.5.29-pre"

 [deps]
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"

From 2bc1cc68c4fed509a2a3e71d02b84e0be5c5565b Mon Sep 17 00:00:00 2001
From: Michael Schlottke-Lakemper
Date: Fri, 16 Jun 2023 07:22:10 +0200
Subject: [PATCH 049/163] Format source code based on ˋSciMLˋ style (4 spaces)
 (#1513)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Format source code based on ˋSciMLˋ style (4 spaces)
* Make JuliaFormatter consume the two-layer SWE files again
* Remove files from ignore list
* Format two-layer SWE files
* Fix recursive_length formatting
* Find acceptable formatting for SemiHypMeshBCSolver
* Slightly improve semi_hyp_par formatting
* Variant on TrixiODESolution type formatting
* add FormatCheck CI job
* Allow alignment in struct definitions
* Fix formatting abomination
* Remove useless spam
* Sanitize type alias
* Fix using statement
* Update src/auxiliary/mpi.jl

Co-authored-by: Hendrik Ranocha

* Update src/callbacks_stage/positivity_zhang_shu.jl

Co-authored-by: Hendrik Ranocha

* Update src/callbacks_step/amr_dg2d.jl

Co-authored-by: Hendrik Ranocha

* Update src/callbacks_step/analysis.jl

Co-authored-by: Hendrik Ranocha

* Format the formatter
* substituted non-ascii lambda (#1521)
* rename format-check job name
* fix bad formatting
* improve some bad formatting
* align some struct fields
* align some struct fields
* align some struct fields
* begin end blocks for some at-trixi_timeit
* Update src/visualization/utilities.jl
* Update src/solvers/fdsbp_tree/fdsbp.jl
* Update src/meshes/p4est_mesh.jl
* Update src/meshes/p4est_mesh.jl
* comments on passing nothing as mortar
* comment on passing nothing as mortar
* formatting on/off blocks
* Copy JuliaFormatter.toml from TrixiParticles.jl
* Add info on automated source code formatting
* Format `ext/`
* Format `benchmark/`
* 
Format `utils/` --------- Co-authored-by: Hendrik Ranocha Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com> Co-authored-by: Hendrik Ranocha Co-authored-by: Jesse Chan --- .JuliaFormatter.toml | 8 + .github/workflows/FormatCheck.yml | 44 + benchmark/benchmark_ec.jl | 68 +- benchmark/benchmarks.jl | 3 + benchmark/elixir_2d_euler_vortex_p4est.jl | 80 +- .../elixir_2d_euler_vortex_structured.jl | 74 +- benchmark/elixir_2d_euler_vortex_tree.jl | 78 +- .../elixir_2d_euler_vortex_unstructured.jl | 78 +- ...elixir_3d_euler_source_terms_structured.jl | 13 +- .../elixir_3d_euler_source_terms_tree.jl | 17 +- .../benchmark_multiply_dimensionwise.jl | 4 + benchmark/run_benchmarks.jl | 21 +- docs/src/styleguide.md | 52 +- ext/TrixiMakieExt.jl | 570 ++-- src/Trixi.jl | 127 +- src/auxiliary/auxiliary.jl | 288 +- src/auxiliary/containers.jl | 364 ++- src/auxiliary/math.jl | 40 +- src/auxiliary/mpi.jl | 84 +- src/auxiliary/p4est.jl | 219 +- src/auxiliary/precompile.jl | 791 ++--- src/auxiliary/special_elixirs.jl | 342 +-- src/basic_types.jl | 49 +- src/callbacks_stage/callbacks_stage.jl | 4 +- src/callbacks_stage/positivity_zhang_shu.jl | 48 +- .../positivity_zhang_shu_dg1d.jl | 68 +- .../positivity_zhang_shu_dg2d.jl | 68 +- .../positivity_zhang_shu_dg3d.jl | 68 +- src/callbacks_step/alive.jl | 126 +- src/callbacks_step/amr.jl | 1144 ++++---- src/callbacks_step/amr_dg.jl | 138 +- src/callbacks_step/amr_dg1d.jl | 432 +-- src/callbacks_step/amr_dg2d.jl | 576 ++-- src/callbacks_step/amr_dg3d.jl | 530 ++-- src/callbacks_step/analysis.jl | 887 +++--- src/callbacks_step/analysis_dg1d.jl | 320 +- src/callbacks_step/analysis_dg2d.jl | 517 ++-- src/callbacks_step/analysis_dg2d_parallel.jl | 341 +-- src/callbacks_step/analysis_dg3d.jl | 517 ++-- src/callbacks_step/analysis_dg3d_parallel.jl | 157 +- src/callbacks_step/analysis_dgmulti.jl | 273 +- src/callbacks_step/averaging.jl | 152 +- src/callbacks_step/averaging_dg.jl | 78 +- src/callbacks_step/averaging_dg2d.jl | 109 +- src/callbacks_step/callbacks_step.jl | 33 +- .../euler_acoustics_coupling.jl | 218 +- .../euler_acoustics_coupling_dg2d.jl | 64 +- src/callbacks_step/glm_speed.jl | 96 +- src/callbacks_step/glm_speed_dg.jl | 47 +- src/callbacks_step/lbm_collision.jl | 60 +- src/callbacks_step/lbm_collision_dg2d.jl | 19 +- src/callbacks_step/lbm_collision_dg3d.jl | 19 +- src/callbacks_step/save_restart.jl | 166 +- src/callbacks_step/save_restart_dg.jl | 545 ++-- src/callbacks_step/save_solution.jl | 311 +- src/callbacks_step/save_solution_dg.jl | 431 +-- src/callbacks_step/steady_state.jl | 93 +- src/callbacks_step/steady_state_dg1d.jl | 25 +- src/callbacks_step/steady_state_dg2d.jl | 25 +- src/callbacks_step/steady_state_dg3d.jl | 25 +- src/callbacks_step/stepsize.jl | 128 +- src/callbacks_step/stepsize_dg1d.jl | 103 +- src/callbacks_step/stepsize_dg2d.jl | 243 +- src/callbacks_step/stepsize_dg3d.jl | 223 +- src/callbacks_step/summary.jl | 322 +- src/callbacks_step/time_series.jl | 310 +- src/callbacks_step/time_series_dg.jl | 46 +- src/callbacks_step/time_series_dg2d.jl | 223 +- src/callbacks_step/trivial.jl | 21 +- src/callbacks_step/visualization.jl | 340 +-- src/equations/acoustic_perturbation_2d.jl | 386 +-- src/equations/compressible_euler_1d.jl | 996 ++++--- src/equations/compressible_euler_2d.jl | 1881 ++++++------ src/equations/compressible_euler_3d.jl | 2134 +++++++------- .../compressible_euler_multicomponent_1d.jl | 672 +++-- .../compressible_euler_multicomponent_2d.jl | 810 ++--- .../compressible_navier_stokes_2d.jl | 434 
+-- .../compressible_navier_stokes_3d.jl | 505 ++-- src/equations/equations.jl | 173 +- src/equations/equations_parabolic.jl | 6 +- src/equations/hyperbolic_diffusion_1d.jl | 173 +- src/equations/hyperbolic_diffusion_2d.jl | 322 +- src/equations/hyperbolic_diffusion_3d.jl | 308 +- src/equations/ideal_glm_mhd_1d.jl | 704 ++--- src/equations/ideal_glm_mhd_2d.jl | 1648 ++++++----- src/equations/ideal_glm_mhd_3d.jl | 1792 ++++++------ .../ideal_glm_mhd_multicomponent_1d.jl | 792 ++--- .../ideal_glm_mhd_multicomponent_2d.jl | 1035 ++++--- src/equations/inviscid_burgers_1d.jl | 130 +- src/equations/laplace_diffusion_1d.jl | 54 +- src/equations/laplace_diffusion_2d.jl | 64 +- src/equations/lattice_boltzmann_2d.jl | 398 ++- src/equations/lattice_boltzmann_3d.jl | 375 +-- src/equations/linear_scalar_advection_1d.jl | 150 +- src/equations/linear_scalar_advection_2d.jl | 230 +- src/equations/linear_scalar_advection_3d.jl | 171 +- src/equations/linearized_euler_2d.jl | 50 +- src/equations/numerical_fluxes.jl | 232 +- src/equations/shallow_water_1d.jl | 494 ++-- src/equations/shallow_water_2d.jl | 932 +++--- src/equations/shallow_water_two_layer_1d.jl | 591 ++-- src/equations/shallow_water_two_layer_2d.jl | 1251 ++++---- src/meshes/abstract_tree.jl | 914 +++--- src/meshes/dgmulti_meshes.jl | 43 +- src/meshes/face_interpolant.jl | 64 +- src/meshes/mesh_io.jl | 743 ++--- src/meshes/meshes.jl | 4 +- src/meshes/p4est_mesh.jl | 2604 +++++++++-------- src/meshes/parallel_tree.jl | 326 +-- src/meshes/parallel_tree_mesh.jl | 156 +- src/meshes/serial_tree.jl | 301 +- src/meshes/structured_mesh.jl | 356 +-- src/meshes/surface_interpolant.jl | 161 +- src/meshes/transfinite_mappings_3d.jl | 204 +- src/meshes/tree_mesh.jl | 281 +- src/meshes/unstructured_mesh.jl | 429 +-- src/semidiscretization/semidiscretization.jl | 307 +- .../semidiscretization_euler_acoustics.jl | 255 +- .../semidiscretization_euler_gravity.jl | 721 ++--- .../semidiscretization_hyperbolic.jl | 419 +-- ...semidiscretization_hyperbolic_parabolic.jl | 419 +-- src/solvers/dg.jl | 577 ++-- src/solvers/dgmulti.jl | 2 +- src/solvers/dgmulti/dg.jl | 812 ++--- src/solvers/dgmulti/dg_parabolic.jl | 599 ++-- src/solvers/dgmulti/flux_differencing.jl | 870 +++--- .../flux_differencing_compressible_euler.jl | 309 +- .../dgmulti/flux_differencing_gauss_sbp.jl | 823 +++--- src/solvers/dgmulti/sbp.jl | 879 +++--- src/solvers/dgmulti/shock_capturing.jl | 514 ++-- src/solvers/dgmulti/types.jl | 420 +-- src/solvers/dgsem/basis_lobatto_legendre.jl | 910 +++--- src/solvers/dgsem/dgsem.jl | 59 +- src/solvers/dgsem/interpolation.jl | 664 +++-- src/solvers/dgsem/l2projection.jl | 172 +- src/solvers/dgsem_p4est/containers.jl | 945 +++--- src/solvers/dgsem_p4est/containers_2d.jl | 250 +- src/solvers/dgsem_p4est/containers_3d.jl | 514 ++-- .../dgsem_p4est/containers_parallel.jl | 818 +++--- .../dgsem_p4est/containers_parallel_2d.jl | 118 +- .../dgsem_p4est/containers_parallel_3d.jl | 227 +- src/solvers/dgsem_p4est/dg.jl | 53 +- src/solvers/dgsem_p4est/dg_2d.jl | 951 +++--- src/solvers/dgsem_p4est/dg_2d_parabolic.jl | 752 ++--- src/solvers/dgsem_p4est/dg_2d_parallel.jl | 504 ++-- src/solvers/dgsem_p4est/dg_3d.jl | 1155 ++++---- src/solvers/dgsem_p4est/dg_3d_parallel.jl | 879 +++--- src/solvers/dgsem_p4est/dg_parallel.jl | 897 +++--- src/solvers/dgsem_structured/containers.jl | 83 +- src/solvers/dgsem_structured/containers_1d.jl | 97 +- src/solvers/dgsem_structured/containers_2d.jl | 265 +- src/solvers/dgsem_structured/containers_3d.jl | 516 ++-- 
src/solvers/dgsem_structured/dg.jl | 86 +- src/solvers/dgsem_structured/dg_1d.jl | 137 +- src/solvers/dgsem_structured/dg_2d.jl | 980 ++++--- .../dg_2d_compressible_euler.jl | 907 +++--- src/solvers/dgsem_structured/dg_3d.jl | 1244 ++++---- .../dg_3d_compressible_euler.jl | 1417 ++++----- src/solvers/dgsem_structured/indicators_1d.jl | 28 +- src/solvers/dgsem_structured/indicators_2d.jl | 34 +- src/solvers/dgsem_structured/indicators_3d.jl | 42 +- src/solvers/dgsem_tree/containers.jl | 83 +- src/solvers/dgsem_tree/containers_1d.jl | 636 ++-- src/solvers/dgsem_tree/containers_2d.jl | 1867 ++++++------ src/solvers/dgsem_tree/containers_3d.jl | 1156 ++++---- src/solvers/dgsem_tree/dg.jl | 45 +- src/solvers/dgsem_tree/dg_1d.jl | 908 +++--- src/solvers/dgsem_tree/dg_1d_parabolic.jl | 799 ++--- src/solvers/dgsem_tree/dg_2d.jl | 1716 +++++------ .../dgsem_tree/dg_2d_compressible_euler.jl | 796 ++--- src/solvers/dgsem_tree/dg_2d_parabolic.jl | 1042 ++++--- src/solvers/dgsem_tree/dg_2d_parallel.jl | 1376 ++++----- src/solvers/dgsem_tree/dg_3d.jl | 2095 +++++++------ .../dgsem_tree/dg_3d_compressible_euler.jl | 1137 +++---- src/solvers/dgsem_tree/dg_3d_parabolic.jl | 1216 ++++---- src/solvers/dgsem_tree/dg_parallel.jl | 39 +- src/solvers/dgsem_tree/indicators.jl | 462 +-- src/solvers/dgsem_tree/indicators_1d.jl | 601 ++-- src/solvers/dgsem_tree/indicators_2d.jl | 873 +++--- src/solvers/dgsem_tree/indicators_3d.jl | 380 +-- .../dgsem_unstructured/containers_2d.jl | 492 ++-- src/solvers/dgsem_unstructured/dg.jl | 21 +- src/solvers/dgsem_unstructured/dg_2d.jl | 735 ++--- .../dgsem_unstructured/indicators_2d.jl | 28 +- .../mappings_geometry_curved_2d.jl | 243 +- .../mappings_geometry_straight_2d.jl | 156 +- .../sort_boundary_conditions.jl | 128 +- src/solvers/fdsbp_tree/fdsbp.jl | 17 +- src/solvers/fdsbp_tree/fdsbp_1d.jl | 431 ++- src/solvers/fdsbp_tree/fdsbp_2d.jl | 519 ++-- src/solvers/fdsbp_tree/fdsbp_3d.jl | 616 ++-- src/solvers/solvers.jl | 2 +- src/solvers/solvers_parabolic.jl | 4 +- src/time_integration/methods_2N.jl | 266 +- src/time_integration/methods_3Sstar.jl | 384 +-- src/time_integration/time_integration.jl | 10 +- src/visualization/recipes_plots.jl | 389 +-- src/visualization/types.jl | 804 ++--- src/visualization/utilities.jl | 2310 ++++++++------- src/visualization/visualization.jl | 2 +- utils/build_sysimage.jl | 29 +- utils/euler-manufactured.jl | 107 +- utils/julia-format.jl | 38 - utils/precompile_execution_file.jl | 1 + utils/trixi-format.jl | 30 + utils/trixi2tec.jl | 103 +- utils/trixi2txt.jl | 539 ++-- 207 files changed, 46607 insertions(+), 43236 deletions(-) create mode 100644 .JuliaFormatter.toml create mode 100644 .github/workflows/FormatCheck.yml delete mode 100755 utils/julia-format.jl create mode 100755 utils/trixi-format.jl diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 00000000000..8518d202812 --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1,8 @@ +# Use SciML style: https://github.com/SciML/SciMLStyle +style = "sciml" + +# Python style alignment. See https://github.com/domluna/JuliaFormatter.jl/pull/732. 
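# (Illustrative note, assuming JuliaFormatter's documented YAS behavior: with
# YAS-style nesting, argument lists that break across lines are aligned under
# the opening parenthesis, as in the elixirs formatted by this commit, e.g.
#     mesh = TreeMesh(coordinates_min, coordinates_max,
#                     initial_refinement_level = 4,
#                     n_cells_max = 10_000)
# rather than being indented by a fixed width.)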
+yas_style_nesting = true + +# Align struct fields for better readability of large struct definitions +align_struct_field = true diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml new file mode 100644 index 00000000000..628d938dd76 --- /dev/null +++ b/.github/workflows/FormatCheck.yml @@ -0,0 +1,44 @@ +name: format-check + +on: + push: + branches: + - 'main' + tags: '*' + pull_request: + +jobs: + check-format: + runs-on: ${{ matrix.os }} + strategy: + matrix: + julia-version: [1] + julia-arch: [x86] + os: [ubuntu-latest] + steps: + - uses: julia-actions/setup-julia@latest + with: + version: ${{ matrix.julia-version }} + + - uses: actions/checkout@v3 + - name: Install JuliaFormatter and format + # This will use the latest version by default but you can set the version like so: + # + # julia -e 'using Pkg; Pkg.add(PackageSpec(name = "JuliaFormatter", version = "0.13.0"))' + # + # TODO: Change the call below to + # format(".") + run: | + julia -e 'using Pkg; Pkg.add(PackageSpec(name = "JuliaFormatter"))' + julia -e 'using JuliaFormatter; format(["benchmark", "ext", "src", "utils"])' + - name: Format check + run: | + julia -e ' + out = Cmd(`git diff --name-only`) |> read |> String + if out == "" + exit(0) + else + @error "Some files have not been formatted !!!" + write(stdout, out) + exit(1) + end' diff --git a/benchmark/benchmark_ec.jl b/benchmark/benchmark_ec.jl index f714d933b77..5bd20b41111 100644 --- a/benchmark/benchmark_ec.jl +++ b/benchmark/benchmark_ec.jl @@ -1,50 +1,50 @@ using Printf, BenchmarkTools, Trixi -function run_benchmarks(benchmark_run; levels=0:5, polydeg=3) - runtimes = zeros(length(levels)) - for (idx,initial_refinement_level) in enumerate(levels) - result = benchmark_run(; initial_refinement_level, polydeg) - display(result) - runtimes[idx] = result |> median |> time # in nanoseconds - end - return (; levels, runtimes, polydeg) +function run_benchmarks(benchmark_run; levels = 0:5, polydeg = 3) + runtimes = zeros(length(levels)) + for (idx, initial_refinement_level) in enumerate(levels) + result = benchmark_run(; initial_refinement_level, polydeg) + display(result) + runtimes[idx] = result |> median |> time # in nanoseconds + end + return (; levels, runtimes, polydeg) end function tabulate_benchmarks(args...; kwargs...) - result = run_benchmarks(args...; kwargs...) - println("#Elements | Runtime in seconds") - for (level,runtime) in zip(result.levels, result.runtimes) - @printf("%9d | %.2e\n", 4^level, 1.0e-9 * runtime) - end - for (level,runtime) in zip(result.levels, result.runtimes) - @printf("%.16e\n", 1.0e-9 * runtime) - end + result = run_benchmarks(args...; kwargs...) 
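    # (Illustrative sketch of the table printed below, with hypothetical timings:
    #     #Elements | Runtime in seconds
    #             1 | 1.23e-04
    #             4 | 4.67e-04
    #            16 | 1.85e-03
    #  The second loop then prints one raw runtime per level in full precision.)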
+ println("#Elements | Runtime in seconds") + for (level, runtime) in zip(result.levels, result.runtimes) + @printf("%9d | %.2e\n", 4^level, 1.0e-9*runtime) + end + for (level, runtime) in zip(result.levels, result.runtimes) + @printf("%.16e\n", 1.0e-9*runtime) + end end -function benchmark_euler(; initial_refinement_level=1, polydeg=3) +function benchmark_euler(; initial_refinement_level = 1, polydeg = 3) + γ = 1.4 + equations = CompressibleEulerEquations2D(γ) - γ = 1.4 - equations = CompressibleEulerEquations2D(γ) + surface_flux = flux_ranocha + volume_flux = flux_ranocha + solver = DGSEM(polydeg, surface_flux, VolumeIntegralFluxDifferencing(volume_flux)) - surface_flux = flux_ranocha - volume_flux = flux_ranocha - solver = DGSEM(polydeg, surface_flux, VolumeIntegralFluxDifferencing(volume_flux)) + coordinates_min = (-2.0, -2.0) + coordinates_max = (2.0, 2.0) + mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level = initial_refinement_level, + n_cells_max = 100_000) - coordinates_min = (-2.0, -2.0) - coordinates_max = ( 2.0, 2.0) - mesh = TreeMesh(coordinates_min, coordinates_max, - initial_refinement_level=initial_refinement_level, - n_cells_max=100_000) + semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_weak_blast_wave, + solver) - semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_weak_blast_wave, solver) + t0 = 0.0 + u0 = compute_coefficients(t0, semi) + du = similar(u0) - t0 = 0.0 - u0 = compute_coefficients(t0, semi) - du = similar(u0) - - @benchmark Trixi.rhs!($du, $u0, $semi, $t0) + @benchmark Trixi.rhs!($du, $u0, $semi, $t0) end # versioninfo(verbose=true) @show Threads.nthreads() -tabulate_benchmarks(benchmark_euler, levels=0:8) +tabulate_benchmarks(benchmark_euler, levels = 0:8) diff --git a/benchmark/benchmarks.jl b/benchmark/benchmarks.jl index edaeed63577..a3f7d1d2569 100644 --- a/benchmark/benchmarks.jl +++ b/benchmark/benchmarks.jl @@ -1,3 +1,6 @@ +# Disable formatting this file since it contains highly unusual formatting for better +# readability +#! format: off using BenchmarkTools using Trixi diff --git a/benchmark/elixir_2d_euler_vortex_p4est.jl b/benchmark/elixir_2d_euler_vortex_p4est.jl index 6c151842ebb..3ee97cc752f 100644 --- a/benchmark/elixir_2d_euler_vortex_p4est.jl +++ b/benchmark/elixir_2d_euler_vortex_p4est.jl @@ -17,43 +17,42 @@ The classical isentropic vortex test case of [NASA/CR-97-206253](https://ntrs.nasa.gov/citations/19980007543) """ function initial_condition_isentropic_vortex(x, t, equations::CompressibleEulerEquations2D) - # needs appropriate mesh size, e.g. [-10,-10]x[10,10] - # make sure that the inicenter does not exit the domain, e.g. T=10.0 - # initial center of the vortex - inicenter = SVector(0.0, 0.0) - # size and strength of the vortex - iniamplitude = 0.2 - # base flow - rho = 1.0 - v1 = 1.0 - v2 = 1.0 - vel = SVector(v1, v2) - p = 10.0 - rt = p / rho # ideal gas equation - cent = inicenter + vel*t # advection of center - cent = x - cent # distance to centerpoint - #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r - # cross product with iniaxis = [0,0,1] - cent = SVector(-cent[2], cent[1]) - r2 = cent[1]^2 + cent[2]^2 - du = iniamplitude/(2*π)*exp(0.5*(1-r2)) # vel. 
perturbation - dtemp = -(equations.gamma-1)/(2*equations.gamma*rt)*du^2 # isentrop - rho = rho * (1+dtemp)^(1\(equations.gamma-1)) - vel = vel + du*cent - v1, v2 = vel - p = p * (1+dtemp)^(equations.gamma/(equations.gamma-1)) - prim = SVector(rho, v1, v2, p) - return prim2cons(prim, equations) + # needs appropriate mesh size, e.g. [-10,-10]x[10,10] + # make sure that the inicenter does not exit the domain, e.g. T=10.0 + # initial center of the vortex + inicenter = SVector(0.0, 0.0) + # size and strength of the vortex + iniamplitude = 0.2 + # base flow + rho = 1.0 + v1 = 1.0 + v2 = 1.0 + vel = SVector(v1, v2) + p = 10.0 + rt = p / rho # ideal gas equation + cent = inicenter + vel * t # advection of center + cent = x - cent # distance to centerpoint + #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r + # cross product with iniaxis = [0,0,1] + cent = SVector(-cent[2], cent[1]) + r2 = cent[1]^2 + cent[2]^2 + du = iniamplitude / (2 * π) * exp(0.5 * (1 - r2)) # vel. perturbation + dtemp = -(equations.gamma - 1) / (2 * equations.gamma * rt) * du^2 # isentrop + rho = rho * (1 + dtemp)^(1 \ (equations.gamma - 1)) + vel = vel + du * cent + v1, v2 = vel + p = p * (1 + dtemp)^(equations.gamma / (equations.gamma - 1)) + prim = SVector(rho, v1, v2, p) + return prim2cons(prim, equations) end initial_condition = initial_condition_isentropic_vortex -solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (-10.0, -10.0) -coordinates_max = ( 10.0, 10.0) -mesh = P4estMesh((1, 1), polydeg=Trixi.polydeg(solver), - coordinates_min=coordinates_min, coordinates_max=coordinates_max, - initial_refinement_level=4) - +coordinates_max = (10.0, 10.0) +mesh = P4estMesh((1, 1), polydeg = Trixi.polydeg(solver), + coordinates_min = coordinates_min, coordinates_max = coordinates_max, + initial_refinement_level = 4) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) @@ -66,19 +65,20 @@ ode = semidiscretize(semi, tspan) summary_callback = SummaryCallback() analysis_interval = 100 -analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=true, - extra_analysis_errors=(:conservation_error,), - extra_analysis_integrals=(entropy, energy_total, - energy_kinetic, energy_internal)) +analysis_callback = AnalysisCallback(semi, interval = analysis_interval, + save_analysis = true, + extra_analysis_errors = (:conservation_error,), + extra_analysis_integrals = (entropy, energy_total, + energy_kinetic, + energy_internal)) -alive_callback = AliveCallback(analysis_interval=analysis_interval) +alive_callback = AliveCallback(analysis_interval = analysis_interval) callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) - ############################################################################### # run the simulation sol = solve(ode, BS3(), - save_everystep=false, callback=callbacks); + save_everystep = false, callback = callbacks); summary_callback() # print the timer summary diff --git a/benchmark/elixir_2d_euler_vortex_structured.jl b/benchmark/elixir_2d_euler_vortex_structured.jl index 344ac2d20c8..5627049c9e2 100644 --- a/benchmark/elixir_2d_euler_vortex_structured.jl +++ b/benchmark/elixir_2d_euler_vortex_structured.jl @@ -17,43 +17,42 @@ The classical isentropic vortex test case of [NASA/CR-97-206253](https://ntrs.nasa.gov/citations/19980007543) """ function initial_condition_isentropic_vortex(x, t, equations::CompressibleEulerEquations2D) - # needs 
appropriate mesh size, e.g. [-10,-10]x[10,10] - # make sure that the inicenter does not exit the domain, e.g. T=10.0 - # initial center of the vortex - inicenter = SVector(0.0, 0.0) - # size and strength of the vortex - iniamplitude = 0.2 - # base flow - rho = 1.0 - v1 = 1.0 - v2 = 1.0 - vel = SVector(v1, v2) - p = 10.0 - rt = p / rho # ideal gas equation - cent = inicenter + vel*t # advection of center - cent = x - cent # distance to centerpoint - #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r - # cross product with iniaxis = [0,0,1] - cent = SVector(-cent[2], cent[1]) - r2 = cent[1]^2 + cent[2]^2 - du = iniamplitude/(2*π)*exp(0.5*(1-r2)) # vel. perturbation - dtemp = -(equations.gamma-1)/(2*equations.gamma*rt)*du^2 # isentrop - rho = rho * (1+dtemp)^(1\(equations.gamma-1)) - vel = vel + du*cent - v1, v2 = vel - p = p * (1+dtemp)^(equations.gamma/(equations.gamma-1)) - prim = SVector(rho, v1, v2, p) - return prim2cons(prim, equations) + # needs appropriate mesh size, e.g. [-10,-10]x[10,10] + # make sure that the inicenter does not exit the domain, e.g. T=10.0 + # initial center of the vortex + inicenter = SVector(0.0, 0.0) + # size and strength of the vortex + iniamplitude = 0.2 + # base flow + rho = 1.0 + v1 = 1.0 + v2 = 1.0 + vel = SVector(v1, v2) + p = 10.0 + rt = p / rho # ideal gas equation + cent = inicenter + vel * t # advection of center + cent = x - cent # distance to centerpoint + #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r + # cross product with iniaxis = [0,0,1] + cent = SVector(-cent[2], cent[1]) + r2 = cent[1]^2 + cent[2]^2 + du = iniamplitude / (2 * π) * exp(0.5 * (1 - r2)) # vel. perturbation + dtemp = -(equations.gamma - 1) / (2 * equations.gamma * rt) * du^2 # isentrop + rho = rho * (1 + dtemp)^(1 \ (equations.gamma - 1)) + vel = vel + du * cent + v1, v2 = vel + p = p * (1 + dtemp)^(equations.gamma / (equations.gamma - 1)) + prim = SVector(rho, v1, v2, p) + return prim2cons(prim, equations) end initial_condition = initial_condition_isentropic_vortex -solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (-10.0, -10.0) -coordinates_max = ( 10.0, 10.0) +coordinates_max = (10.0, 10.0) cells_per_dimension = (16, 16) mesh = StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max) - semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) ############################################################################### @@ -65,19 +64,20 @@ ode = semidiscretize(semi, tspan) summary_callback = SummaryCallback() analysis_interval = 100 -analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=true, - extra_analysis_errors=(:conservation_error,), - extra_analysis_integrals=(entropy, energy_total, - energy_kinetic, energy_internal)) +analysis_callback = AnalysisCallback(semi, interval = analysis_interval, + save_analysis = true, + extra_analysis_errors = (:conservation_error,), + extra_analysis_integrals = (entropy, energy_total, + energy_kinetic, + energy_internal)) -alive_callback = AliveCallback(analysis_interval=analysis_interval) +alive_callback = AliveCallback(analysis_interval = analysis_interval) callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) - ############################################################################### # run the simulation sol = solve(ode, BS3(), - save_everystep=false, callback=callbacks); + save_everystep = false, callback = 
callbacks); summary_callback() # print the timer summary diff --git a/benchmark/elixir_2d_euler_vortex_tree.jl b/benchmark/elixir_2d_euler_vortex_tree.jl index b3873b4e01b..68e207c5344 100644 --- a/benchmark/elixir_2d_euler_vortex_tree.jl +++ b/benchmark/elixir_2d_euler_vortex_tree.jl @@ -17,43 +17,42 @@ The classical isentropic vortex test case of [NASA/CR-97-206253](https://ntrs.nasa.gov/citations/19980007543) """ function initial_condition_isentropic_vortex(x, t, equations::CompressibleEulerEquations2D) - # needs appropriate mesh size, e.g. [-10,-10]x[10,10] - # make sure that the inicenter does not exit the domain, e.g. T=10.0 - # initial center of the vortex - inicenter = SVector(0.0, 0.0) - # size and strength of the vortex - iniamplitude = 0.2 - # base flow - rho = 1.0 - v1 = 1.0 - v2 = 1.0 - vel = SVector(v1, v2) - p = 10.0 - rt = p / rho # ideal gas equation - cent = inicenter + vel*t # advection of center - cent = x - cent # distance to centerpoint - #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r - # cross product with iniaxis = [0,0,1] - cent = SVector(-cent[2], cent[1]) - r2 = cent[1]^2 + cent[2]^2 - du = iniamplitude/(2*π)*exp(0.5*(1-r2)) # vel. perturbation - dtemp = -(equations.gamma-1)/(2*equations.gamma*rt)*du^2 # isentrop - rho = rho * (1+dtemp)^(1\(equations.gamma-1)) - vel = vel + du*cent - v1, v2 = vel - p = p * (1+dtemp)^(equations.gamma/(equations.gamma-1)) - prim = SVector(rho, v1, v2, p) - return prim2cons(prim, equations) + # needs appropriate mesh size, e.g. [-10,-10]x[10,10] + # make sure that the inicenter does not exit the domain, e.g. T=10.0 + # initial center of the vortex + inicenter = SVector(0.0, 0.0) + # size and strength of the vortex + iniamplitude = 0.2 + # base flow + rho = 1.0 + v1 = 1.0 + v2 = 1.0 + vel = SVector(v1, v2) + p = 10.0 + rt = p / rho # ideal gas equation + cent = inicenter + vel * t # advection of center + cent = x - cent # distance to centerpoint + #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r + # cross product with iniaxis = [0,0,1] + cent = SVector(-cent[2], cent[1]) + r2 = cent[1]^2 + cent[2]^2 + du = iniamplitude / (2 * π) * exp(0.5 * (1 - r2)) # vel. 
perturbation + dtemp = -(equations.gamma - 1) / (2 * equations.gamma * rt) * du^2 # isentrop + rho = rho * (1 + dtemp)^(1 \ (equations.gamma - 1)) + vel = vel + du * cent + v1, v2 = vel + p = p * (1 + dtemp)^(equations.gamma / (equations.gamma - 1)) + prim = SVector(rho, v1, v2, p) + return prim2cons(prim, equations) end initial_condition = initial_condition_isentropic_vortex -solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (-10.0, -10.0) -coordinates_max = ( 10.0, 10.0) +coordinates_max = (10.0, 10.0) mesh = TreeMesh(coordinates_min, coordinates_max, - initial_refinement_level=4, - n_cells_max=10_000) - + initial_refinement_level = 4, + n_cells_max = 10_000) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) @@ -66,19 +65,20 @@ ode = semidiscretize(semi, tspan) summary_callback = SummaryCallback() analysis_interval = 100 -analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=true, - extra_analysis_errors=(:conservation_error,), - extra_analysis_integrals=(entropy, energy_total, - energy_kinetic, energy_internal)) +analysis_callback = AnalysisCallback(semi, interval = analysis_interval, + save_analysis = true, + extra_analysis_errors = (:conservation_error,), + extra_analysis_integrals = (entropy, energy_total, + energy_kinetic, + energy_internal)) -alive_callback = AliveCallback(analysis_interval=analysis_interval) +alive_callback = AliveCallback(analysis_interval = analysis_interval) callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) - ############################################################################### # run the simulation sol = solve(ode, BS3(), - save_everystep=false, callback=callbacks); + save_everystep = false, callback = callbacks); summary_callback() # print the timer summary diff --git a/benchmark/elixir_2d_euler_vortex_unstructured.jl b/benchmark/elixir_2d_euler_vortex_unstructured.jl index bd0ab4a8040..082b6648abf 100644 --- a/benchmark/elixir_2d_euler_vortex_unstructured.jl +++ b/benchmark/elixir_2d_euler_vortex_unstructured.jl @@ -18,42 +18,43 @@ The classical isentropic vortex test case of [NASA/CR-97-206253](https://ntrs.nasa.gov/citations/19980007543) """ function initial_condition_isentropic_vortex(x, t, equations::CompressibleEulerEquations2D) - # needs appropriate mesh size, e.g. [-10,-10]x[10,10] - # make sure that the inicenter does not exit the domain, e.g. T=10.0 - # initial center of the vortex - inicenter = SVector(0.0, 0.0) - # size and strength of the vortex - iniamplitude = 0.2 - # base flow - rho = 1.0 - v1 = 1.0 - v2 = 1.0 - vel = SVector(v1, v2) - p = 10.0 - rt = p / rho # ideal gas equation - cent = inicenter + vel*t # advection of center - cent = x - cent # distance to centerpoint - #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r - # cross product with iniaxis = [0,0,1] - cent = SVector(-cent[2], cent[1]) - r2 = cent[1]^2 + cent[2]^2 - du = iniamplitude/(2*π)*exp(0.5*(1-r2)) # vel. perturbation - dtemp = -(equations.gamma-1)/(2*equations.gamma*rt)*du^2 # isentrop - rho = rho * (1+dtemp)^(1\(equations.gamma-1)) - vel = vel + du*cent - v1, v2 = vel - p = p * (1+dtemp)^(equations.gamma/(equations.gamma-1)) - prim = SVector(rho, v1, v2, p) - return prim2cons(prim, equations) + # needs appropriate mesh size, e.g. [-10,-10]x[10,10] + # make sure that the inicenter does not exit the domain, e.g. 
T=10.0 + # initial center of the vortex + inicenter = SVector(0.0, 0.0) + # size and strength of the vortex + iniamplitude = 0.2 + # base flow + rho = 1.0 + v1 = 1.0 + v2 = 1.0 + vel = SVector(v1, v2) + p = 10.0 + rt = p / rho # ideal gas equation + cent = inicenter + vel * t # advection of center + cent = x - cent # distance to centerpoint + #cent=cross(iniaxis,cent) # distance to axis, tangent vector, length r + # cross product with iniaxis = [0,0,1] + cent = SVector(-cent[2], cent[1]) + r2 = cent[1]^2 + cent[2]^2 + du = iniamplitude / (2 * π) * exp(0.5 * (1 - r2)) # vel. perturbation + dtemp = -(equations.gamma - 1) / (2 * equations.gamma * rt) * du^2 # isentrop + rho = rho * (1 + dtemp)^(1 \ (equations.gamma - 1)) + vel = vel + du * cent + v1, v2 = vel + p = p * (1 + dtemp)^(equations.gamma / (equations.gamma - 1)) + prim = SVector(rho, v1, v2, p) + return prim2cons(prim, equations) end initial_condition = initial_condition_isentropic_vortex -solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) default_mesh_file = joinpath(@__DIR__, "mesh_uniform_cartesian.mesh") -isfile(default_mesh_file) || download("https://gist.githubusercontent.com/ranocha/f4ea19ba3b62348968c971db43d7798b/raw/a506abb9479c020920cf6068c142670fc1a9aadc/mesh_uniform_cartesian.mesh", default_mesh_file) +isfile(default_mesh_file) || + download("https://gist.githubusercontent.com/ranocha/f4ea19ba3b62348968c971db43d7798b/raw/a506abb9479c020920cf6068c142670fc1a9aadc/mesh_uniform_cartesian.mesh", + default_mesh_file) mesh_file = default_mesh_file -mesh = UnstructuredMesh2D(mesh_file, periodicity=true) - +mesh = UnstructuredMesh2D(mesh_file, periodicity = true) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) @@ -66,19 +67,20 @@ ode = semidiscretize(semi, tspan) summary_callback = SummaryCallback() analysis_interval = 100 -analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=true, - extra_analysis_errors=(:conservation_error,), - extra_analysis_integrals=(entropy, energy_total, - energy_kinetic, energy_internal)) +analysis_callback = AnalysisCallback(semi, interval = analysis_interval, + save_analysis = true, + extra_analysis_errors = (:conservation_error,), + extra_analysis_integrals = (entropy, energy_total, + energy_kinetic, + energy_internal)) -alive_callback = AliveCallback(analysis_interval=analysis_interval) +alive_callback = AliveCallback(analysis_interval = analysis_interval) callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) - ############################################################################### # run the simulation sol = solve(ode, BS3(), - save_everystep=false, callback=callbacks); + save_everystep = false, callback = callbacks); summary_callback() # print the timer summary diff --git a/benchmark/elixir_3d_euler_source_terms_structured.jl b/benchmark/elixir_3d_euler_source_terms_structured.jl index 9a284653f6f..b44eb0caa7c 100644 --- a/benchmark/elixir_3d_euler_source_terms_structured.jl +++ b/benchmark/elixir_3d_euler_source_terms_structured.jl @@ -9,17 +9,15 @@ equations = CompressibleEulerEquations3D(1.4) initial_condition = initial_condition_convergence_test -solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (0.0, 0.0, 0.0) coordinates_max = (2.0, 2.0, 2.0) cells_per_dimension = (4, 4, 4) mesh = StructuredMesh(cells_per_dimension, coordinates_min, 
coordinates_max) - semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, - source_terms=source_terms_convergence_test) - + source_terms = source_terms_convergence_test) ############################################################################### # ODE solvers, callbacks etc. @@ -30,16 +28,15 @@ ode = semidiscretize(semi, tspan) summary_callback = SummaryCallback() analysis_interval = 100 -analysis_callback = AnalysisCallback(semi, interval=analysis_interval) +analysis_callback = AnalysisCallback(semi, interval = analysis_interval) -alive_callback = AliveCallback(analysis_interval=analysis_interval) +alive_callback = AliveCallback(analysis_interval = analysis_interval) callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) - ############################################################################### # run the simulation sol = solve(ode, BS3(), - save_everystep=false, callback=callbacks); + save_everystep = false, callback = callbacks); summary_callback() # print the timer summary diff --git a/benchmark/elixir_3d_euler_source_terms_tree.jl b/benchmark/elixir_3d_euler_source_terms_tree.jl index 6a182da91e6..369b9359580 100644 --- a/benchmark/elixir_3d_euler_source_terms_tree.jl +++ b/benchmark/elixir_3d_euler_source_terms_tree.jl @@ -9,18 +9,16 @@ equations = CompressibleEulerEquations3D(1.4) initial_condition = initial_condition_convergence_test -solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) coordinates_min = (0.0, 0.0, 0.0) coordinates_max = (2.0, 2.0, 2.0) mesh = TreeMesh(coordinates_min, coordinates_max, - initial_refinement_level=2, - n_cells_max=10_000) - + initial_refinement_level = 2, + n_cells_max = 10_000) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, - source_terms=source_terms_convergence_test) - + source_terms = source_terms_convergence_test) ############################################################################### # ODE solvers, callbacks etc. @@ -31,16 +29,15 @@ ode = semidiscretize(semi, tspan) summary_callback = SummaryCallback() analysis_interval = 100 -analysis_callback = AnalysisCallback(semi, interval=analysis_interval) +analysis_callback = AnalysisCallback(semi, interval = analysis_interval) -alive_callback = AliveCallback(analysis_interval=analysis_interval) +alive_callback = AliveCallback(analysis_interval = analysis_interval) callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) - ############################################################################### # run the simulation sol = solve(ode, BS3(), - save_everystep=false, callback=callbacks); + save_everystep = false, callback = callbacks); summary_callback() # print the timer summary diff --git a/benchmark/multiply_dimensionwise/benchmark_multiply_dimensionwise.jl b/benchmark/multiply_dimensionwise/benchmark_multiply_dimensionwise.jl index e270941cbe9..e6dd0d47448 100644 --- a/benchmark/multiply_dimensionwise/benchmark_multiply_dimensionwise.jl +++ b/benchmark/multiply_dimensionwise/benchmark_multiply_dimensionwise.jl @@ -1,3 +1,7 @@ +# Disable formatting this file since it contains highly unusual formatting for better +# readability +#! 
format: off + import Pkg; Pkg.activate(@__DIR__); Pkg.instantiate() using BenchmarkTools diff --git a/benchmark/run_benchmarks.jl b/benchmark/run_benchmarks.jl index ea7aae3c8d4..3a92a9ba700 100644 --- a/benchmark/run_benchmarks.jl +++ b/benchmark/run_benchmarks.jl @@ -3,18 +3,17 @@ using PkgBenchmark using Trixi let results = judge(Trixi, - BenchmarkConfig(juliacmd=`$(Base.julia_cmd()) --check-bounds=no --threads=1`), # target - BenchmarkConfig(juliacmd=`$(Base.julia_cmd()) --check-bounds=no --threads=1`, id="main") # baseline - ) - - export_markdown(pkgdir(Trixi, "benchmark", "results_$(gethostname())_threads1.md"), results) + BenchmarkConfig(juliacmd = `$(Base.julia_cmd()) --check-bounds=no --threads=1`), # target + BenchmarkConfig(juliacmd = `$(Base.julia_cmd()) --check-bounds=no --threads=1`, + id = "main")) + export_markdown(pkgdir(Trixi, "benchmark", "results_$(gethostname())_threads1.md"), + results) end - let results = judge(Trixi, - BenchmarkConfig(juliacmd=`$(Base.julia_cmd()) --check-bounds=no --threads=2`), # target - BenchmarkConfig(juliacmd=`$(Base.julia_cmd()) --check-bounds=no --threads=2`, id="main") # baseline - ) - - export_markdown(pkgdir(Trixi, "benchmark", "results_$(gethostname())_threads2.md"), results) + BenchmarkConfig(juliacmd = `$(Base.julia_cmd()) --check-bounds=no --threads=2`), # target + BenchmarkConfig(juliacmd = `$(Base.julia_cmd()) --check-bounds=no --threads=2`, + id = "main")) + export_markdown(pkgdir(Trixi, "benchmark", "results_$(gethostname())_threads2.md"), + results) end diff --git a/docs/src/styleguide.md b/docs/src/styleguide.md index a6fc9585ec4..de367c086cc 100644 --- a/docs/src/styleguide.md +++ b/docs/src/styleguide.md @@ -1,10 +1,20 @@ # Style guide -The following lists a few coding conventions for Trixi.jl: +Coding style is an inherently personal - and thus hotly contested - issue. Since code is +usually "written once, read often", it helps regular developers, new users, and reviewers if +code is formatted consistently. We therefore believe in the merit of using a common coding +style throughout Trixi.jl, even at the expense that not everyone can be happy with every +detailed style decision. If you came here because you are furious about our code formatting +rules, here is a happy little whale for you to calm you down: 🐳 + +## Conventions +The following lists a few coding conventions for Trixi.jl. Note that in addition to these +conventions, we apply and enforce automated source code formatting +(see [below](@ref automated-source-code-formatting) for more details): * Modules, types, structs with `CamelCase`. * Functions, variables with lowercase `snake_case`. - * Indentation with 2 spaces (*never* tabs!), line continuations indented with 4 spaces. - * Maximum line length (strictly): **100**. + * Indentation with 4 spaces (*never* tabs!) + * Maximum line length (strictly): **92**. * Functions that mutate their *input* are named with a trailing `!`. * Functions order their parameters [similar to Julia Base](https://docs.julialang.org/en/v1/manual/style-guide/#Write-functions-with-argument-ordering-similar-to-Julia-Base-1). * The main modified argument comes first. For example, if the right-hand side `du` is modified, @@ -24,12 +34,34 @@ The following lists a few coding conventions for Trixi.jl: instead of `central_flux`. This helps when searching for available functions on the REPL (e.g., when trying to find all flux functions). 
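A minimal sketch illustrating several of the conventions above (a hypothetical
function, not taken from the Trixi.jl sources): `snake_case` naming, a trailing
`!` for mutation, and the modified argument `du` coming first.

```julia
# Add `factor * u` to `du` in place; the function mutates `du`, hence the
# trailing `!`, and the modified argument comes first.
function add_scaled!(du, u, factor)
    @. du += factor * u
    return nothing
end
```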
-Based on that, and personal experience, a formatting tool with a few helpful -options is included in `utils/julia-format.jl`. Note, however, that this tool is -not yet optimal, as it re-indents too greedily. +## [Automated source code formatting](@id automated-source-code-formatting) +We use [JuliaFormatter.jl](https://github.com/domluna/JuliaFormatter.jl) to format the +source code of Trixi.jl, which will also enforce *some* of the [Conventions](@ref) listed +above (e.g., line length or indentation with 4 spaces are automatically handled, while +capitalization of names is not). Our format is mostly based on the +[SciML](https://domluna.github.io/JuliaFormatter.jl/stable/sciml_style/)-style formatting +rules. For more details you can have a look at the current +[`.JuliaFormatter.toml`](https://github.com/trixi-framework/Trixi.jl/blob/main/.JuliaFormatter.toml) +file that holds the configuration options we use for JuliaFormatter.jl. -This is a list of handy style guides that are mostly consistent with each -other and this guide, and which have been used as a basis: +Note that we expect all contributions to Trixi.jl to be formatted with JuliaFormatter.jl +before being merged to the `main` branch. We ensure this by running an automated check on all +PRs that verifies that running JuliaFormatter.jl again will not change the source code. - * [https://www.juliaopt.org/JuMP.jl/stable/style/](https://www.juliaopt.org/JuMP.jl/stable/style/) - * [https://github.com/jrevels/YASGuide](https://github.com/jrevels/YASGuide) +To format your contributions before creating a PR (or, at least, before requesting a review +of your PR), you need to install JuliaFormatter.jl first by running +```shell +julia -e 'using Pkg; Pkg.add("JuliaFormatter")' +``` +You can then recursively format all Julia files in the Trixi.jl repo by executing +```shell +julia -e 'using JuliaFormatter; format(".")' +``` +from inside the Trixi.jl repository. For convenience, there is also a script you can +directly run from your terminal shell, which will automatically install JuliaFormatter in a +temporary environment and then run it: +```shell +utils/trixi-format.jl +``` +You can get more information about using the convenience script by running it with the +`--help`/`-h` flag. diff --git a/ext/TrixiMakieExt.jl b/ext/TrixiMakieExt.jl index 4618048556b..1eb11f6a422 100644 --- a/ext/TrixiMakieExt.jl +++ b/ext/TrixiMakieExt.jl @@ -3,10 +3,10 @@ module TrixiMakieExt # Required for visualization code if isdefined(Base, :get_extension) - using Makie: Makie, GeometryBasics + using Makie: Makie, GeometryBasics else - # Until Julia v1.9 is the minimum required version for Trixi.jl, we still support Requires.jl - using ..Makie: Makie, GeometryBasics + # Until Julia v1.9 is the minimum required version for Trixi.jl, we still support Requires.jl + using ..Makie: Makie, GeometryBasics end # Use all exported symbols to avoid having to rewrite `recipes_makie.jl` @@ -24,121 +24,130 @@ import Trixi: iplot, iplot! # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # First some utilities # Given a reference plotting triangulation, this function generates a plotting triangulation for # the entire global mesh. The output can be plotted using `Makie.mesh`.
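# (Illustrative sketch, hypothetical usage of the function defined below,
#  assuming `pd` is a `PlotData2DTriangulated` object with a density variable:
#      plotting_mesh = global_plotting_triangulation_makie(pd["rho"])
#      Makie.mesh(plotting_mesh, color = getindex.(plotting_mesh.position, 3))
#  i.e., the z-coordinates of the merged mesh carry the solution values.)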
-function global_plotting_triangulation_makie(pds::PlotDataSeries{<:PlotData2DTriangulated}; +function global_plotting_triangulation_makie(pds::PlotDataSeries{ + <:PlotData2DTriangulated + }; set_z_coordinate_zero = false) - @unpack variable_id = pds - pd = pds.plot_data - @unpack x, y, data, t = pd - - makie_triangles = Makie.to_triangles(t) - - # trimesh[i] holds GeometryBasics.Mesh containing plotting information on the ith element. - # Note: Float32 is required by GeometryBasics - num_plotting_nodes, num_elements = size(x) - trimesh = Vector{GeometryBasics.Mesh{3, Float32}}(undef, num_elements) - coordinates = zeros(Float32, num_plotting_nodes, 3) - for element in Base.OneTo(num_elements) - for i in Base.OneTo(num_plotting_nodes) - coordinates[i, 1] = x[i, element] - coordinates[i, 2] = y[i, element] - if set_z_coordinate_zero == false - coordinates[i, 3] = data[i, element][variable_id] - end + @unpack variable_id = pds + pd = pds.plot_data + @unpack x, y, data, t = pd + + makie_triangles = Makie.to_triangles(t) + + # trimesh[i] holds GeometryBasics.Mesh containing plotting information on the ith element. + # Note: Float32 is required by GeometryBasics + num_plotting_nodes, num_elements = size(x) + trimesh = Vector{GeometryBasics.Mesh{3, Float32}}(undef, num_elements) + coordinates = zeros(Float32, num_plotting_nodes, 3) + for element in Base.OneTo(num_elements) + for i in Base.OneTo(num_plotting_nodes) + coordinates[i, 1] = x[i, element] + coordinates[i, 2] = y[i, element] + if set_z_coordinate_zero == false + coordinates[i, 3] = data[i, element][variable_id] + end + end + trimesh[element] = GeometryBasics.normal_mesh(Makie.to_vertices(coordinates), + makie_triangles) end - trimesh[element] = GeometryBasics.normal_mesh(Makie.to_vertices(coordinates), makie_triangles) - end - plotting_mesh = merge([trimesh...]) # merge meshes on each element into one large mesh - return plotting_mesh + plotting_mesh = merge([trimesh...]) # merge meshes on each element into one large mesh + return plotting_mesh end # Returns a list of `Makie.Point`s which can be used to plot the mesh, or a solution "wireframe" # (e.g., a plot of the mesh lines but with the z-coordinate equal to the value of the solution). -function convert_PlotData2D_to_mesh_Points(pds::PlotDataSeries{<:PlotData2DTriangulated}; +function convert_PlotData2D_to_mesh_Points(pds::PlotDataSeries{<:PlotData2DTriangulated + }; set_z_coordinate_zero = false) - @unpack variable_id = pds - pd = pds.plot_data - @unpack x_face, y_face, face_data = pd - - if set_z_coordinate_zero - # plot 2d surface by setting z coordinate to zero. - # Uses `x_face` since `face_data` may be `::Nothing`, as it's not used for 2D plots. - sol_f = zeros(eltype(first(x_face)), size(x_face)) - else - sol_f = StructArrays.component(face_data, variable_id) - end - - # This line separates solution lines on each edge by NaNs to ensure that they are rendered - # separately. The coordinates `xf`, `yf` and the solution `sol_f`` are assumed to be a matrix - # whose columns correspond to different elements. We add NaN separators by appending a row of - # NaNs to this matrix. We also flatten (e.g., apply `vec` to) the result, as this speeds up - # plotting. - xyz_wireframe = GeometryBasics.Point.(map(x->vec(vcat(x, fill(NaN, 1, size(x, 2)))), (x_face, y_face, sol_f))...) - - return xyz_wireframe + @unpack variable_id = pds + pd = pds.plot_data + @unpack x_face, y_face, face_data = pd + + if set_z_coordinate_zero + # plot 2d surface by setting z coordinate to zero. 
+ # Uses `x_face` since `face_data` may be `::Nothing`, as it's not used for 2D plots. + sol_f = zeros(eltype(first(x_face)), size(x_face)) + else + sol_f = StructArrays.component(face_data, variable_id) + end + + # This line separates solution lines on each edge by NaNs to ensure that they are rendered + # separately. The coordinates `xf`, `yf` and the solution `sol_f`` are assumed to be a matrix + # whose columns correspond to different elements. We add NaN separators by appending a row of + # NaNs to this matrix. We also flatten (e.g., apply `vec` to) the result, as this speeds up + # plotting. + xyz_wireframe = GeometryBasics.Point.(map(x -> vec(vcat(x, + fill(NaN, 1, size(x, 2)))), + (x_face, y_face, sol_f))...) + + return xyz_wireframe end # Creates a GeometryBasics triangulation for the visualization of a ScalarData2D plot object. function global_plotting_triangulation_makie(pd::PlotData2DTriangulated{<:ScalarData}; set_z_coordinate_zero = false) - @unpack x, y, data, t = pd - - makie_triangles = Makie.to_triangles(t) - - # trimesh[i] holds GeometryBasics.Mesh containing plotting information on the ith element. - # Note: Float32 is required by GeometryBasics - num_plotting_nodes, num_elements = size(x) - trimesh = Vector{GeometryBasics.Mesh{3, Float32}}(undef, num_elements) - coordinates = zeros(Float32, num_plotting_nodes, 3) - for element in Base.OneTo(num_elements) - for i in Base.OneTo(num_plotting_nodes) - coordinates[i, 1] = x[i, element] - coordinates[i, 2] = y[i, element] - if set_z_coordinate_zero == false - coordinates[i, 3] = data.data[i, element] - end + @unpack x, y, data, t = pd + + makie_triangles = Makie.to_triangles(t) + + # trimesh[i] holds GeometryBasics.Mesh containing plotting information on the ith element. + # Note: Float32 is required by GeometryBasics + num_plotting_nodes, num_elements = size(x) + trimesh = Vector{GeometryBasics.Mesh{3, Float32}}(undef, num_elements) + coordinates = zeros(Float32, num_plotting_nodes, 3) + for element in Base.OneTo(num_elements) + for i in Base.OneTo(num_plotting_nodes) + coordinates[i, 1] = x[i, element] + coordinates[i, 2] = y[i, element] + if set_z_coordinate_zero == false + coordinates[i, 3] = data.data[i, element] + end + end + trimesh[element] = GeometryBasics.normal_mesh(Makie.to_vertices(coordinates), + makie_triangles) end - trimesh[element] = GeometryBasics.normal_mesh(Makie.to_vertices(coordinates), makie_triangles) - end - plotting_mesh = merge([trimesh...]) # merge meshes on each element into one large mesh - return plotting_mesh + plotting_mesh = merge([trimesh...]) # merge meshes on each element into one large mesh + return plotting_mesh end # Returns a list of `GeometryBasics.Point`s which can be used to plot the mesh, or a solution "wireframe" # (e.g., a plot of the mesh lines but with the z-coordinate equal to the value of the solution). function convert_PlotData2D_to_mesh_Points(pd::PlotData2DTriangulated{<:ScalarData}; set_z_coordinate_zero = false) - @unpack x_face, y_face, face_data = pd - - if set_z_coordinate_zero - # plot 2d surface by setting z coordinate to zero. - # Uses `x_face` since `face_data` may be `::Nothing`, as it's not used for 2D plots. - sol_f = zeros(eltype(first(x_face)), size(x_face)) - else - sol_f = face_data - end - - # This line separates solution lines on each edge by NaNs to ensure that they are rendered - # separately. The coordinates `xf`, `yf` and the solution `sol_f`` are assumed to be a matrix - # whose columns correspond to different elements. 
We add NaN separators by appending a row of - # NaNs to this matrix. We also flatten (e.g., apply `vec` to) the result, as this speeds up - # plotting. - xyz_wireframe = GeometryBasics.Point.(map(x->vec(vcat(x, fill(NaN, 1, size(x, 2)))), (x_face, y_face, sol_f))...) - - return xyz_wireframe -end + @unpack x_face, y_face, face_data = pd + if set_z_coordinate_zero + # plot 2d surface by setting z coordinate to zero. + # Uses `x_face` since `face_data` may be `::Nothing`, as it's not used for 2D plots. + sol_f = zeros(eltype(first(x_face)), size(x_face)) + else + sol_f = face_data + end + + # This line separates solution lines on each edge by NaNs to ensure that they are rendered + # separately. The coordinates `xf`, `yf` and the solution `sol_f`` are assumed to be a matrix + # whose columns correspond to different elements. We add NaN separators by appending a row of + # NaNs to this matrix. We also flatten (e.g., apply `vec` to) the result, as this speeds up + # plotting. + xyz_wireframe = GeometryBasics.Point.(map(x -> vec(vcat(x, + fill(NaN, 1, size(x, 2)))), + (x_face, y_face, sol_f))...) + + return xyz_wireframe +end # We set the Makie default colormap to match Plots.jl, which uses `:inferno` by default. default_Makie_colormap() = :inferno # convenience struct for editing Makie plots after they're created. struct FigureAndAxes{Axes} - fig::Makie.Figure - axes::Axes + fig::Makie.Figure + axes::Axes end # for "quiet" return arguments to Makie.plot(::TrixiODESolution) and @@ -146,14 +155,14 @@ end Base.show(io::IO, fa::FigureAndAxes) = nothing # allows for returning fig, axes = Makie.plot(...) -function Base.iterate(fa::FigureAndAxes, state=1) - if state == 1 - return (fa.fig, 2) - elseif state == 2 - return (fa.axes, 3) - else - return nothing - end +function Base.iterate(fa::FigureAndAxes, state = 1) + if state == 1 + return (fa.fig, 2) + elseif state == 2 + return (fa.axes, 3) + else + return nothing + end end """ @@ -173,183 +182,192 @@ function iplot end # Enables `iplot(PlotData2D(sol))`. function iplot(pd::PlotData2DTriangulated; - plot_mesh=true, show_axis=false, colormap=default_Makie_colormap(), - variable_to_plot_in=1) - - @unpack variable_names = pd - - # Initialize a Makie figure that we'll add the solution and toggle switches to. - fig = Makie.Figure() - - # Set up options for the drop-down menu - menu_options = [zip(variable_names, 1:length(variable_names))...] - menu = Makie.Menu(fig, options=menu_options) - - # Initialize toggle switches for viewing the mesh - toggle_solution_mesh = Makie.Toggle(fig, active=plot_mesh) - toggle_mesh = Makie.Toggle(fig, active=plot_mesh) - - # Add dropdown menu and toggle switches to the left side of the figure. - fig[1, 1] = Makie.vgrid!( - Makie.Label(fig, "Solution field", width=nothing), menu, - Makie.Label(fig, "Solution mesh visible"), toggle_solution_mesh, - Makie.Label(fig, "Mesh visible"), toggle_mesh; - tellheight=false, width = 200 - ) - - # Create a zoomable interactive axis object on top of which to plot the solution. - ax = Makie.LScene(fig[1, 2], scenekw=(show_axis=show_axis,)) - - # Initialize the dropdown menu to `variable_to_plot_in` - # Since menu.selection is an Observable type, we need to dereference it using `[]` to set. - menu.selection[] = variable_to_plot_in - menu.i_selected[] = variable_to_plot_in - - # Since `variable_to_plot` is an Observable, these lines are re-run whenever `variable_to_plot[]` - # is updated from the drop-down menu. 
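    # (Illustrative sketch of the Observable/`@lift` mechanism relied on here,
    #  with hypothetical values:
    #      x = Makie.Observable(1)
    #      y = Makie.@lift($x + 1)   # `y` recomputes whenever `x` changes
    #      x[] = 10                  # now y[] == 11
    #  The drop-down `menu.selection` drives the plot objects the same way.)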
- plotting_mesh = Makie.@lift(global_plotting_triangulation_makie(getindex(pd, variable_names[$(menu.selection)]))) - solution_z = Makie.@lift(getindex.($plotting_mesh.position, 3)) - - # Plot the actual solution. - Makie.mesh!(ax, plotting_mesh; color=solution_z, colormap) - - # Create a mesh overlay by plotting a mesh both on top of and below the solution contours. - wire_points = Makie.@lift(convert_PlotData2D_to_mesh_Points(getindex(pd, variable_names[$(menu.selection)]))) - wire_mesh_top = Makie.lines!(ax, wire_points, color=:white) - wire_mesh_bottom = Makie.lines!(ax, wire_points, color=:white) - Makie.translate!(wire_mesh_top, 0, 0, 1e-3) - Makie.translate!(wire_mesh_bottom, 0, 0, -1e-3) - - # This draws flat mesh lines below the solution. - function compute_z_offset(solution_z) - zmin = minimum(solution_z) - zrange = (x->x[2]-x[1])(extrema(solution_z)) - return zmin - .25*zrange - end - z_offset = Makie.@lift(compute_z_offset($solution_z)) - get_flat_points(wire_points, z_offset) = [Makie.Point(point.data[1:2]..., z_offset) for point in wire_points] - flat_wire_points = Makie.@lift get_flat_points($wire_points, $z_offset) - wire_mesh_flat = Makie.lines!(ax, flat_wire_points, color=:black) - - # create a small variation in the extrema to avoid the Makie `range_step` cannot be zero error. - # see https://github.com/MakieOrg/Makie.jl/issues/931 for more details. - # the colorbar range is perturbed by 1e-5 * the magnitude of the solution. - function scaled_extrema(x) - ex = extrema(x) - if ex[2] ≈ ex[1] # if solution is close to constant, perturb colorbar - return ex .+ 1e-5 .* maximum(abs.(ex)) .* (-1, 1) - else - return ex + plot_mesh = true, show_axis = false, colormap = default_Makie_colormap(), + variable_to_plot_in = 1) + @unpack variable_names = pd + + # Initialize a Makie figure that we'll add the solution and toggle switches to. + fig = Makie.Figure() + + # Set up options for the drop-down menu + menu_options = [zip(variable_names, 1:length(variable_names))...] + menu = Makie.Menu(fig, options = menu_options) + + # Initialize toggle switches for viewing the mesh + toggle_solution_mesh = Makie.Toggle(fig, active = plot_mesh) + toggle_mesh = Makie.Toggle(fig, active = plot_mesh) + + # Add dropdown menu and toggle switches to the left side of the figure. + fig[1, 1] = Makie.vgrid!(Makie.Label(fig, "Solution field", width = nothing), menu, + Makie.Label(fig, "Solution mesh visible"), + toggle_solution_mesh, + Makie.Label(fig, "Mesh visible"), toggle_mesh; + tellheight = false, width = 200) + + # Create a zoomable interactive axis object on top of which to plot the solution. + ax = Makie.LScene(fig[1, 2], scenekw = (show_axis = show_axis,)) + + # Initialize the dropdown menu to `variable_to_plot_in` + # Since menu.selection is an Observable type, we need to dereference it using `[]` to set. + menu.selection[] = variable_to_plot_in + menu.i_selected[] = variable_to_plot_in + + # Since `variable_to_plot` is an Observable, these lines are re-run whenever `variable_to_plot[]` + # is updated from the drop-down menu. + plotting_mesh = Makie.@lift(global_plotting_triangulation_makie(getindex(pd, + variable_names[$(menu.selection)]))) + solution_z = Makie.@lift(getindex.($plotting_mesh.position, 3)) + + # Plot the actual solution. + Makie.mesh!(ax, plotting_mesh; color = solution_z, colormap) + + # Create a mesh overlay by plotting a mesh both on top of and below the solution contours. 
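    # (Illustrative sketch of the NaN-separator trick performed by
    #  `convert_PlotData2D_to_mesh_Points`, with hypothetical data:
    #      x = [0.0 1.0; 1.0 2.0]                  # columns = mesh edges
    #      vec(vcat(x, fill(NaN, 1, size(x, 2))))  # -> [0.0, 1.0, NaN, 1.0, 2.0, NaN]
    #  so all edges render as separate segments of one single line object.)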
+ wire_points = Makie.@lift(convert_PlotData2D_to_mesh_Points(getindex(pd, + variable_names[$(menu.selection)]))) + wire_mesh_top = Makie.lines!(ax, wire_points, color = :white) + wire_mesh_bottom = Makie.lines!(ax, wire_points, color = :white) + Makie.translate!(wire_mesh_top, 0, 0, 1e-3) + Makie.translate!(wire_mesh_bottom, 0, 0, -1e-3) + + # This draws flat mesh lines below the solution. + function compute_z_offset(solution_z) + zmin = minimum(solution_z) + zrange = (x -> x[2] - x[1])(extrema(solution_z)) + return zmin - 0.25 * zrange + end + z_offset = Makie.@lift(compute_z_offset($solution_z)) + function get_flat_points(wire_points, z_offset) + [Makie.Point(point.data[1:2]..., z_offset) for point in wire_points] + end + flat_wire_points = Makie.@lift get_flat_points($wire_points, $z_offset) + wire_mesh_flat = Makie.lines!(ax, flat_wire_points, color = :black) + + # create a small variation in the extrema to avoid the Makie `range_step` cannot be zero error. + # see https://github.com/MakieOrg/Makie.jl/issues/931 for more details. + # the colorbar range is perturbed by 1e-5 * the magnitude of the solution. + function scaled_extrema(x) + ex = extrema(x) + if ex[2] ≈ ex[1] # if solution is close to constant, perturb colorbar + return ex .+ 1e-5 .* maximum(abs.(ex)) .* (-1, 1) + else + return ex + end end - end - # Resets the colorbar each time the solution changes. - Makie.Colorbar(fig[1, 3], limits = Makie.@lift(scaled_extrema($solution_z)), colormap=colormap) + # Resets the colorbar each time the solution changes. + Makie.Colorbar(fig[1, 3], limits = Makie.@lift(scaled_extrema($solution_z)), + colormap = colormap) - # This syncs the toggle buttons to the mesh plots. - Makie.connect!(wire_mesh_top.visible, toggle_solution_mesh.active) - Makie.connect!(wire_mesh_bottom.visible, toggle_solution_mesh.active) - Makie.connect!(wire_mesh_flat.visible, toggle_mesh.active) + # This syncs the toggle buttons to the mesh plots. + Makie.connect!(wire_mesh_top.visible, toggle_solution_mesh.active) + Makie.connect!(wire_mesh_bottom.visible, toggle_solution_mesh.active) + Makie.connect!(wire_mesh_flat.visible, toggle_mesh.active) - # On OSX, shift-command-4 for screenshots triggers a constant "up-zoom". - # To avoid this, we remap up-zoom to the right shift button instead. - Makie.cameracontrols(ax.scene).attributes[:up_key][] = Makie.Keyboard.right_shift + # On OSX, shift-command-4 for screenshots triggers a constant "up-zoom". + # To avoid this, we remap up-zoom to the right shift button instead. + Makie.cameracontrols(ax.scene).attributes[:up_key][] = Makie.Keyboard.right_shift - # typing this pulls up the figure (similar to display(plot!()) in Plots.jl) - fig + # typing this pulls up the figure (similar to display(plot!()) in Plots.jl) + fig end function iplot(u, mesh, equations, solver, cache; - solution_variables=nothing, nvisnodes=2*nnodes(solver), kwargs...) - @assert ndims(mesh) == 2 + solution_variables = nothing, nvisnodes = 2 * nnodes(solver), kwargs...) + @assert ndims(mesh) == 2 - pd = PlotData2DTriangulated(u, mesh, equations, solver, cache; - solution_variables=solution_variables, nvisnodes=nvisnodes) + pd = PlotData2DTriangulated(u, mesh, equations, solver, cache; + solution_variables = solution_variables, + nvisnodes = nvisnodes) - iplot(pd; kwargs...) + iplot(pd; kwargs...) end # redirect `iplot(sol)` to dispatchable `iplot` signature. iplot(sol::TrixiODESolution; kwargs...) = iplot(sol.u[end], sol.prob.p; kwargs...) -iplot(u, semi; kwargs...) 
= iplot(wrap_array_native(u, semi), mesh_equations_solver_cache(semi)...; kwargs...) +function iplot(u, semi; kwargs...) + iplot(wrap_array_native(u, semi), mesh_equations_solver_cache(semi)...; kwargs...) +end # Interactive visualization of user-defined ScalarData. function iplot(pd::PlotData2DTriangulated{<:ScalarData}; - show_axis=false, colormap=default_Makie_colormap(), plot_mesh=false) - fig = Makie.Figure() + show_axis = false, colormap = default_Makie_colormap(), + plot_mesh = false) + fig = Makie.Figure() - # Create a zoomable interactive axis object on top of which to plot the solution. - ax = Makie.LScene(fig[1, 1], scenekw=(show_axis=show_axis,)) + # Create a zoomable interactive axis object on top of which to plot the solution. + ax = Makie.LScene(fig[1, 1], scenekw = (show_axis = show_axis,)) - # plot the user-defined ScalarData - fig_axis_plt = iplot!(FigureAndAxes(fig, ax), pd; colormap=colormap, plot_mesh=plot_mesh) + # plot the user-defined ScalarData + fig_axis_plt = iplot!(FigureAndAxes(fig, ax), pd; colormap = colormap, + plot_mesh = plot_mesh) - fig - return fig_axis_plt + fig + return fig_axis_plt end function iplot!(fig_axis::Union{FigureAndAxes, Makie.FigureAxisPlot}, pd::PlotData2DTriangulated{<:ScalarData}; - colormap=default_Makie_colormap(), plot_mesh=false) - - # destructure first two fields of either FigureAndAxes or Makie.FigureAxisPlot - fig, ax = fig_axis - - # create triangulation of the scalar data to plot - plotting_mesh = global_plotting_triangulation_makie(pd) - solution_z = getindex.(plotting_mesh.position, 3) - plt = Makie.mesh!(ax, plotting_mesh; color=solution_z, colormap) - - if plot_mesh - wire_points = convert_PlotData2D_to_mesh_Points(pd) - wire_mesh_top = Makie.lines!(ax, wire_points, color=:white) - wire_mesh_bottom = Makie.lines!(ax, wire_points, color=:white) - Makie.translate!(wire_mesh_top, 0, 0, 1e-3) - Makie.translate!(wire_mesh_bottom, 0, 0, -1e-3) - end + colormap = default_Makie_colormap(), plot_mesh = false) + + # destructure first two fields of either FigureAndAxes or Makie.FigureAxisPlot + fig, ax = fig_axis + + # create triangulation of the scalar data to plot + plotting_mesh = global_plotting_triangulation_makie(pd) + solution_z = getindex.(plotting_mesh.position, 3) + plt = Makie.mesh!(ax, plotting_mesh; color = solution_z, colormap) + + if plot_mesh + wire_points = convert_PlotData2D_to_mesh_Points(pd) + wire_mesh_top = Makie.lines!(ax, wire_points, color = :white) + wire_mesh_bottom = Makie.lines!(ax, wire_points, color = :white) + Makie.translate!(wire_mesh_top, 0, 0, 1e-3) + Makie.translate!(wire_mesh_bottom, 0, 0, -1e-3) + end - # Add a colorbar to the rightmost part of the layout - Makie.Colorbar(fig[1, end+1], plt) + # Add a colorbar to the rightmost part of the layout + Makie.Colorbar(fig[1, end + 1], plt) - fig - return Makie.FigureAxisPlot(fig, ax, plt) + fig + return Makie.FigureAxisPlot(fig, ax, plt) end # ================== new Makie plot recipes ==================== # This initializes a Makie recipe, which creates a new type definition which Makie uses to create # custom `trixiheatmap` plots. 
See also https://makie.juliaplots.org/stable/recipes.html -@Makie.recipe(TrixiHeatmap, plot_data_series) do scene - Makie.Theme( - colormap = default_Makie_colormap() - ) +Makie.@recipe(TrixiHeatmap, plot_data_series) do scene + Makie.Theme(colormap = default_Makie_colormap()) end function Makie.plot!(myplot::TrixiHeatmap) - pds = myplot[:plot_data_series][] - - plotting_mesh = global_plotting_triangulation_makie(pds; set_z_coordinate_zero = true) - - @unpack variable_id = pds - pd = pds.plot_data - solution_z = vec(StructArrays.component(pd.data, variable_id)) - Makie.mesh!(myplot, plotting_mesh, color=solution_z, shading=false, colormap=myplot[:colormap]) - myplot.colorrange = extrema(solution_z) - - # Makie hides keyword arguments within `myplot`; see also - # https://github.com/JuliaPlots/Makie.jl/issues/837#issuecomment-845985070 - plot_mesh = if haskey(myplot, :plot_mesh) - myplot.plot_mesh[] - else - true # default to plotting the mesh - end - - if plot_mesh - xyz_wireframe = convert_PlotData2D_to_mesh_Points(pds; set_z_coordinate_zero = true) - Makie.lines!(myplot, xyz_wireframe, color=:lightgrey) - end - - myplot + pds = myplot[:plot_data_series][] + + plotting_mesh = global_plotting_triangulation_makie(pds; + set_z_coordinate_zero = true) + + @unpack variable_id = pds + pd = pds.plot_data + solution_z = vec(StructArrays.component(pd.data, variable_id)) + Makie.mesh!(myplot, plotting_mesh, color = solution_z, shading = false, + colormap = myplot[:colormap]) + myplot.colorrange = extrema(solution_z) + + # Makie hides keyword arguments within `myplot`; see also + # https://github.com/JuliaPlots/Makie.jl/issues/837#issuecomment-845985070 + plot_mesh = if haskey(myplot, :plot_mesh) + myplot.plot_mesh[] + else + true # default to plotting the mesh + end + + if plot_mesh + xyz_wireframe = convert_PlotData2D_to_mesh_Points(pds; + set_z_coordinate_zero = true) + Makie.lines!(myplot, xyz_wireframe, color = :lightgrey) + end + + myplot end # redirects Makie.plot(pd::PlotDataSeries) to custom recipe TrixiHeatmap(pd) @@ -357,50 +375,52 @@ Makie.plottype(::Trixi.PlotDataSeries{<:Trixi.PlotData2DTriangulated}) = TrixiHe # Makie does not yet support layouts in its plot recipes, so we overload `Makie.plot` directly. function Makie.plot(sol::TrixiODESolution; - plot_mesh=false, solution_variables=nothing, colormap=default_Makie_colormap()) - return Makie.plot(PlotData2DTriangulated(sol; solution_variables); plot_mesh, colormap) + plot_mesh = false, solution_variables = nothing, + colormap = default_Makie_colormap()) + return Makie.plot(PlotData2DTriangulated(sol; solution_variables); plot_mesh, + colormap) end -function Makie.plot(pd::PlotData2DTriangulated, fig=Makie.Figure(); - plot_mesh=false, colormap=default_Makie_colormap()) - figAxes = Makie.plot!(fig, pd; plot_mesh, colormap) - display(figAxes.fig) - return figAxes +function Makie.plot(pd::PlotData2DTriangulated, fig = Makie.Figure(); + plot_mesh = false, colormap = default_Makie_colormap()) + figAxes = Makie.plot!(fig, pd; plot_mesh, colormap) + display(figAxes.fig) + return figAxes end function Makie.plot!(fig, pd::PlotData2DTriangulated; - plot_mesh=false, colormap=default_Makie_colormap()) - # Create layout that is as square as possible, when there are more than 3 subplots. - # This is done with a preference for more columns than rows if not. 
- if length(pd) <= 3 - cols = length(pd) - rows = 1 - else - cols = ceil(Int, sqrt(length(pd))) - rows = cld(length(pd), cols) - end - - axes = [Makie.Axis(fig[i,j], xlabel="x", ylabel="y") for j in 1:rows, i in 1:cols] - row_list, col_list = [i for j in 1:rows, i in 1:cols], [j for j in 1:rows, i in 1:cols] - - for (variable_to_plot, (variable_name, pds)) in enumerate(pd) - ax = axes[variable_to_plot] - plt = trixiheatmap!(ax, pds; plot_mesh, colormap) - - row = row_list[variable_to_plot] - col = col_list[variable_to_plot] - Makie.Colorbar(fig[row, col][1,2], plt) - - ax.aspect = Makie.DataAspect() # equal aspect ratio - ax.title = variable_name - Makie.xlims!(ax, extrema(pd.x)) - Makie.ylims!(ax, extrema(pd.y)) - end - - return FigureAndAxes(fig, axes) -end + plot_mesh = false, colormap = default_Makie_colormap()) + # Create layout that is as square as possible, when there are more than 3 subplots. + # This is done with a preference for more columns than rows if not. + if length(pd) <= 3 + cols = length(pd) + rows = 1 + else + cols = ceil(Int, sqrt(length(pd))) + rows = cld(length(pd), cols) + end + + axes = [Makie.Axis(fig[i, j], xlabel = "x", ylabel = "y") + for j in 1:rows, i in 1:cols] + row_list, col_list = ([i for j in 1:rows, i in 1:cols], + [j for j in 1:rows, i in 1:cols]) + for (variable_to_plot, (variable_name, pds)) in enumerate(pd) + ax = axes[variable_to_plot] + plt = trixiheatmap!(ax, pds; plot_mesh, colormap) + row = row_list[variable_to_plot] + col = col_list[variable_to_plot] + Makie.Colorbar(fig[row, col][1, 2], plt) + + ax.aspect = Makie.DataAspect() # equal aspect ratio + ax.title = variable_name + Makie.xlims!(ax, extrema(pd.x)) + Makie.ylims!(ax, extrema(pd.y)) + end + + return FigureAndAxes(fig, axes) +end end # @muladd end diff --git a/src/Trixi.jl b/src/Trixi.jl index 76cd74edcaf..d5579aeea33 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -18,9 +18,11 @@ module Trixi # Include other packages that are used in Trixi.jl # (standard library packages first, other packages next, all of them sorted alphabetically) -using LinearAlgebra: LinearAlgebra, Diagonal, diag, dot, mul!, norm, cross, normalize, I, UniformScaling, det +using LinearAlgebra: LinearAlgebra, Diagonal, diag, dot, mul!, norm, cross, normalize, I, + UniformScaling, det using Printf: @printf, @sprintf, println -using SparseArrays: AbstractSparseMatrix, AbstractSparseMatrixCSC, sparse, droptol!, rowvals, nzrange, nonzeros, spzeros +using SparseArrays: AbstractSparseMatrix, AbstractSparseMatrixCSC, sparse, droptol!, + rowvals, nzrange, nonzeros, spzeros # import @reexport now to make it available for further imports/exports using Reexport: @reexport @@ -70,14 +72,15 @@ using SimpleUnPack: @pack! 
# finite difference SBP operators using SummationByPartsOperators: AbstractDerivativeOperator, - AbstractNonperiodicDerivativeOperator, DerivativeOperator, - AbstractPeriodicDerivativeOperator, PeriodicDerivativeOperator, grid + AbstractNonperiodicDerivativeOperator, DerivativeOperator, + AbstractPeriodicDerivativeOperator, + PeriodicDerivativeOperator, grid import SummationByPartsOperators: integrate, semidiscretize, compute_coefficients, compute_coefficients!, left_boundary_weight, right_boundary_weight -@reexport using SummationByPartsOperators: - SummationByPartsOperators, derivative_operator, periodic_derivative_operator, - upwind_operators +@reexport using SummationByPartsOperators: SummationByPartsOperators, derivative_operator, + periodic_derivative_operator, + upwind_operators # DGMulti solvers @reexport using StartUpDG: StartUpDG, Polynomial, Gauss, SBP, Line, Tri, Quad, Hex, Tet @@ -95,7 +98,6 @@ using StartUpDG: RefElemData, MeshData, AbstractElemShape # include(expr -> quote @muladd begin $expr end end, filename) # end - # Define the entry points of our type hierarchy, e.g. # AbstractEquations, AbstractSemidiscretization etc. # Placing them here allows us to make use of them for dispatch even for @@ -129,12 +131,16 @@ include("visualization/visualization.jl") # export types/functions that define the public API of Trixi.jl export AcousticPerturbationEquations2D, - CompressibleEulerEquations1D, CompressibleEulerEquations2D, CompressibleEulerEquations3D, - CompressibleEulerMulticomponentEquations1D, CompressibleEulerMulticomponentEquations2D, + CompressibleEulerEquations1D, CompressibleEulerEquations2D, + CompressibleEulerEquations3D, + CompressibleEulerMulticomponentEquations1D, + CompressibleEulerMulticomponentEquations2D, IdealGlmMhdEquations1D, IdealGlmMhdEquations2D, IdealGlmMhdEquations3D, IdealGlmMhdMulticomponentEquations1D, IdealGlmMhdMulticomponentEquations2D, - HyperbolicDiffusionEquations1D, HyperbolicDiffusionEquations2D, HyperbolicDiffusionEquations3D, - LinearScalarAdvectionEquation1D, LinearScalarAdvectionEquation2D, LinearScalarAdvectionEquation3D, + HyperbolicDiffusionEquations1D, HyperbolicDiffusionEquations2D, + HyperbolicDiffusionEquations3D, + LinearScalarAdvectionEquation1D, LinearScalarAdvectionEquation2D, + LinearScalarAdvectionEquation3D, InviscidBurgersEquation1D, LatticeBoltzmannEquations2D, LatticeBoltzmannEquations3D, ShallowWaterEquations1D, ShallowWaterEquations2D, @@ -146,7 +152,8 @@ export LaplaceDiffusion1D, LaplaceDiffusion2D, export GradientVariablesPrimitive, GradientVariablesEntropy -export flux, flux_central, flux_lax_friedrichs, flux_hll, flux_hllc, flux_hlle, flux_godunov, +export flux, flux_central, flux_lax_friedrichs, flux_hll, flux_hllc, flux_hlle, + flux_godunov, flux_chandrashekar, flux_ranocha, flux_derigs_etal, flux_hindenlang_gassner, flux_nonconservative_powell, flux_kennedy_gruber, flux_shima_etal, flux_ec, @@ -181,13 +188,17 @@ export boundary_condition_do_nothing, export initial_condition_convergence_test, source_terms_convergence_test export source_terms_harmonic -export initial_condition_poisson_nonperiodic, source_terms_poisson_nonperiodic, boundary_condition_poisson_nonperiodic -export initial_condition_eoc_test_coupled_euler_gravity, source_terms_eoc_test_coupled_euler_gravity, source_terms_eoc_test_euler +export initial_condition_poisson_nonperiodic, source_terms_poisson_nonperiodic, + boundary_condition_poisson_nonperiodic +export initial_condition_eoc_test_coupled_euler_gravity, + 
source_terms_eoc_test_coupled_euler_gravity, source_terms_eoc_test_euler export cons2cons, cons2prim, prim2cons, cons2macroscopic, cons2state, cons2mean, cons2entropy, entropy2cons -export density, pressure, density_pressure, velocity, global_mean_vars, equilibrium_distribution, waterheight_pressure -export entropy, energy_total, energy_kinetic, energy_internal, energy_magnetic, cross_helicity, +export density, pressure, density_pressure, velocity, global_mean_vars, + equilibrium_distribution, waterheight_pressure +export entropy, energy_total, energy_kinetic, energy_internal, energy_magnetic, + cross_helicity, enstrophy export lake_at_rest_error export ncomponents, eachcomponent @@ -229,7 +240,8 @@ export load_mesh, load_time export ControllerThreeLevel, ControllerThreeLevelCombined, IndicatorLöhner, IndicatorLoehner, IndicatorMax, - IndicatorNeuralNetwork, NeuralNetworkPerssonPeraire, NeuralNetworkRayHesthaven, NeuralNetworkCNN + IndicatorNeuralNetwork, NeuralNetworkPerssonPeraire, NeuralNetworkRayHesthaven, + NeuralNetworkCNN export PositivityPreservingLimiterZhangShu @@ -245,58 +257,57 @@ export DGMulti, DGMultiBasis, estimate_dt, DGMultiMesh, GaussSBP export ViscousFormulationBassiRebay1, ViscousFormulationLocalDG # Visualization-related exports -export PlotData1D, PlotData2D, ScalarPlotData2D, getmesh, adapt_to_mesh_level!, adapt_to_mesh_level, +export PlotData1D, PlotData2D, ScalarPlotData2D, getmesh, adapt_to_mesh_level!, + adapt_to_mesh_level, iplot, iplot! function __init__() - init_mpi() + init_mpi() - init_p4est() + init_p4est() - register_error_hints() + register_error_hints() - # Enable features that depend on the availability of the Plots package - @require Plots="91a5bcdd-55d7-5caf-9e0b-520d859cae80" begin - using .Plots: Plots - end + # Enable features that depend on the availability of the Plots package + @require Plots="91a5bcdd-55d7-5caf-9e0b-520d859cae80" begin + using .Plots: Plots + end - # Until Julia v1.9 is the minimum required version for Trixi.jl, we still support Requires.jl - @static if !isdefined(Base, :get_extension) - @require Makie="ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" begin - include("../ext/TrixiMakieExt.jl") + # Until Julia v1.9 is the minimum required version for Trixi.jl, we still support Requires.jl + @static if !isdefined(Base, :get_extension) + @require Makie="ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" begin + include("../ext/TrixiMakieExt.jl") + end end - end - - @require Flux="587475ba-b771-5e3f-ad9e-33799f191a9c" begin - using .Flux: params - end - - # FIXME upstream. This is a hacky workaround for - # https://github.com/trixi-framework/Trixi.jl/issues/628 - # https://github.com/trixi-framework/Trixi.jl/issues/1185 - # The related upstream issues appear to be - # https://github.com/JuliaLang/julia/issues/35800 - # https://github.com/JuliaLang/julia/issues/32552 - # https://github.com/JuliaLang/julia/issues/41740 - # See also https://discourse.julialang.org/t/performance-depends-dramatically-on-compilation-order/58425 - if VERSION < v"1.9.0" - let - for T in (Float32, Float64) - u_mortars_2d = zeros(T, 2, 2, 2, 2, 2) - u_view_2d = view(u_mortars_2d, 1, :, 1, :, 1) - LoopVectorization.axes(u_view_2d) - - u_mortars_3d = zeros(T, 2, 2, 2, 2, 2, 2) - u_view_3d = view(u_mortars_3d, 1, :, 1, :, :, 1) - LoopVectorization.axes(u_view_3d) - end + + @require Flux="587475ba-b771-5e3f-ad9e-33799f191a9c" begin + using .Flux: params end - end -end + # FIXME upstream. 
This is a hacky workaround for + # https://github.com/trixi-framework/Trixi.jl/issues/628 + # https://github.com/trixi-framework/Trixi.jl/issues/1185 + # The related upstream issues appear to be + # https://github.com/JuliaLang/julia/issues/35800 + # https://github.com/JuliaLang/julia/issues/32552 + # https://github.com/JuliaLang/julia/issues/41740 + # See also https://discourse.julialang.org/t/performance-depends-dramatically-on-compilation-order/58425 + if VERSION < v"1.9.0" + let + for T in (Float32, Float64) + u_mortars_2d = zeros(T, 2, 2, 2, 2, 2) + u_view_2d = view(u_mortars_2d, 1, :, 1, :, 1) + LoopVectorization.axes(u_view_2d) + + u_mortars_3d = zeros(T, 2, 2, 2, 2, 2, 2) + u_view_3d = view(u_mortars_3d, 1, :, 1, :, :, 1) + LoopVectorization.axes(u_view_3d) + end + end + end +end include("auxiliary/precompile.jl") _precompile_manual_() - end diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index 4de743e93fe..115d055c0ca 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -17,13 +17,12 @@ const main_timer = TimerOutput() # Always call timer() to hide implementation details timer() = main_timer - # By default, Julia/LLVM does not use fused multiply-add operations (FMAs). # Since these FMAs can increase the performance of many numerical algorithms, # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ PerformanceCounter() @@ -34,27 +33,26 @@ runtime of all measurements added so far via `take!(counter)`, resetting the `counter`. """ mutable struct PerformanceCounter - ncalls_since_readout::Int - runtime::Float64 + ncalls_since_readout::Int + runtime::Float64 end PerformanceCounter() = PerformanceCounter(0, 0.0) @inline function Base.take!(counter::PerformanceCounter) - time_per_call = counter.runtime / counter.ncalls_since_readout - counter.ncalls_since_readout = 0 - counter.runtime = 0.0 - return time_per_call + time_per_call = counter.runtime / counter.ncalls_since_readout + counter.ncalls_since_readout = 0 + counter.runtime = 0.0 + return time_per_call end @inline function Base.put!(counter::PerformanceCounter, runtime::Real) - counter.ncalls_since_readout += 1 - counter.runtime += runtime + counter.ncalls_since_readout += 1 + counter.runtime += runtime end @inline ncalls(counter::PerformanceCounter) = counter.ncalls_since_readout - """ PerformanceCounterList{N}() @@ -65,41 +63,38 @@ the averaged runtime of all measurements added so far via `take!(counter)`, resetting the `counter`. 
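Note that `take!` on a counter list returns the sum of the per-counter average
runtimes, and `ncalls` raises an error if the individual counters disagree on the
number of calls (unless the consistency check is disabled).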
""" struct PerformanceCounterList{N} - counters::NTuple{N, PerformanceCounter} - check_ncalls_consistency::Bool + counters::NTuple{N, PerformanceCounter} + check_ncalls_consistency::Bool end function PerformanceCounterList{N}(check_ncalls_consistency) where {N} - counters = ntuple(_ -> PerformanceCounter(), Val{N}()) - return PerformanceCounterList{N}(counters, check_ncalls_consistency) + counters = ntuple(_ -> PerformanceCounter(), Val{N}()) + return PerformanceCounterList{N}(counters, check_ncalls_consistency) end PerformanceCounterList{N}() where {N} = PerformanceCounterList{N}(true) @inline function Base.take!(counter_list::PerformanceCounterList) - time_per_call = 0.0 - for c in counter_list.counters - time_per_call += take!(c) - end - return time_per_call + time_per_call = 0.0 + for c in counter_list.counters + time_per_call += take!(c) + end + return time_per_call end @inline function ncalls(counter_list::PerformanceCounterList) - ncalls_first = ncalls(first(counter_list.counters)) - - if counter_list.check_ncalls_consistency - for c in counter_list.counters - if ncalls_first != ncalls(c) - error("Some counters have a different number of calls. Using `ncalls` on the counter list is undefined behavior.") - end + ncalls_first = ncalls(first(counter_list.counters)) + + if counter_list.check_ncalls_consistency + for c in counter_list.counters + if ncalls_first != ncalls(c) + error("Some counters have a different number of calls. Using `ncalls` on the counter list is undefined behavior.") + end + end end - end - return ncalls_first + return ncalls_first end - - - """ examples_dir() @@ -114,7 +109,6 @@ readdir(examples_dir()) """ examples_dir() = pkgdir(Trixi, "examples") - """ get_examples() @@ -122,27 +116,27 @@ Return a list of all example elixirs that are provided by Trixi.jl. See also [`examples_dir`](@ref) and [`default_example`](@ref). """ function get_examples() - examples = String[] - for (root, dirs, files) in walkdir(examples_dir()) - for f in files - if startswith(f, "elixir_") && endswith(f, ".jl") - push!(examples, joinpath(root, f)) - end + examples = String[] + for (root, dirs, files) in walkdir(examples_dir()) + for f in files + if startswith(f, "elixir_") && endswith(f, ".jl") + push!(examples, joinpath(root, f)) + end + end end - end - return examples + return examples end - """ default_example() Return the path to an example elixir that can be used to quickly see Trixi.jl in action on a [`TreeMesh`]@(ref). See also [`examples_dir`](@ref) and [`get_examples`](@ref). """ -default_example() = joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_basic.jl") - +function default_example() + joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_basic.jl") +end """ default_example_unstructured() @@ -151,8 +145,9 @@ Return the path to an example elixir that can be used to quickly see Trixi.jl in [`UnstructuredMesh2D`]@(ref). This simulation is run on the example curved, unstructured mesh given in the Trixi.jl documentation regarding unstructured meshes. """ -default_example_unstructured() = joinpath(examples_dir(), "unstructured_2d_dgsem", "elixir_euler_basic.jl") - +function default_example_unstructured() + joinpath(examples_dir(), "unstructured_2d_dgsem", "elixir_euler_basic.jl") +end """ ode_default_options() @@ -163,28 +158,28 @@ whenever MPI is used. 
For example, use `solve(ode, alg; ode_default_options()...)` """ function ode_default_options() - if mpi_isparallel() - return (; save_everystep = false, internalnorm = ode_norm, unstable_check = ode_unstable_check) - else - return (; save_everystep = false) - end + if mpi_isparallel() + return (; save_everystep = false, internalnorm = ode_norm, + unstable_check = ode_unstable_check) + else + return (; save_everystep = false) + end end # Print informative message at startup function print_startup_message() - s = """ - - ████████╗██████╗ ██╗██╗ ██╗██╗ - ╚══██╔══╝██╔══██╗██║╚██╗██╔╝██║ - ██║ ██████╔╝██║ ╚███╔╝ ██║ - ██║ ██╔══██╗██║ ██╔██╗ ██║ - ██║ ██║ ██║██║██╔╝ ██╗██║ - ╚═╝ ╚═╝ ╚═╝╚═╝╚═╝ ╚═╝╚═╝ - """ - mpi_println(s) + s = """ + + ████████╗██████╗ ██╗██╗ ██╗██╗ + ╚══██╔══╝██╔══██╗██║╚██╗██╔╝██║ + ██║ ██████╔╝██║ ╚███╔╝ ██║ + ██║ ██╔══██╗██║ ██╔██╗ ██║ + ██║ ██║ ██║██║██╔╝ ██╗██║ + ╚═╝ ╚═╝ ╚═╝╚═╝╚═╝ ╚═╝╚═╝ + """ + mpi_println(s) end - """ get_name(x) @@ -202,9 +197,7 @@ julia> Trixi.get_name(Val(:test)) ``` """ get_name(x) = string(x) -get_name(::Val{x}) where x = string(x) - - +get_name(::Val{x}) where {x} = string(x) """ @threaded for ... end @@ -224,35 +217,36 @@ Some discussion can be found at https://discourse.julialang.org/t/overhead-of-th and https://discourse.julialang.org/t/threads-threads-with-one-thread-how-to-remove-the-overhead/58435. """ macro threaded(expr) - # Use `esc(quote ... end)` for nested macro calls as suggested in - # https://github.com/JuliaLang/julia/issues/23221 - # - # The following code is a simple version using only `Threads.@threads` from the - # standard library with an additional check whether only a single thread is used - # to reduce some overhead (and allocations) for serial execution. - # - # return esc(quote - # let - # if Threads.nthreads() == 1 - # $(expr) - # else - # Threads.@threads $(expr) - # end - # end - # end) - # - # However, the code below using `@batch` from Polyester.jl is more efficient, - # since this packages provides threads with less overhead. Since it is written - # by Chris Elrod, the author of LoopVectorization.jl, we expect this package - # to provide the most efficient and useful implementation of threads (as we use - # them) available in Julia. - # !!! danger "Heisenbug" - # Look at the comments for `wrap_array` when considering to change this macro. - - return esc(quote Trixi.@batch $(expr) end) + # Use `esc(quote ... end)` for nested macro calls as suggested in + # https://github.com/JuliaLang/julia/issues/23221 + # + # The following code is a simple version using only `Threads.@threads` from the + # standard library with an additional check whether only a single thread is used + # to reduce some overhead (and allocations) for serial execution. + # + # return esc(quote + # let + # if Threads.nthreads() == 1 + # $(expr) + # else + # Threads.@threads $(expr) + # end + # end + # end) + # + # However, the code below using `@batch` from Polyester.jl is more efficient, + # since this packages provides threads with less overhead. Since it is written + # by Chris Elrod, the author of LoopVectorization.jl, we expect this package + # to provide the most efficient and useful implementation of threads (as we use + # them) available in Julia. + # !!! danger "Heisenbug" + # Look at the comments for `wrap_array` when considering to change this macro. 
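+    #
+    # Editorial sketch of a typical call site (loop bounds and names are
+    # illustrative only, not taken from this diff):
+    #
+    #     @threaded for element in 1:n_elements
+    #         # ... thread-parallel work on `element` ...
+    #     end
+    #
+    # which macro-expands to `Trixi.@batch` applied to the same loop, as seen below.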
+ + return esc(quote + Trixi.@batch $(expr) + end) end - # @trixi_timeit timer() "some label" expression # # Basically the same as a special case of `@timeit_debug` from @@ -261,26 +255,25 @@ end # but it also avoids some related performance problems. Since we do not use # exception handling in Trixi.jl, that's not really an issue. macro trixi_timeit(timer_output, label, expr) - timeit_block = quote - if timeit_debug_enabled() - local to = $(esc(timer_output)) - local enabled = to.enabled - if enabled - local accumulated_data = $(TimerOutputs.push!)(to, $(esc(label))) - end - local b₀ = $(TimerOutputs.gc_bytes)() - local t₀ = $(TimerOutputs.time_ns)() - end - local val = $(esc(expr)) - if timeit_debug_enabled() && enabled - $(TimerOutputs.do_accumulate!)(accumulated_data, t₀, b₀) - $(TimerOutputs.pop!)(to) + timeit_block = quote + if timeit_debug_enabled() + local to = $(esc(timer_output)) + local enabled = to.enabled + if enabled + local accumulated_data = $(TimerOutputs.push!)(to, $(esc(label))) + end + local b₀ = $(TimerOutputs.gc_bytes)() + local t₀ = $(TimerOutputs.time_ns)() + end + local val = $(esc(expr)) + if timeit_debug_enabled() && enabled + $(TimerOutputs.do_accumulate!)(accumulated_data, t₀, b₀) + $(TimerOutputs.pop!)(to) + end + val end - val - end end - """ @autoinfiltrate @autoinfiltrate condition::Bool @@ -304,33 +297,29 @@ See also: [Infiltrator.jl](https://github.com/JuliaDebug/Infiltrator.jl) a breaking change. """ macro autoinfiltrate(condition = true) - pkgid = Base.PkgId(Base.UUID("5903a43b-9cc3-4c30-8d17-598619ec4e9b"), "Infiltrator") - if !haskey(Base.loaded_modules, pkgid) - try - Base.eval(Main, :(using Infiltrator)) - catch err - @error "Cannot load Infiltrator.jl. Make sure it is included in your environment stack." + pkgid = Base.PkgId(Base.UUID("5903a43b-9cc3-4c30-8d17-598619ec4e9b"), "Infiltrator") + if !haskey(Base.loaded_modules, pkgid) + try + Base.eval(Main, :(using Infiltrator)) + catch err + @error "Cannot load Infiltrator.jl. Make sure it is included in your environment stack." + end + end + i = get(Base.loaded_modules, pkgid, nothing) + lnn = LineNumberNode(__source__.line, __source__.file) + + if i === nothing + return Expr(:macrocall, + Symbol("@warn"), + lnn, + "Could not load Infiltrator.") end - end - i = get(Base.loaded_modules, pkgid, nothing) - lnn = LineNumberNode(__source__.line, __source__.file) - - if i === nothing - return Expr( - :macrocall, - Symbol("@warn"), - lnn, - "Could not load Infiltrator.") - end - - return Expr( - :macrocall, - Expr(:., i, QuoteNode(Symbol("@infiltrate"))), - lnn, - esc(condition) - ) -end + return Expr(:macrocall, + Expr(:., i, QuoteNode(Symbol("@infiltrate"))), + lnn, + esc(condition)) +end # Use the *experimental* feature in `Base` to add error hints for specific errors. We use it to # warn users in case they try to execute functions that are extended in package extensions which @@ -338,23 +327,22 @@ end # # Reference: https://docs.julialang.org/en/v1/base/base/#Base.Experimental.register_error_hint function register_error_hints() - # We follow the advice in the docs and gracefully exit without doing anything if the experimental - # features gets silently removed. - if !isdefined(Base.Experimental, :register_error_hint) - return nothing - end - - Base.Experimental.register_error_hint(MethodError) do io, exc, argtypes, kwargs - if exc.f in [iplot, iplot!] && isempty(methods(exc.f)) - print(io, "\n$(exc.f) has no methods yet. 
It is part of a plotting extension of Trixi.jl " *
-        "that relies on Makie being loaded.\n" *
-        "To activate the extension, execute `using Makie`, `using CairoMakie`, " *
-        "`using GLMakie`, or load any other package that also uses Makie.")
+    # We follow the advice in the docs and gracefully exit without doing anything if the experimental
+    # feature gets silently removed.
+    if !isdefined(Base.Experimental, :register_error_hint)
+        return nothing
    end
-  end
-
-  return nothing
-end

+    Base.Experimental.register_error_hint(MethodError) do io, exc, argtypes, kwargs
+        if exc.f in [iplot, iplot!] && isempty(methods(exc.f))
+            print(io,
+                  "\n$(exc.f) has no methods yet. It is part of a plotting extension of Trixi.jl " *
+                  "that relies on Makie being loaded.\n" *
+                  "To activate the extension, execute `using Makie`, `using CairoMakie`, " *
+                  "`using GLMakie`, or load any other package that also uses Makie.")
+        end
+    end

+    return nothing
+end
end # @muladd
diff --git a/src/auxiliary/containers.jl b/src/auxiliary/containers.jl
index 711471bdafc..90650f6abcf 100644
--- a/src/auxiliary/containers.jl
+++ b/src/auxiliary/containers.jl
@@ -3,7 +3,7 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#! format: noindent
 # Abstract base type - all containers that want to use these features must inherit from it
 abstract type AbstractContainer end

@@ -15,33 +15,30 @@ function move_connectivity! end
 function delete_connectivity! end
 function reset_data_structures! end

-
 # Auxiliary copy function to copy data between containers
 function copy_data!(target::AbstractArray, source::AbstractArray,
-                    first::Int, last::Int, destination::Int, block_size::Int=1)
-  count = last - first + 1
-  if destination <= first || destination > last
-    # In this case it is safe to copy forward (left-to-right) without overwriting data
-    for i in 0:(count-1), j in 1:block_size
-      target[block_size*(destination+i-1) + j] = source[block_size*(first+i-1) + j]
+                    first::Int, last::Int, destination::Int, block_size::Int = 1)
+    count = last - first + 1
+    if destination <= first || destination > last
+        # In this case it is safe to copy forward (left-to-right) without overwriting data
+        for i in 0:(count - 1), j in 1:block_size
+            target[block_size * (destination + i - 1) + j] = source[block_size * (first + i - 1) + j]
+        end
+    else
+        # In this case we need to copy backward (right-to-left) to prevent overwriting data
+        for i in (count - 1):-1:0, j in 1:block_size
+            target[block_size * (destination + i - 1) + j] = source[block_size * (first + i - 1) + j]
+        end
    end
-  else
-    # In this case we need to copy backward (right-to-left) to prevent overwriting data
-    for i in (count-1):-1:0, j in 1:block_size
-      target[block_size*(destination+i-1) + j] = source[block_size*(first+i-1) + j]
-    end
-  end
-  return target
+    return target
end

-
# Inquire about capacity and size
capacity(c::AbstractContainer) = c.capacity
Base.length(c::AbstractContainer) = c.length
Base.size(c::AbstractContainer) = (length(c),)

-
"""
    resize!(c::AbstractContainer, new_length) -> AbstractContainer

Resize `c` to contain `new_length` elements. If `new_length` is smaller than the current container
length, the first `new_length` elements will be retained. If `new_length` is
larger, the new elements are invalidated.
"""
function Base.resize!(c::AbstractContainer, new_length)
-  @assert new_length >= zero(new_length) "New length must be >= 0"
-  @assert new_length <= capacity(c) "New length would exceed capacity"
-
-  # If new length is greater than current length, append to container.
- # If new length is less than current length, shrink container. - # If new length is equal to current length, do nothing. - if new_length > length(c) - # First, invalidate range (to be sure that no sensible values are accidentally left there) - invalidate!(c, length(c) + 1, new_length) - - # Then, set new container length - c.length = new_length - elseif new_length < length(c) - # Rely on remove&shift to do The Right Thing (`remove_shift!` also updates the length) - remove_shift!(c, new_length + 1, length(c)) - end - - return c -end + @assert new_length>=zero(new_length) "New length must be >= 0" + @assert new_length<=capacity(c) "New length would exceed capacity" + + # If new length is greater than current length, append to container. + # If new length is less than current length, shrink container. + # If new length is equal to current length, do nothing. + if new_length > length(c) + # First, invalidate range (to be sure that no sensible values are accidentally left there) + invalidate!(c, length(c) + 1, new_length) + + # Then, set new container length + c.length = new_length + elseif new_length < length(c) + # Rely on remove&shift to do The Right Thing (`remove_shift!` also updates the length) + remove_shift!(c, new_length + 1, length(c)) + end + return c +end # Copy data range from source to target container. # @@ -77,255 +73,245 @@ end # inheriting from AbstractContainer. # TODO: Shall we extend Base.copyto! ? function Trixi.copy!(target::AbstractContainer, source::AbstractContainer, - first::Int, last::Int, destination::Int) - @assert 1 <= first <= length(source) "First cell out of range" - @assert 1 <= last <= length(source) "Last cell out of range" - @assert 1 <= destination <= length(target) "Destination out of range" - @assert destination + (last - first) <= length(target) "Target range out of bounds" - - # Return if copy would be a no-op - if last < first || (source === target && first == destination) - return target - end + first::Int, last::Int, destination::Int) + @assert 1<=first<=length(source) "First cell out of range" + @assert 1<=last<=length(source) "Last cell out of range" + @assert 1<=destination<=length(target) "Destination out of range" + @assert destination + (last - first)<=length(target) "Target range out of bounds" + + # Return if copy would be a no-op + if last < first || (source === target && first == destination) + return target + end - raw_copy!(target, source, first, last, destination) + raw_copy!(target, source, first, last, destination) - return target + return target end - # Convenience method to copy a single element -function Trixi.copy!(target::AbstractContainer, source::AbstractContainer, from::Int, destination::Int) - Trixi.copy!(target, source, from, from, destination) +function Trixi.copy!(target::AbstractContainer, source::AbstractContainer, from::Int, + destination::Int) + Trixi.copy!(target, source, from, from, destination) end - # Convenience method for copies within a single container function Trixi.copy!(c::AbstractContainer, first::Int, last::Int, destination::Int) - Trixi.copy!(c, c, first, last, destination) + Trixi.copy!(c, c, first, last, destination) end - # Convenience method for copying a single element within a single container function Trixi.copy!(c::AbstractContainer, from::Int, destination::Int) - Trixi.copy!(c, c, from, from, destination) + Trixi.copy!(c, c, from, from, destination) end - # Move elements in a way that preserves connectivity. 
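# For example (editorial sketch): `move!(c, 2, 3, 5)` copies cells 2:3 to 5:6,
# moves their connectivity along, and afterwards invalidates whatever part of the
# source range 2:3 does not overlap the destination.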
function move!(c::AbstractContainer, first::Int, last::Int, destination::Int) - @assert 1 <= first <= length(c) "First cell $first out of range" - @assert 1 <= last <= length(c) "Last cell $last out of range" - @assert 1 <= destination <= length(c) "Destination $destination out of range" - @assert destination + (last - first) <= length(c) "Target range out of bounds" - - # Return if move would be a no-op - if last < first || first == destination - return c - end - - # Copy cells to new location - raw_copy!(c, first, last, destination) + @assert 1<=first<=length(c) "First cell $first out of range" + @assert 1<=last<=length(c) "Last cell $last out of range" + @assert 1<=destination<=length(c) "Destination $destination out of range" + @assert destination + (last - first)<=length(c) "Target range out of bounds" + + # Return if move would be a no-op + if last < first || first == destination + return c + end - # Move connectivity - move_connectivity!(c, first, last, destination) + # Copy cells to new location + raw_copy!(c, first, last, destination) + # Move connectivity + move_connectivity!(c, first, last, destination) - # Invalidate original cell locations (unless they already contain new data due to overlap) - # 1) If end of destination range is within original range, shift first_invalid to the right - count = last - first + 1 - first_invalid = (first <= destination + count - 1 <= last) ? destination + count : first - # 2) If beginning of destination range is within original range, shift last_invalid to the left - last_invalid = (first <= destination <= last) ? destination - 1 : last - # 3) Invalidate range - invalidate!(c, first_invalid, last_invalid) + # Invalidate original cell locations (unless they already contain new data due to overlap) + # 1) If end of destination range is within original range, shift first_invalid to the right + count = last - first + 1 + first_invalid = (first <= destination + count - 1 <= last) ? destination + count : + first + # 2) If beginning of destination range is within original range, shift last_invalid to the left + last_invalid = (first <= destination <= last) ? destination - 1 : last + # 3) Invalidate range + invalidate!(c, first_invalid, last_invalid) - return c + return c +end +function move!(c::AbstractContainer, from::Int, destination::Int) + move!(c, from, from, destination) end -move!(c::AbstractContainer, from::Int, destination::Int) = move!(c, from, from, destination) # Default implementation for moving a single element function move_connectivity!(c::AbstractContainer, from::Int, destination::Int) - return move_connectivity!(c, from, from, destination) + return move_connectivity!(c, from, from, destination) end # Default implementation for invalidating a single element function invalidate!(c::AbstractContainer, id::Int) - return invalidate!(c, id, id) + return invalidate!(c, id, id) end - # Swap two elements in a container while preserving element connectivity. 
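# (Editorial note: the implementation below round-trips element `a` through the
# container's spare `dummy` slot, so that connectivity can be updated consistently
# at every intermediate step.)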
function swap!(c::AbstractContainer, a::Int, b::Int)
-  @assert 1 <= a <= length(c) "a out of range"
-  @assert 1 <= b <= length(c) "b out of range"
+    @assert 1<=a<=length(c) "a out of range"
+    @assert 1<=b<=length(c) "b out of range"

-  # Return if swap would be a no-op
-  if a == b
-    return c
-  end
+    # Return if swap would be a no-op
+    if a == b
+        return c
+    end

-  # Move a to dummy location
-  raw_copy!(c, a, c.dummy)
-  move_connectivity!(c, a, c.dummy)
+    # Move a to dummy location
+    raw_copy!(c, a, c.dummy)
+    move_connectivity!(c, a, c.dummy)

-  # Move b to a
-  raw_copy!(c, b, a)
-  move_connectivity!(c, b, a)
+    # Move b to a
+    raw_copy!(c, b, a)
+    move_connectivity!(c, b, a)

-  # Move from dummy location to b
-  raw_copy!(c, c.dummy, b)
-  move_connectivity!(c, c.dummy, b)
+    # Move from dummy location to b
+    raw_copy!(c, c.dummy, b)
+    move_connectivity!(c, c.dummy, b)

-  # Invalidate dummy to be sure
-  invalidate!(c, c.dummy)
+    # Invalidate dummy to be sure
+    invalidate!(c, c.dummy)

-  return c
+    return c
end

-
# Insert blank elements in container, shifting the following elements back.
#
# After a call to insert!, the range `position:position + count - 1` will be available for use.
# TODO: Shall we extend Base.insert! ?
function insert!(c::AbstractContainer, position::Int, count::Int)
-  @assert 1 <= position <= length(c) + 1 "Insert position out of range"
-  @assert count >= 0 "Count must be non-negative"
-  @assert count + length(c) <= capacity(c) "New length would exceed capacity"
+    @assert 1<=position<=length(c)+1 "Insert position out of range"
+    @assert count>=0 "Count must be non-negative"
+    @assert count + length(c)<=capacity(c) "New length would exceed capacity"

-  # Return if insertation would be a no-op
-  if count == 0
-    return c
-  end
+    # Return if insertion would be a no-op
+    if count == 0
+        return c
+    end

-  # Append and return if insertion is beyond last current element
-  if position == length(c) + 1
-    resize!(c, length(c) + count)
-    return c
-  end
+    # Append and return if insertion is beyond last current element
+    if position == length(c) + 1
+        resize!(c, length(c) + count)
+        return c
+    end

-  # Increase length
-  c.length += count
+    # Increase length
+    c.length += count

-  # Move original cells that currently occupy the insertion region, unless
-  # insert position is one beyond previous length
-  if position <= length(c) - count
-    move!(c, position, length(c) - count, position + count)
-  end
+    # Move original cells that currently occupy the insertion region, unless
+    # insert position is one beyond previous length
+    if position <= length(c) - count
+        move!(c, position, length(c) - count, position + count)
+    end

-  return c
+    return c
end

-
# Erase elements from container, deleting their connectivity and then invalidating their data.
# TODO: Shall we extend Base.deleteat! or Base.delete! ?
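# (Editorial note: three removal flavors follow. `erase!` only deletes connectivity
# and invalidates the range, `remove_shift!` additionally closes the gap by shifting
# trailing cells forward, and `remove_fill!` closes it by moving cells in from the
# end of the container, which needs fewer copies but does not preserve the order.)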
function erase!(c::AbstractContainer, first::Int, last::Int)
-  @assert 1 <= first <= length(c) "First cell out of range"
-  @assert 1 <= last <= length(c) "Last cell out of range"
+    @assert 1<=first<=length(c) "First cell out of range"
+    @assert 1<=last<=length(c) "Last cell out of range"

-  # Return if eraseure would be a no-op
-  if last < first
-    return c
-  end
+    # Return if erasure would be a no-op
+    if last < first
+        return c
+    end

-  # Delete connectivity and invalidate cells
-  delete_connectivity!(c, first, last)
-  invalidate!(c, first, last)
+    # Delete connectivity and invalidate cells
+    delete_connectivity!(c, first, last)
+    invalidate!(c, first, last)

-  return c
+    return c
end
erase!(c::AbstractContainer, id::Int) = erase!(c, id, id)

-
# Remove cells and shift existing cells forward to close the gap
function remove_shift!(c::AbstractContainer, first::Int, last::Int)
-  @assert 1 <= first <= length(c) "First cell out of range"
-  @assert 1 <= last <= length(c) "Last cell out of range"
+    @assert 1<=first<=length(c) "First cell out of range"
+    @assert 1<=last<=length(c) "Last cell out of range"

-  # Return if removal would be a no-op
-  if last < first
-    return c
-  end
+    # Return if removal would be a no-op
+    if last < first
+        return c
+    end

-  # Delete connectivity of cells to be removed
-  delete_connectivity!(c, first, last)
+    # Delete connectivity of cells to be removed
+    delete_connectivity!(c, first, last)

-  if last == length(c)
-    # If everything up to the last cell is removed, no shifting is required
-    invalidate!(c, first, last)
-  else
-    # Otherwise, the corresponding cells are moved forward
-    move!(c, last + 1, length(c), first)
-  end
+    if last == length(c)
+        # If everything up to the last cell is removed, no shifting is required
+        invalidate!(c, first, last)
+    else
+        # Otherwise, the corresponding cells are moved forward
+        move!(c, last + 1, length(c), first)
+    end

-  # Reduce length
-  count = last - first + 1
-  c.length -= count
+    # Reduce length
+    count = last - first + 1
+    c.length -= count

-  return c
+    return c
end
remove_shift!(c::AbstractContainer, id::Int) = remove_shift!(c, id, id)

-
# Remove cells and fill gap with cells from the end of the container (to reduce copy operations)
function remove_fill!(c::AbstractContainer, first::Int, last::Int)
-  @assert 1 <= first <= length(c) "First cell out of range"
-  @assert 1 <= last <= length(c) "Last cell out of range"
+    @assert 1<=first<=length(c) "First cell out of range"
+    @assert 1<=last<=length(c) "Last cell out of range"

-  # Return if removal would be a no-op
-  if last < first
-    return c
-  end
+    # Return if removal would be a no-op
+    if last < first
+        return c
+    end

-  # Delete connectivity of cells to be removed and then invalidate them
-  delete_connectivity!(c, first, last)
-  invalidate!(c, first, last)
+    # Delete connectivity of cells to be removed and then invalidate them
+    delete_connectivity!(c, first, last)
+    invalidate!(c, first, last)

-  # Copy cells from end (unless last is already the last cell)
-  count = last - first + 1
-  if last < length(c)
-    move!(c, max(length(c) - count + 1, last + 1), length(c), first)
-  end
+    # Copy cells from end (unless last is already the last cell)
+    count = last - first + 1
+    if last < length(c)
+        move!(c, max(length(c) - count + 1, last + 1), length(c), first)
+    end

-  # Reduce length
-  c.length -= count
+    # Reduce length
+    c.length -= count

-  return c
+    return c
end

-
# Reset container to zero-length and with a new capacity
function reset!(c::AbstractContainer, capacity::Int)
-  @assert 
capacity >=0 + @assert capacity >= 0 - c.capacity = capacity - c.length = 0 - c.dummy = capacity + 1 - reset_data_structures!(c) + c.capacity = capacity + c.length = 0 + c.dummy = capacity + 1 + reset_data_structures!(c) - return c + return c end - # Invalidate all elements and set length to zero. function clear!(c::AbstractContainer) - invalidate!(c) - c.length = 0 + invalidate!(c) + c.length = 0 - return c + return c end - # Helpful overloads for `raw_copy` function raw_copy!(c::AbstractContainer, first::Int, last::Int, destination::Int) - raw_copy!(c, c, first, last, destination) + raw_copy!(c, c, first, last, destination) end -function raw_copy!(target::AbstractContainer, source::AbstractContainer, from::Int, destination::Int) - raw_copy!(target, source, from, from, destination) +function raw_copy!(target::AbstractContainer, source::AbstractContainer, from::Int, + destination::Int) + raw_copy!(target, source, from, from, destination) end function raw_copy!(c::AbstractContainer, from::Int, destination::Int) - raw_copy!(c, c, from, from, destination) + raw_copy!(c, c, from, from, destination) end - - end # @muladd diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl index 78340c86cc3..27c1bed5ca4 100644 --- a/src/auxiliary/math.jl +++ b/src/auxiliary/math.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ ln_mean(x, y) @@ -54,13 +54,13 @@ Given ε = 1.0e-4, we use the following algorithm. https://www.agner.org/optimize/instruction_tables.pdf """ @inline function ln_mean(x, y) - epsilon_f2 = 1.0e-4 - f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 - if f2 < epsilon_f2 - return (x + y) / @evalpoly(f2, 2, 2/3, 2/5, 2/7) - else - return (y - x) / log(y / x) - end + epsilon_f2 = 1.0e-4 + f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 + if f2 < epsilon_f2 + return (x + y) / @evalpoly(f2, 2, 2/3, 2/5, 2/7) + else + return (y - x) / log(y / x) + end end """ @@ -74,17 +74,15 @@ logarithmic mean is needed, by replacing a (slow) division by a (fast) multiplication. """ @inline function inv_ln_mean(x, y) - epsilon_f2 = 1.0e-4 - f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 - if f2 < epsilon_f2 - return @evalpoly(f2, 2, 2/3, 2/5, 2/7) / (x + y) - else - return log(y / x) / (y - x) - end + epsilon_f2 = 1.0e-4 + f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 + if f2 < epsilon_f2 + return @evalpoly(f2, 2, 2/3, 2/5, 2/7) / (x + y) + else + return log(y / x) / (y - x) + end end - - # `Base.max` and `Base.min` perform additional checks for signed zeros and `NaN`s # which are not present in comparable functions in Fortran/C++. For example, # ```julia @@ -190,8 +188,6 @@ julia> min(2, 5, 1) """ @inline min(args...) = @fastmath min(args...) - - """ positive_part(x) @@ -199,7 +195,7 @@ Return `x` if `x` is positive, else zero. In other words, return `(x + abs(x)) / 2` for real numbers `x`. """ @inline function positive_part(x) - return max(x, zero(x)) + return max(x, zero(x)) end """ @@ -209,8 +205,6 @@ Return `x` if `x` is negative, else zero. In other words, return `(x - abs(x)) / 2` for real numbers `x`. 
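For example, `negative_part(-3.0) == -3.0`, while `negative_part(2.0) == 0.0`.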
""" @inline function negative_part(x) - return min(x, zero(x)) + return min(x, zero(x)) end - - end # @muladd diff --git a/src/auxiliary/mpi.jl b/src/auxiliary/mpi.jl index ab1b13d49da..2c485b4832c 100644 --- a/src/auxiliary/mpi.jl +++ b/src/auxiliary/mpi.jl @@ -6,29 +6,28 @@ Initialize MPI by calling `MPI.Initialized()`. The function will check if MPI is and if yes, do nothing, thus it is safe to call it multiple times. """ function init_mpi() - if MPI_INITIALIZED[] + if MPI_INITIALIZED[] + return nothing + end + + # MPI.jl handles multiple calls to MPI.Init appropriately. Thus, we don't need + # any common checks of the form `if MPI.Initialized() ...`. + # threadlevel=MPI.THREAD_FUNNELED: Only main thread makes MPI calls + # finalize_atexit=true : MPI.jl will call call MPI.Finalize as `atexit` hook + provided = MPI.Init(threadlevel = MPI.THREAD_FUNNELED, finalize_atexit = true) + @assert provided>=MPI.THREAD_FUNNELED "MPI library with insufficient threading support" + + # Initialize global MPI state + MPI_RANK[] = MPI.Comm_rank(MPI.COMM_WORLD) + MPI_SIZE[] = MPI.Comm_size(MPI.COMM_WORLD) + MPI_IS_PARALLEL[] = MPI_SIZE[] > 1 + MPI_IS_SERIAL[] = !MPI_IS_PARALLEL[] + MPI_IS_ROOT[] = MPI_IS_SERIAL[] || MPI_RANK[] == 0 + MPI_INITIALIZED[] = true + return nothing - end - - # MPI.jl handles multiple calls to MPI.Init appropriately. Thus, we don't need - # any common checks of the form `if MPI.Initialized() ...`. - # threadlevel=MPI.THREAD_FUNNELED: Only main thread makes MPI calls - # finalize_atexit=true : MPI.jl will call call MPI.Finalize as `atexit` hook - provided = MPI.Init(threadlevel=MPI.THREAD_FUNNELED, finalize_atexit=true) - @assert provided >= MPI.THREAD_FUNNELED "MPI library with insufficient threading support" - - # Initialize global MPI state - MPI_RANK[] = MPI.Comm_rank(MPI.COMM_WORLD) - MPI_SIZE[] = MPI.Comm_size(MPI.COMM_WORLD) - MPI_IS_PARALLEL[] = MPI_SIZE[] > 1 - MPI_IS_SERIAL[] = !MPI_IS_PARALLEL[] - MPI_IS_ROOT[] = MPI_IS_SERIAL[] || MPI_RANK[] == 0 - MPI_INITIALIZED[] = true - - return nothing end - const MPI_INITIALIZED = Ref(false) const MPI_RANK = Ref(-1) const MPI_SIZE = Ref(-1) @@ -36,7 +35,6 @@ const MPI_IS_PARALLEL = Ref(false) const MPI_IS_SERIAL = Ref(true) const MPI_IS_ROOT = Ref(true) - @inline mpi_comm() = MPI.COMM_WORLD @inline mpi_rank() = MPI_RANK[] @@ -50,19 +48,18 @@ const MPI_IS_ROOT = Ref(true) @inline mpi_root() = 0 @inline function mpi_println(args...) - if mpi_isroot() - println(args...) - end - return nothing + if mpi_isroot() + println(args...) + end + return nothing end @inline function mpi_print(args...) - if mpi_isroot() - print(args...) - end - return nothing + if mpi_isroot() + print(args...) 
+ end + return nothing end - """ ode_norm(u, t) @@ -79,14 +76,15 @@ See the "Advanced Adaptive Stepsize Control" section of the [documentation](http """ ode_norm(u::Number, t) = @fastmath abs(u) function ode_norm(u::AbstractArray, t) - local_sumabs2 = recursive_sum_abs2(u) # sum(abs2, u) - local_length = recursive_length(u) # length(u) - if mpi_isparallel() - global_sumabs2, global_length = MPI.Allreduce([local_sumabs2, local_length], +, mpi_comm()) - return sqrt(global_sumabs2 / global_length) - else - return sqrt(local_sumabs2 / local_length) - end + local_sumabs2 = recursive_sum_abs2(u) # sum(abs2, u) + local_length = recursive_length(u) # length(u) + if mpi_isparallel() + global_sumabs2, global_length = MPI.Allreduce([local_sumabs2, local_length], +, + mpi_comm()) + return sqrt(global_sumabs2 / global_length) + else + return sqrt(local_sumabs2 / local_length) + end end # Recursive `sum(abs2, ...)` and `length(...)` are required when dealing with @@ -102,16 +100,18 @@ recursive_sum_abs2(u::Number) = abs2(u) # https://github.com/SciML/RecursiveArrayTools.jl # However, what you have is good enough for us for now, so we don't need this # additional dependency at the moment. -recursive_sum_abs2(u::AbstractArray) = mapreduce(recursive_sum_abs2, +, u; init=zero(eltype(eltype(u)))) +function recursive_sum_abs2(u::AbstractArray) + mapreduce(recursive_sum_abs2, +, u; init = zero(eltype(eltype(u)))) +end recursive_length(u::Number) = length(u) recursive_length(u::AbstractArray{<:Number}) = length(u) recursive_length(u::AbstractArray{<:AbstractArray}) = sum(recursive_length, u) -function recursive_length(u::AbstractArray{<:StaticArrays.StaticArray{S, <:Number}}) where {S} - prod(StaticArrays.Size(eltype(u))) * length(u) +function recursive_length(u::AbstractArray{<:StaticArrays.StaticArray{S, + <:Number}}) where {S} + prod(StaticArrays.Size(eltype(u))) * length(u) end - """ ode_unstable_check(dt, u, semi, t) diff --git a/src/auxiliary/p4est.jl b/src/auxiliary/p4est.jl index b7851ba6f24..93b5166cd81 100644 --- a/src/auxiliary/p4est.jl +++ b/src/auxiliary/p4est.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ init_p4est() @@ -13,117 +13,117 @@ This function will check if `p4est` is already initialized and if yes, do nothing, thus it is safe to call it multiple times. 
""" function init_p4est() - p4est_package_id = P4est.package_id() - if p4est_package_id >= 0 - return nothing - end + p4est_package_id = P4est.package_id() + if p4est_package_id >= 0 + return nothing + end - # Initialize `p4est` with log level ERROR to prevent a lot of output in AMR simulations - p4est_init(C_NULL, SC_LP_ERROR) + # Initialize `p4est` with log level ERROR to prevent a lot of output in AMR simulations + p4est_init(C_NULL, SC_LP_ERROR) - return nothing + return nothing end - # Convert sc_array of type T to Julia array -function unsafe_wrap_sc(::Type{T}, sc_array::Ptr{sc_array}) where T - sc_array_obj = unsafe_load(sc_array) - return unsafe_wrap_sc(T, sc_array_obj) +function unsafe_wrap_sc(::Type{T}, sc_array::Ptr{sc_array}) where {T} + sc_array_obj = unsafe_load(sc_array) + return unsafe_wrap_sc(T, sc_array_obj) end -function unsafe_wrap_sc(::Type{T}, sc_array_obj::sc_array) where T - elem_count = sc_array_obj.elem_count - array = sc_array_obj.array +function unsafe_wrap_sc(::Type{T}, sc_array_obj::sc_array) where {T} + elem_count = sc_array_obj.elem_count + array = sc_array_obj.array - return unsafe_wrap(Array, Ptr{T}(array), elem_count) + return unsafe_wrap(Array, Ptr{T}(array), elem_count) end - # Load the ith element (1-indexed) of an sc array of type T -function unsafe_load_sc(::Type{T}, sc_array::Ptr{sc_array}, i=1) where T - sc_array_obj = unsafe_load(sc_array) - return unsafe_load_sc(T, sc_array_obj, i) +function unsafe_load_sc(::Type{T}, sc_array::Ptr{sc_array}, i = 1) where {T} + sc_array_obj = unsafe_load(sc_array) + return unsafe_load_sc(T, sc_array_obj, i) end -function unsafe_load_sc(::Type{T}, sc_array_obj::sc_array, i=1) where T - element_size = sc_array_obj.elem_size - @assert element_size == sizeof(T) +function unsafe_load_sc(::Type{T}, sc_array_obj::sc_array, i = 1) where {T} + element_size = sc_array_obj.elem_size + @assert element_size == sizeof(T) - return unsafe_load(Ptr{T}(sc_array_obj.array), i) + return unsafe_load(Ptr{T}(sc_array_obj.array), i) end - # Create new `p4est` from a p4est_connectivity # 2D function new_p4est(connectivity::Ptr{p4est_connectivity_t}, initial_refinement_level) - comm = P4est.uses_mpi() ? mpi_comm() : 0 # Use Trixi.jl's MPI communicator if p4est supports MPI - p4est_new_ext(comm, - connectivity, - 0, # No minimum initial qudrants per processor - initial_refinement_level, - true, # Refine uniformly - 2 * sizeof(Int), # Use Int-Vector of size 2 as quadrant user data - C_NULL, # No init function - C_NULL) # No user pointer + comm = P4est.uses_mpi() ? mpi_comm() : 0 # Use Trixi.jl's MPI communicator if p4est supports MPI + p4est_new_ext(comm, + connectivity, + 0, # No minimum initial qudrants per processor + initial_refinement_level, + true, # Refine uniformly + 2 * sizeof(Int), # Use Int-Vector of size 2 as quadrant user data + C_NULL, # No init function + C_NULL) # No user pointer end # 3D function new_p4est(connectivity::Ptr{p8est_connectivity_t}, initial_refinement_level) - comm = P4est.uses_mpi() ? mpi_comm() : 0 # Use Trixi.jl's MPI communicator if p4est supports MPI - p8est_new_ext(comm, connectivity, 0, initial_refinement_level, true, 2 * sizeof(Int), C_NULL, C_NULL) + comm = P4est.uses_mpi() ? 
mpi_comm() : 0 # Use Trixi.jl's MPI communicator if p4est supports MPI + p8est_new_ext(comm, connectivity, 0, initial_refinement_level, true, + 2 * sizeof(Int), C_NULL, C_NULL) end - # Save `p4est` data to file # 2D function save_p4est!(file, p4est::Ptr{p4est_t}) - # Don't save user data of the quads - p4est_save(file, p4est, false) + # Don't save user data of the quads + p4est_save(file, p4est, false) end # 3D function save_p4est!(file, p8est::Ptr{p8est_t}) - # Don't save user data of the quads - p8est_save(file, p8est, false) + # Don't save user data of the quads + p8est_save(file, p8est, false) end - # Load `p4est` from file # 2D function load_p4est(file, ::Val{2}) - conn_vec = Vector{Ptr{p4est_connectivity_t}}(undef, 1) - comm = P4est.uses_mpi() ? mpi_comm() : C_NULL # Use Trixi.jl's MPI communicator if p4est supports MPI - p4est_load_ext(file, comm, 0, 0, 1, 0, C_NULL, pointer(conn_vec)) + conn_vec = Vector{Ptr{p4est_connectivity_t}}(undef, 1) + comm = P4est.uses_mpi() ? mpi_comm() : C_NULL # Use Trixi.jl's MPI communicator if p4est supports MPI + p4est_load_ext(file, comm, 0, 0, 1, 0, C_NULL, pointer(conn_vec)) end # 3D function load_p4est(file, ::Val{3}) - conn_vec = Vector{Ptr{p8est_connectivity_t}}(undef, 1) - comm = P4est.uses_mpi() ? mpi_comm() : C_NULL # Use Trixi.jl's MPI communicator if p4est supports MPI - p8est_load_ext(file, comm, 0, 0, 1, 0, C_NULL, pointer(conn_vec)) + conn_vec = Vector{Ptr{p8est_connectivity_t}}(undef, 1) + comm = P4est.uses_mpi() ? mpi_comm() : C_NULL # Use Trixi.jl's MPI communicator if p4est supports MPI + p8est_load_ext(file, comm, 0, 0, 1, 0, C_NULL, pointer(conn_vec)) end - # Read `p4est` connectivity from Abaqus mesh file (.inp) # 2D read_inp_p4est(meshfile, ::Val{2}) = p4est_connectivity_read_inp(meshfile) # 3D read_inp_p4est(meshfile, ::Val{3}) = p8est_connectivity_read_inp(meshfile) - # Refine `p4est` if refine_fn_c returns 1 # 2D -refine_p4est!(p4est::Ptr{p4est_t}, recursive, refine_fn_c, init_fn_c) = p4est_refine(p4est, recursive, refine_fn_c, init_fn_c) +function refine_p4est!(p4est::Ptr{p4est_t}, recursive, refine_fn_c, init_fn_c) + p4est_refine(p4est, recursive, refine_fn_c, init_fn_c) +end # 3D -refine_p4est!(p8est::Ptr{p8est_t}, recursive, refine_fn_c, init_fn_c) = p8est_refine(p8est, recursive, refine_fn_c, init_fn_c) - +function refine_p4est!(p8est::Ptr{p8est_t}, recursive, refine_fn_c, init_fn_c) + p8est_refine(p8est, recursive, refine_fn_c, init_fn_c) +end # Refine `p4est` if coarsen_fn_c returns 1 # 2D -coarsen_p4est!(p4est::Ptr{p4est_t}, recursive, coarsen_fn_c, init_fn_c) = p4est_coarsen(p4est, recursive, coarsen_fn_c, init_fn_c) +function coarsen_p4est!(p4est::Ptr{p4est_t}, recursive, coarsen_fn_c, init_fn_c) + p4est_coarsen(p4est, recursive, coarsen_fn_c, init_fn_c) +end # 3D -coarsen_p4est!(p8est::Ptr{p8est_t}, recursive, coarsen_fn_c, init_fn_c) = p8est_coarsen(p8est, recursive, coarsen_fn_c, init_fn_c) - +function coarsen_p4est!(p8est::Ptr{p8est_t}, recursive, coarsen_fn_c, init_fn_c) + p8est_coarsen(p8est, recursive, coarsen_fn_c, init_fn_c) +end # Create new ghost layer from p4est, only connections via faces are relevant # 2D @@ -152,11 +152,11 @@ ghost_new_p4est(p8est::Ptr{p8est_t}) = p8est_ghost_new(p8est, P4est.P8EST_CONNEC # Check if ghost layer is valid # 2D function ghost_is_valid_p4est(p4est::Ptr{p4est_t}, ghost_layer::Ptr{p4est_ghost_t}) - return p4est_ghost_is_valid(p4est, ghost_layer) + return p4est_ghost_is_valid(p4est, ghost_layer) end # 3D function ghost_is_valid_p4est(p4est::Ptr{p8est_t}, 
ghost_layer::Ptr{p8est_ghost_t}) - return p8est_ghost_is_valid(p4est, ghost_layer) + return p8est_ghost_is_valid(p4est, ghost_layer) end # Destroy ghost layer @@ -165,79 +165,74 @@ ghost_destroy_p4est(ghost_layer::Ptr{p4est_ghost_t}) = p4est_ghost_destroy(ghost # 3D ghost_destroy_p4est(ghost_layer::Ptr{p8est_ghost_t}) = p8est_ghost_destroy(ghost_layer) - # Let `p4est` iterate over each cell volume and cell face. # Call iter_volume_c for each cell and iter_face_c for each face. # 2D -function iterate_p4est(p4est::Ptr{p4est_t}, user_data; ghost_layer=C_NULL, - iter_volume_c=C_NULL, iter_face_c=C_NULL) - if user_data === C_NULL - user_data_ptr = user_data - elseif user_data isa AbstractArray - user_data_ptr = pointer(user_data) - else - user_data_ptr = pointer_from_objref(user_data) - end - - GC.@preserve user_data begin - p4est_iterate(p4est, - ghost_layer, - user_data_ptr, - iter_volume_c, # iter_volume - iter_face_c, # iter_face - C_NULL) # iter_corner - end - - return nothing +function iterate_p4est(p4est::Ptr{p4est_t}, user_data; ghost_layer = C_NULL, + iter_volume_c = C_NULL, iter_face_c = C_NULL) + if user_data === C_NULL + user_data_ptr = user_data + elseif user_data isa AbstractArray + user_data_ptr = pointer(user_data) + else + user_data_ptr = pointer_from_objref(user_data) + end + + GC.@preserve user_data begin + p4est_iterate(p4est, + ghost_layer, + user_data_ptr, + iter_volume_c, # iter_volume + iter_face_c, # iter_face + C_NULL) # iter_corner + end + + return nothing end # 3D -function iterate_p4est(p8est::Ptr{p8est_t}, user_data; ghost_layer=C_NULL, - iter_volume_c=C_NULL, iter_face_c=C_NULL) - if user_data === C_NULL - user_data_ptr = user_data - elseif user_data isa AbstractArray - user_data_ptr = pointer(user_data) - else - user_data_ptr = pointer_from_objref(user_data) - end - - GC.@preserve user_data begin - p8est_iterate(p8est, - ghost_layer, - user_data_ptr, - iter_volume_c, # iter_volume - iter_face_c, # iter_face - C_NULL, # iter_edge - C_NULL) # iter_corner - end - - return nothing -end +function iterate_p4est(p8est::Ptr{p8est_t}, user_data; ghost_layer = C_NULL, + iter_volume_c = C_NULL, iter_face_c = C_NULL) + if user_data === C_NULL + user_data_ptr = user_data + elseif user_data isa AbstractArray + user_data_ptr = pointer(user_data) + else + user_data_ptr = pointer_from_objref(user_data) + end + + GC.@preserve user_data begin + p8est_iterate(p8est, + ghost_layer, + user_data_ptr, + iter_volume_c, # iter_volume + iter_face_c, # iter_face + C_NULL, # iter_edge + C_NULL) # iter_corner + end + return nothing +end # Load i-th element of the sc_array info.sides of the type p[48]est_iter_face_side_t # 2D version -function unsafe_load_side(info::Ptr{p4est_iter_face_info_t}, i=1) - return unsafe_load_sc(p4est_iter_face_side_t, unsafe_load(info).sides, i) +function unsafe_load_side(info::Ptr{p4est_iter_face_info_t}, i = 1) + return unsafe_load_sc(p4est_iter_face_side_t, unsafe_load(info).sides, i) end # 3D version -function unsafe_load_side(info::Ptr{p8est_iter_face_info_t}, i=1) - return unsafe_load_sc(p8est_iter_face_side_t, unsafe_load(info).sides, i) +function unsafe_load_side(info::Ptr{p8est_iter_face_info_t}, i = 1) + return unsafe_load_sc(p8est_iter_face_side_t, unsafe_load(info).sides, i) end - # Load i-th element of the sc_array p4est.trees of the type p[48]est_tree_t # 2D version -function unsafe_load_tree(p4est::Ptr{p4est_t}, i=1) - return unsafe_load_sc(p4est_tree_t, unsafe_load(p4est).trees, i) +function unsafe_load_tree(p4est::Ptr{p4est_t}, i = 1) + return 
unsafe_load_sc(p4est_tree_t, unsafe_load(p4est).trees, i) end # 3D version -function unsafe_load_tree(p8est::Ptr{p8est_t}, i=1) - return unsafe_load_sc(p8est_tree_t, unsafe_load(p8est).trees, i) +function unsafe_load_tree(p8est::Ptr{p8est_t}, i = 1) + return unsafe_load_sc(p8est_tree_t, unsafe_load(p8est).trees, i) end - - end # @muladd diff --git a/src/auxiliary/precompile.jl b/src/auxiliary/precompile.jl index 3b1cb58e147..7ed0e26b5ef 100644 --- a/src/auxiliary/precompile.jl +++ b/src/auxiliary/precompile.jl @@ -34,12 +34,10 @@ inf_timing = @snoopi tmin=0.01 begin show(stdout, mesh) show(stdout, MIME"text/plain"(), mesh) - semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) show(stdout, semi) show(stdout, MIME"text/plain"(), semi) - ############################################################################### # ODE solvers, callbacks etc. @@ -92,7 +90,6 @@ inf_timing = @snoopi tmin=0.01 begin analysis_callback, alive_callback, amr_callback, stepsize_callback); - ############################################################################### # run the simulation @@ -125,7 +122,6 @@ The latency can be measured by running julia --threads=1 -e '@time using Trixi; @time include(joinpath(examples_dir(), "2d", "elixir_advection_basic.jl"))' ``` - We add `@assert` to the precompile statements below to make sure that we don't include failing precompile statements, cf. https://timholy.github.io/SnoopCompile.jl/stable/snoopi/. If any assertions below fail, it is generally safe to just disable the failing call @@ -135,360 +131,497 @@ statements in accordance with the changes in Trixi.jl's source code. Please, fee the core developers of Trixi.jl to get help with that. =# - import StaticArrays import SciMLBase - # manually generated precompile statements function _precompile_manual_() - ccall(:jl_generating_output, Cint, ()) == 1 || return nothing - - function equations_types_1d(RealT) - ( LinearScalarAdvectionEquation1D{RealT}, - HyperbolicDiffusionEquations1D{RealT}, - CompressibleEulerEquations1D{RealT}, - IdealGlmMhdEquations1D{RealT}, - ) - end - function equations_types_2d(RealT) - ( LinearScalarAdvectionEquation2D{RealT}, - HyperbolicDiffusionEquations2D{RealT}, - CompressibleEulerEquations2D{RealT}, - IdealGlmMhdEquations2D{RealT}, - LatticeBoltzmannEquations2D{RealT, typeof(Trixi.collision_bgk)}, - ) - end - function equations_types_3d(RealT) - ( LinearScalarAdvectionEquation3D{RealT}, - HyperbolicDiffusionEquations3D{RealT}, - CompressibleEulerEquations3D{RealT}, - IdealGlmMhdEquations3D{RealT}, - LatticeBoltzmannEquations3D{RealT, typeof(Trixi.collision_bgk)}, - ) - end - function equations_types(RealT) - ( LinearScalarAdvectionEquation1D{RealT}, - LinearScalarAdvectionEquation2D{RealT}, - LinearScalarAdvectionEquation3D{RealT}, - HyperbolicDiffusionEquations1D{RealT}, - HyperbolicDiffusionEquations2D{RealT}, - HyperbolicDiffusionEquations3D{RealT}, - CompressibleEulerEquations1D{RealT}, - CompressibleEulerEquations2D{RealT}, - CompressibleEulerEquations3D{RealT}, - IdealGlmMhdEquations1D{RealT}, - IdealGlmMhdEquations2D{RealT}, - IdealGlmMhdEquations3D{RealT}, - LatticeBoltzmannEquations2D{RealT, typeof(Trixi.collision_bgk)}, - LatticeBoltzmannEquations3D{RealT, typeof(Trixi.collision_bgk)}, - ) - end - - function basis_type_dgsem(RealT, nnodes_) - LobattoLegendreBasis{RealT,nnodes_, - # VectorT - StaticArrays.SVector{nnodes_,RealT}, - # InverseVandermondeLegendre - Matrix{RealT}, - # BoundaryMatrix - #StaticArrays.SArray{Tuple{nnodes_,2},RealT,2,2*nnodes_}, - 
Matrix{RealT}, - # DerivativeMatrix - #StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, - Matrix{RealT}, - } - end - - function mortar_type_dgsem(RealT, nnodes_) - LobattoLegendreMortarL2{RealT,nnodes_, - # ForwardMatrix - #StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, - Matrix{RealT}, - # ReverseMatrix - # StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, - Matrix{RealT}, - } - end - - function analyzer_type_dgsem(RealT, nnodes_) - polydeg = nnodes_ - 1 - nnodes_analysis = 2 * polydeg + 1 - LobattoLegendreAnalyzer{RealT,nnodes_analysis, - # VectorT - StaticArrays.SVector{nnodes_analysis,RealT}, - # Vandermonde - Array{RealT,2} - } - end - - function adaptor_type_dgsem(RealT, nnodes_) - LobattoLegendreAdaptorL2{RealT,nnodes_, - # ForwardMatrix - StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, - # Matrix{RealT}, - # ReverseMatrix - StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, - # Matrix{RealT}, - } - end - - # Constructors: mesh - for RealT in (Int, Float64,) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:initial_refinement_level, :n_cells_max),Tuple{Int,Int}},Type{TreeMesh},RealT,RealT}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:initial_refinement_level, :n_cells_max),Tuple{Int,Int}},Type{TreeMesh},Tuple{RealT},Tuple{RealT}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:initial_refinement_level, :n_cells_max),Tuple{Int,Int}},Type{TreeMesh},Tuple{RealT,RealT},Tuple{RealT,RealT}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:initial_refinement_level, :n_cells_max),Tuple{Int,Int}},Type{TreeMesh},Tuple{RealT,RealT,RealT},Tuple{RealT,RealT,RealT}}) - end - for TreeType in (SerialTree, ParallelTree), NDIMS in 1:3 - @assert Base.precompile(Tuple{typeof(Trixi.initialize!),TreeMesh{NDIMS,TreeType{NDIMS}},Int,Tuple{},Tuple{}}) - @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file),TreeMesh{NDIMS,TreeType{NDIMS}},String,Int}) - end - - # Constructors: linear advection - for RealT in (Float64,) - @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation1D},RealT}) - @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation2D},RealT,RealT}) - @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation2D},Tuple{RealT,RealT}}) - @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation3D},RealT,RealT,RealT}) - @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation3D},Tuple{RealT,RealT,RealT}}) - end - - # Constructors: hyperbolic diffusion - for RealT in (Float64,) - @assert Base.precompile(Tuple{Type{HyperbolicDiffusionEquations1D},}) - @assert Base.precompile(Tuple{Type{HyperbolicDiffusionEquations2D},}) - @assert Base.precompile(Tuple{Type{HyperbolicDiffusionEquations3D},}) - end - - # Constructors: Euler - for RealT in (Float64,) - @assert Base.precompile(Tuple{Type{CompressibleEulerEquations1D},RealT}) - @assert Base.precompile(Tuple{Type{CompressibleEulerEquations2D},RealT}) - @assert Base.precompile(Tuple{Type{CompressibleEulerEquations3D},RealT}) - end - - # Constructors: MHD - for RealT in (Float64,) - @assert Base.precompile(Tuple{Type{IdealGlmMhdEquations1D},RealT}) - @assert Base.precompile(Tuple{Type{IdealGlmMhdEquations2D},RealT}) - @assert Base.precompile(Tuple{Type{IdealGlmMhdEquations3D},RealT}) - end - - # Constructors: LBM - for RealT in (Float64,) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:Ma, :Re), 
Tuple{RealT, RealT}},Type{LatticeBoltzmannEquations2D}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:Ma, :Re), Tuple{RealT, Int}},Type{LatticeBoltzmannEquations2D}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:Ma, :Re), Tuple{RealT, RealT}},Type{LatticeBoltzmannEquations3D}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:Ma, :Re), Tuple{RealT, Int}},Type{LatticeBoltzmannEquations3D}}) - end - - # Constructors of the basis are inherently type-unstable since we pass integers - # and use their values as parameters of static arrays. - # Nevertheless, we can still precompile methods used to construct the bases. - Base.precompile(Tuple{Type{LobattoLegendreBasis},Int}) - for RealT in (Float64,) - Base.precompile(Tuple{Type{LobattoLegendreBasis},RealT,Int}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_dhat),Vector{RealT},Vector{RealT}}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_dsplit),Vector{RealT},Vector{RealT}}) - @assert Base.precompile(Tuple{typeof(Trixi.polynomial_derivative_matrix),Vector{RealT}}) - @assert Base.precompile(Tuple{typeof(Trixi.polynomial_interpolation_matrix),Vector{RealT},Vector{RealT}}) - @assert Base.precompile(Tuple{typeof(Trixi.barycentric_weights),Vector{RealT}}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_lhat),RealT,Vector{RealT},Vector{RealT}}) - @assert Base.precompile(Tuple{typeof(Trixi.lagrange_interpolating_polynomials),RealT,Vector{RealT},Vector{RealT}}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_q_and_l),Int,RealT}) - @assert Base.precompile(Tuple{typeof(Trixi.legendre_polynomial_and_derivative),Int,RealT}) - @assert Base.precompile(Tuple{typeof(Trixi.vandermonde_legendre),Vector{RealT}}) - end - @assert Base.precompile(Tuple{typeof(Trixi.gauss_lobatto_nodes_weights),Int}) - @assert Base.precompile(Tuple{typeof(Trixi.gauss_nodes_weights),Int}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_forward_upper),Int}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_forward_lower),Int}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_upper),Int,Val{:gauss}}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_lower),Int,Val{:gauss}}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_upper),Int,Val{:gauss_lobatto}}) - @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_lower),Int,Val{:gauss_lobatto}}) - - # Constructors: mortars, analyzers, adaptors - for RealT in (Float64,), polydeg in 1:7 - nnodes_ = polydeg + 1 - basis_type = basis_type_dgsem(RealT, nnodes_) - @assert Base.precompile(Tuple{typeof(Trixi.MortarL2),basis_type}) - @assert Base.precompile(Tuple{Type{Trixi.SolutionAnalyzer},basis_type}) - @assert Base.precompile(Tuple{Type{Trixi.AdaptorL2},basis_type}) - end - - # Constructors: callbacks - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:analysis_interval,),Tuple{Int}},Type{AliveCallback}}) - for RealT in (Float64,) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:cfl,),Tuple{RealT}},Type{StepsizeCallback}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:glm_scale, :cfl),Tuple{RealT,RealT}},Type{GlmSpeedCallback}}) - end - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:interval, :save_final_restart),Tuple{Int,Bool}},Type{SaveRestartCallback}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:interval, :save_initial_solution, :save_final_solution, 
:solution_variables),Tuple{Int,Bool,Bool,typeof(cons2cons)}},Type{SaveSolutionCallback}}) - @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:interval, :save_initial_solution, :save_final_solution, :solution_variables),Tuple{Int,Bool,Bool,typeof(cons2prim)}},Type{SaveSolutionCallback}}) - # TODO: AnalysisCallback? - # for RealT in (Float64,), polydeg in 1:7 - # nnodes_ = polydeg + 1 - # nnodes_analysis = 2*polydeg + 1 - # @assert Base.precompile(Tuple{Type{AnalysisCallback},RealT,Int,Bool,String,String,Trixi.LobattoLegendreAnalyzer{RealT,nnodes_analysis,Array{RealT,2}},Array{Symbol,1},Tuple{typeof(Trixi.entropy_timederivative),typeof(entropy)},StaticArrays.SArray{Tuple{1},RealT,1,1}}) - # We would need to use all special cases instead of - # Function,Trixi.AbstractVolumeIntegral - # for equations_type in equations_types(RealT) - # @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:interval, :extra_analysis_integrals),Tuple{Int,Tuple{typeof(entropy)}}},Type{AnalysisCallback},equations_type,DG{RealT,LobattoLegendreBasis{RealT,nnodes_,StaticArrays.SVector{nnodes_,RealT},Array{RealT,2},StaticArrays.SArray{Tuple{4,2},RealT,2,2*nnodes_},StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}},Trixi.LobattoLegendreMortarL2{RealT,nnodes_,StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}},Function,Trixi.AbstractVolumeIntegral}}) - # end - # end - @assert Base.precompile(Tuple{typeof(SummaryCallback)}) - @assert Base.precompile(Tuple{DiscreteCallback{typeof(Trixi.summary_callback), typeof(Trixi.summary_callback), typeof(Trixi.initialize_summary_callback), typeof(SciMLBase.FINALIZE_DEFAULT)}}) - @assert Base.precompile(Tuple{typeof(summary_box),Base.TTY,String,Vector{Pair{String, Any}}}) - # TODO: AMRCallback, ControllerThreeLevel, indicators - - # init_elements, interfaces, etc. 
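For readers skimming this patch, a minimal, self-contained sketch of the precompilation pattern used throughout this file; `my_norm` is a hypothetical stand-in, not part of Trixi.jl:

```julia
# `Base.precompile` takes a tuple type describing a complete call signature
# and returns `true` only if that signature could actually be compiled.
# Wrapping the call in `@assert` therefore turns signatures that silently
# stopped working after a refactoring into hard failures.
my_norm(v) = sqrt(sum(abs2, v))  # hypothetical stand-in function

@assert Base.precompile(Tuple{typeof(my_norm), Vector{Float64}})
```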
- for RealT in (Float64,), polydeg in 1:7 - uEltype = RealT - nnodes_ = polydeg + 1 - mortar_type = mortar_type_dgsem(RealT, nnodes_) - - # 1D, serial - @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries),Array{Int,1},TreeMesh{1,Trixi.SerialTree{1}},Trixi.ElementContainer1D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces),Array{Int,1},TreeMesh{1,Trixi.SerialTree{1}},Trixi.ElementContainer1D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file),TreeMesh{1,Trixi.SerialTree{1}},String}) - - # 2D, serial - @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries),Array{Int,1},TreeMesh{2,Trixi.SerialTree{2}},Trixi.ElementContainer2D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces),Array{Int,1},TreeMesh{2,Trixi.SerialTree{2}},Trixi.ElementContainer2D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_mortars),Array{Int,1},TreeMesh{2,Trixi.SerialTree{2}},Trixi.ElementContainer2D{RealT,uEltype},mortar_type}) - @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file),TreeMesh{2,Trixi.SerialTree{2}},String}) - - # 2D, parallel - @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries),Array{Int,1},TreeMesh{2,Trixi.ParallelTree{2}},Trixi.ElementContainer2D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces),Array{Int,1},TreeMesh{2,Trixi.ParallelTree{2}},Trixi.ElementContainer2D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_mortars),Array{Int,1},TreeMesh{2,Trixi.ParallelTree{2}},Trixi.ElementContainer2D{RealT,uEltype},mortar_type}) - @assert Base.precompile(Tuple{typeof(Trixi.init_mpi_interfaces),Array{Int,1},TreeMesh{2,Trixi.ParallelTree{2}},Trixi.ElementContainer2D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file),TreeMesh{2,Trixi.ParallelTree{2}},String}) - - # 3D, serial - @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries),Array{Int,1},TreeMesh{3,Trixi.SerialTree{3}},Trixi.ElementContainer3D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces),Array{Int,1},TreeMesh{3,Trixi.SerialTree{3}},Trixi.ElementContainer3D{RealT,uEltype}}) - @assert Base.precompile(Tuple{typeof(Trixi.init_mortars),Array{Int,1},TreeMesh{3,Trixi.SerialTree{3}},Trixi.ElementContainer3D{RealT,uEltype},mortar_type}) - @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file),TreeMesh{3,Trixi.SerialTree{3}},String}) - end - - # various `show` methods - for RealT in (Float64,) - # meshes - for NDIMS in 1:3 - # serial - @assert Base.precompile(Tuple{typeof(show),Base.TTY,TreeMesh{NDIMS,Trixi.SerialTree{NDIMS}}}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",TreeMesh{NDIMS,Trixi.SerialTree{NDIMS}}}) - # parallel - @assert Base.precompile(Tuple{typeof(show),Base.TTY,TreeMesh{NDIMS,Trixi.ParallelTree{NDIMS}}}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",TreeMesh{NDIMS,Trixi.ParallelTree{NDIMS}}}) - end + ccall(:jl_generating_output, Cint, ()) == 1 || return nothing - # equations - for eq_type in equations_types(RealT) - @assert Base.precompile(Tuple{typeof(show),Base.TTY,eq_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",eq_type}) + function equations_types_1d(RealT) + (LinearScalarAdvectionEquation1D{RealT}, + HyperbolicDiffusionEquations1D{RealT}, + CompressibleEulerEquations1D{RealT}, + IdealGlmMhdEquations1D{RealT}) + end + function equations_types_2d(RealT) + 
(LinearScalarAdvectionEquation2D{RealT}, + HyperbolicDiffusionEquations2D{RealT}, + CompressibleEulerEquations2D{RealT}, + IdealGlmMhdEquations2D{RealT}, + LatticeBoltzmannEquations2D{RealT, typeof(Trixi.collision_bgk)}) + end + function equations_types_3d(RealT) + (LinearScalarAdvectionEquation3D{RealT}, + HyperbolicDiffusionEquations3D{RealT}, + CompressibleEulerEquations3D{RealT}, + IdealGlmMhdEquations3D{RealT}, + LatticeBoltzmannEquations3D{RealT, typeof(Trixi.collision_bgk)}) + end + function equations_types(RealT) + (LinearScalarAdvectionEquation1D{RealT}, + LinearScalarAdvectionEquation2D{RealT}, + LinearScalarAdvectionEquation3D{RealT}, + HyperbolicDiffusionEquations1D{RealT}, + HyperbolicDiffusionEquations2D{RealT}, + HyperbolicDiffusionEquations3D{RealT}, + CompressibleEulerEquations1D{RealT}, + CompressibleEulerEquations2D{RealT}, + CompressibleEulerEquations3D{RealT}, + IdealGlmMhdEquations1D{RealT}, + IdealGlmMhdEquations2D{RealT}, + IdealGlmMhdEquations3D{RealT}, + LatticeBoltzmannEquations2D{RealT, typeof(Trixi.collision_bgk)}, + LatticeBoltzmannEquations3D{RealT, typeof(Trixi.collision_bgk)}) end - # mortars, analyzers, adaptors, DG - for polydeg in 1:1 - nnodes_ = polydeg + 1 - basis_type = basis_type_dgsem(RealT, nnodes_) - mortar_type = mortar_type_dgsem(RealT, nnodes_) - analyzer_type = analyzer_type_dgsem(RealT, nnodes_) - adaptor_type = adaptor_type_dgsem(RealT, nnodes_) - - @assert Base.precompile(Tuple{typeof(show),Base.TTY,basis_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",basis_type}) - - @assert Base.precompile(Tuple{typeof(show),Base.TTY,mortar_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",mortar_type}) - - @assert Base.precompile(Tuple{typeof(show),Base.TTY,analyzer_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",analyzer_type}) - - @assert Base.precompile(Tuple{typeof(show),Base.TTY,adaptor_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",adaptor_type}) - - # we could also use more numerical fluxes and volume integral types here - @assert Base.precompile(Tuple{typeof(show),Base.TTY,DG{basis_type,mortar_type,typeof(flux_lax_friedrichs),VolumeIntegralWeakForm}}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",DG{basis_type,mortar_type,typeof(flux_lax_friedrichs),VolumeIntegralWeakForm}}) + function basis_type_dgsem(RealT, nnodes_) + LobattoLegendreBasis{RealT, nnodes_, + # VectorT + StaticArrays.SVector{nnodes_, RealT}, + # InverseVandermondeLegendre + Matrix{RealT}, + # BoundaryMatrix + #StaticArrays.SArray{Tuple{nnodes_,2},RealT,2,2*nnodes_}, + Matrix{RealT}, + # DerivativeMatrix + #StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, + Matrix{RealT} + } end - # semidiscretizations - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",SemidiscretizationHyperbolic}) + function mortar_type_dgsem(RealT, nnodes_) + LobattoLegendreMortarL2{RealT, nnodes_, + # ForwardMatrix + #StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, + Matrix{RealT}, + # ReverseMatrix + # StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}, + Matrix{RealT} + } + end - # callbacks - summary_callback_type = DiscreteCallback{typeof(Trixi.summary_callback),typeof(Trixi.summary_callback),typeof(Trixi.initialize_summary_callback),typeof(SciMLBase.FINALIZE_DEFAULT)} - @assert 
Base.precompile(Tuple{typeof(show),Base.TTY,summary_callback_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",summary_callback_type}) - @assert Base.precompile(Tuple{summary_callback_type,Base.TTY}) + function analyzer_type_dgsem(RealT, nnodes_) + polydeg = nnodes_ - 1 + nnodes_analysis = 2 * polydeg + 1 + LobattoLegendreAnalyzer{RealT, nnodes_analysis, + # VectorT + StaticArrays.SVector{nnodes_analysis, RealT}, + # Vandermonde + Array{RealT, 2} + } + end - # TODO: SteadyStateCallback, AnalysisCallback + function adaptor_type_dgsem(RealT, nnodes_) + LobattoLegendreAdaptorL2{RealT, nnodes_, + # ForwardMatrix + StaticArrays.SArray{Tuple{nnodes_, nnodes_}, RealT, 2, + nnodes_^2}, + # Matrix{RealT}, + # ReverseMatrix + StaticArrays.SArray{Tuple{nnodes_, nnodes_}, RealT, 2, + nnodes_^2} + # Matrix{RealT}, + } + end - alive_callback_type = DiscreteCallback{AliveCallback,AliveCallback,typeof(Trixi.initialize!),typeof(SciMLBase.FINALIZE_DEFAULT)} - @assert Base.precompile(Tuple{typeof(show),Base.TTY,alive_callback_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",alive_callback_type}) + # Constructors: mesh + for RealT in (Int, Float64) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:initial_refinement_level, :n_cells_max), + Tuple{Int, Int}}, Type{TreeMesh}, RealT, + RealT}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:initial_refinement_level, :n_cells_max), + Tuple{Int, Int}}, Type{TreeMesh}, + Tuple{RealT}, Tuple{RealT}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:initial_refinement_level, :n_cells_max), + Tuple{Int, Int}}, Type{TreeMesh}, + Tuple{RealT, RealT}, Tuple{RealT, RealT}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:initial_refinement_level, :n_cells_max), + Tuple{Int, Int}}, Type{TreeMesh}, + Tuple{RealT, RealT, RealT}, Tuple{RealT, RealT, RealT + }}) + end + for TreeType in (SerialTree, ParallelTree), NDIMS in 1:3 + @assert Base.precompile(Tuple{typeof(Trixi.initialize!), + TreeMesh{NDIMS, TreeType{NDIMS}}, Int, Tuple{}, + Tuple{}}) + @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file), + TreeMesh{NDIMS, TreeType{NDIMS}}, String, Int}) + end - restart_callback_type = DiscreteCallback{SaveRestartCallback,SaveRestartCallback,typeof(Trixi.initialize!),typeof(SciMLBase.FINALIZE_DEFAULT)} - @assert Base.precompile(Tuple{typeof(show),Base.TTY,restart_callback_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",restart_callback_type}) + # Constructors: linear advection + for RealT in (Float64,) + @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation1D}, RealT}) + @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation2D}, RealT, RealT}) + @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation2D}, + Tuple{RealT, RealT}}) + @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation3D}, RealT, RealT, + RealT}) + @assert Base.precompile(Tuple{Type{LinearScalarAdvectionEquation3D}, + Tuple{RealT, RealT, RealT}}) + end - for solution_variables in (cons2cons, cons2prim) - save_solution_callback_type = DiscreteCallback{SaveSolutionCallback{typeof(solution_variables)},SaveSolutionCallback{typeof(solution_variables)},typeof(Trixi.initialize!),typeof(SciMLBase.FINALIZE_DEFAULT)} - @assert Base.precompile(Tuple{typeof(show),Base.TTY,save_solution_callback_type}) - @assert 
Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",save_solution_callback_type}) + # Constructors: hyperbolic diffusion + for RealT in (Float64,) + @assert Base.precompile(Tuple{Type{HyperbolicDiffusionEquations1D}}) + @assert Base.precompile(Tuple{Type{HyperbolicDiffusionEquations2D}}) + @assert Base.precompile(Tuple{Type{HyperbolicDiffusionEquations3D}}) end - # TODO: AMRCallback + # Constructors: Euler + for RealT in (Float64,) + @assert Base.precompile(Tuple{Type{CompressibleEulerEquations1D}, RealT}) + @assert Base.precompile(Tuple{Type{CompressibleEulerEquations2D}, RealT}) + @assert Base.precompile(Tuple{Type{CompressibleEulerEquations3D}, RealT}) + end - stepsize_callback_type = DiscreteCallback{StepsizeCallback{RealT},StepsizeCallback{RealT},typeof(Trixi.initialize!),typeof(SciMLBase.FINALIZE_DEFAULT)} - @assert Base.precompile(Tuple{typeof(show),Base.TTY,stepsize_callback_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",stepsize_callback_type}) + # Constructors: MHD + for RealT in (Float64,) + @assert Base.precompile(Tuple{Type{IdealGlmMhdEquations1D}, RealT}) + @assert Base.precompile(Tuple{Type{IdealGlmMhdEquations2D}, RealT}) + @assert Base.precompile(Tuple{Type{IdealGlmMhdEquations3D}, RealT}) + end - glm_speed_callback_type = DiscreteCallback{GlmSpeedCallback{RealT},GlmSpeedCallback{RealT},typeof(Trixi.initialize!),typeof(SciMLBase.FINALIZE_DEFAULT)} - @assert Base.precompile(Tuple{typeof(show),Base.TTY,glm_speed_callback_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",glm_speed_callback_type}) + # Constructors: LBM + for RealT in (Float64,) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:Ma, :Re), Tuple{RealT, RealT}}, + Type{LatticeBoltzmannEquations2D}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:Ma, :Re), Tuple{RealT, Int}}, + Type{LatticeBoltzmannEquations2D}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:Ma, :Re), Tuple{RealT, RealT}}, + Type{LatticeBoltzmannEquations3D}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:Ma, :Re), Tuple{RealT, Int}}, + Type{LatticeBoltzmannEquations3D}}) + end - lbm_collision_callback_type = DiscreteCallback{typeof(Trixi.lbm_collision_callback),typeof(Trixi.lbm_collision_callback),typeof(Trixi.initialize!),typeof(SciMLBase.FINALIZE_DEFAULT)} - @assert Base.precompile(Tuple{typeof(show),Base.TTY,lbm_collision_callback_type}) - @assert Base.precompile(Tuple{typeof(show),IOContext{Base.TTY},MIME"text/plain",lbm_collision_callback_type}) + # Constructors of the basis are inherently type-unstable since we pass integers + # and use their values as parameters of static arrays. + # Nevertheless, we can still precompile methods used to construct the bases. 
+ Base.precompile(Tuple{Type{LobattoLegendreBasis}, Int}) + for RealT in (Float64,) + Base.precompile(Tuple{Type{LobattoLegendreBasis}, RealT, Int}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_dhat), Vector{RealT}, Vector{RealT} + }) + @assert Base.precompile(Tuple{typeof(Trixi.calc_dsplit), Vector{RealT}, + Vector{RealT}}) + @assert Base.precompile(Tuple{typeof(Trixi.polynomial_derivative_matrix), + Vector{RealT}}) + @assert Base.precompile(Tuple{typeof(Trixi.polynomial_interpolation_matrix), + Vector{RealT}, Vector{RealT}}) + @assert Base.precompile(Tuple{typeof(Trixi.barycentric_weights), Vector{RealT}}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_lhat), RealT, Vector{RealT}, + Vector{RealT}}) + @assert Base.precompile(Tuple{typeof(Trixi.lagrange_interpolating_polynomials), + RealT, Vector{RealT}, Vector{RealT}}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_q_and_l), Int, RealT}) + @assert Base.precompile(Tuple{typeof(Trixi.legendre_polynomial_and_derivative), Int, + RealT}) + @assert Base.precompile(Tuple{typeof(Trixi.vandermonde_legendre), Vector{RealT}}) + end + @assert Base.precompile(Tuple{typeof(Trixi.gauss_lobatto_nodes_weights), Int}) + @assert Base.precompile(Tuple{typeof(Trixi.gauss_nodes_weights), Int}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_forward_upper), Int}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_forward_lower), Int}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_upper), Int, Val{:gauss}}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_lower), Int, Val{:gauss}}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_upper), Int, Val{:gauss_lobatto + }}) + @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_lower), Int, Val{:gauss_lobatto + }}) + + # Constructors: mortars, analyzers, adaptors + for RealT in (Float64,), polydeg in 1:7 + nnodes_ = polydeg + 1 + basis_type = basis_type_dgsem(RealT, nnodes_) + @assert Base.precompile(Tuple{typeof(Trixi.MortarL2), basis_type}) + @assert Base.precompile(Tuple{Type{Trixi.SolutionAnalyzer}, basis_type}) + @assert Base.precompile(Tuple{Type{Trixi.AdaptorL2}, basis_type}) + end - # infrastructure, special elixirs - @assert Base.precompile(Tuple{typeof(trixi_include),String}) - end + # Constructors: callbacks + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:analysis_interval,), Tuple{Int}}, + Type{AliveCallback}}) + for RealT in (Float64,) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:cfl,), Tuple{RealT}}, + Type{StepsizeCallback}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:glm_scale, :cfl), Tuple{RealT, RealT}}, + Type{GlmSpeedCallback}}) + end + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{(:interval, :save_final_restart), + Tuple{Int, Bool}}, Type{SaveRestartCallback}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{ + (:interval, :save_initial_solution, + :save_final_solution, :solution_variables), + Tuple{Int, Bool, Bool, typeof(cons2cons)}}, + Type{SaveSolutionCallback}}) + @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)), + NamedTuple{ + (:interval, :save_initial_solution, + :save_final_solution, :solution_variables), + Tuple{Int, Bool, Bool, typeof(cons2prim)}}, + Type{SaveSolutionCallback}}) + # TODO: AnalysisCallback? 
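The type instability noted in the comment above can be illustrated with a small self-contained sketch; `MyBasis` is a hypothetical stand-in for `LobattoLegendreBasis`, assuming only that the basis stores statically sized arrays:

```julia
using StaticArrays

# The node count `n` is a runtime value, yet it becomes a *type* parameter
# of the static vector. The concrete return type of the constructor thus
# depends on the value of `n`, not only on its type, which is why these
# constructors cannot be inferred (and why their helper methods are
# precompiled individually instead).
struct MyBasis{N, VectorT}
    nodes::VectorT
end

function MyBasis(n::Int)
    nodes = zero(SVector{n, Float64})
    return MyBasis{n, typeof(nodes)}(nodes)
end

MyBasis(3)  # MyBasis{3, SVector{3, Float64}}; MyBasis(4) is a different concrete type
```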
+ # for RealT in (Float64,), polydeg in 1:7 + # nnodes_ = polydeg + 1 + # nnodes_analysis = 2*polydeg + 1 + # @assert Base.precompile(Tuple{Type{AnalysisCallback},RealT,Int,Bool,String,String,Trixi.LobattoLegendreAnalyzer{RealT,nnodes_analysis,Array{RealT,2}},Array{Symbol,1},Tuple{typeof(Trixi.entropy_timederivative),typeof(entropy)},StaticArrays.SArray{Tuple{1},RealT,1,1}}) + # We would need to use all special cases instead of + # Function,Trixi.AbstractVolumeIntegral + # for equations_type in equations_types(RealT) + # @assert Base.precompile(Tuple{Core.kwftype(typeof(Trixi.Type)),NamedTuple{(:interval, :extra_analysis_integrals),Tuple{Int,Tuple{typeof(entropy)}}},Type{AnalysisCallback},equations_type,DG{RealT,LobattoLegendreBasis{RealT,nnodes_,StaticArrays.SVector{nnodes_,RealT},Array{RealT,2},StaticArrays.SArray{Tuple{4,2},RealT,2,2*nnodes_},StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}},Trixi.LobattoLegendreMortarL2{RealT,nnodes_,StaticArrays.SArray{Tuple{nnodes_,nnodes_},RealT,2,nnodes_^2}},Function,Trixi.AbstractVolumeIntegral}}) + # end + # end + @assert Base.precompile(Tuple{typeof(SummaryCallback)}) + @assert Base.precompile(Tuple{ + DiscreteCallback{typeof(Trixi.summary_callback), + typeof(Trixi.summary_callback), + typeof(Trixi.initialize_summary_callback), + typeof(SciMLBase.FINALIZE_DEFAULT)}}) + @assert Base.precompile(Tuple{typeof(summary_box), Base.TTY, String, + Vector{Pair{String, Any}}}) + # TODO: AMRCallback, ControllerThreeLevel, indicators + + # init_elements, interfaces, etc. + for RealT in (Float64,), polydeg in 1:7 + uEltype = RealT + nnodes_ = polydeg + 1 + mortar_type = mortar_type_dgsem(RealT, nnodes_) + + # 1D, serial + @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries), Array{Int, 1}, + TreeMesh{1, Trixi.SerialTree{1}}, + Trixi.ElementContainer1D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces), Array{Int, 1}, + TreeMesh{1, Trixi.SerialTree{1}}, + Trixi.ElementContainer1D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file), + TreeMesh{1, Trixi.SerialTree{1}}, String}) + + # 2D, serial + @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries), Array{Int, 1}, + TreeMesh{2, Trixi.SerialTree{2}}, + Trixi.ElementContainer2D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces), Array{Int, 1}, + TreeMesh{2, Trixi.SerialTree{2}}, + Trixi.ElementContainer2D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.init_mortars), Array{Int, 1}, + TreeMesh{2, Trixi.SerialTree{2}}, + Trixi.ElementContainer2D{RealT, uEltype}, mortar_type + }) + @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file), + TreeMesh{2, Trixi.SerialTree{2}}, String}) + + # 2D, parallel + @assert Base.precompile(Tuple{typeof(Trixi.init_boundaries), Array{Int, 1}, + TreeMesh{2, Trixi.ParallelTree{2}}, + Trixi.ElementContainer2D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces), Array{Int, 1}, + TreeMesh{2, Trixi.ParallelTree{2}}, + Trixi.ElementContainer2D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.init_mortars), Array{Int, 1}, + TreeMesh{2, Trixi.ParallelTree{2}}, + Trixi.ElementContainer2D{RealT, uEltype}, mortar_type + }) + @assert Base.precompile(Tuple{typeof(Trixi.init_mpi_interfaces), Array{Int, 1}, + TreeMesh{2, Trixi.ParallelTree{2}}, + Trixi.ElementContainer2D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file), + TreeMesh{2, Trixi.ParallelTree{2}}, String}) + + # 3D, serial + @assert 
Base.precompile(Tuple{typeof(Trixi.init_boundaries), Array{Int, 1}, + TreeMesh{3, Trixi.SerialTree{3}}, + Trixi.ElementContainer3D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.init_interfaces), Array{Int, 1}, + TreeMesh{3, Trixi.SerialTree{3}}, + Trixi.ElementContainer3D{RealT, uEltype}}) + @assert Base.precompile(Tuple{typeof(Trixi.init_mortars), Array{Int, 1}, + TreeMesh{3, Trixi.SerialTree{3}}, + Trixi.ElementContainer3D{RealT, uEltype}, mortar_type + }) + @assert Base.precompile(Tuple{typeof(Trixi.save_mesh_file), + TreeMesh{3, Trixi.SerialTree{3}}, String}) + end - @assert Base.precompile(Tuple{typeof(init_mpi)}) - @assert Base.precompile(Tuple{typeof(init_p4est)}) + # various `show` methods + for RealT in (Float64,) + # meshes + for NDIMS in 1:3 + # serial + @assert Base.precompile(Tuple{typeof(show), Base.TTY, + TreeMesh{NDIMS, Trixi.SerialTree{NDIMS}}}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", + TreeMesh{NDIMS, Trixi.SerialTree{NDIMS}}}) + # parallel + @assert Base.precompile(Tuple{typeof(show), Base.TTY, + TreeMesh{NDIMS, Trixi.ParallelTree{NDIMS}}}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", + TreeMesh{NDIMS, Trixi.ParallelTree{NDIMS}}}) + end + + # equations + for eq_type in equations_types(RealT) + @assert Base.precompile(Tuple{typeof(show), Base.TTY, eq_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", eq_type}) + end + + # mortars, analyzers, adaptors, DG + for polydeg in 1:1 + nnodes_ = polydeg + 1 + basis_type = basis_type_dgsem(RealT, nnodes_) + mortar_type = mortar_type_dgsem(RealT, nnodes_) + analyzer_type = analyzer_type_dgsem(RealT, nnodes_) + adaptor_type = adaptor_type_dgsem(RealT, nnodes_) + + @assert Base.precompile(Tuple{typeof(show), Base.TTY, basis_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", basis_type}) + + @assert Base.precompile(Tuple{typeof(show), Base.TTY, mortar_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", mortar_type}) + + @assert Base.precompile(Tuple{typeof(show), Base.TTY, analyzer_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", analyzer_type}) + + @assert Base.precompile(Tuple{typeof(show), Base.TTY, adaptor_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", adaptor_type}) + + # we could also use more numerical fluxes and volume integral types here + @assert Base.precompile(Tuple{typeof(show), Base.TTY, + DG{basis_type, mortar_type, + typeof(flux_lax_friedrichs), + VolumeIntegralWeakForm}}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", + DG{basis_type, mortar_type, + typeof(flux_lax_friedrichs), + VolumeIntegralWeakForm}}) + end + + # semidiscretizations + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", + SemidiscretizationHyperbolic}) + + # callbacks + summary_callback_type = DiscreteCallback{typeof(Trixi.summary_callback), + typeof(Trixi.summary_callback), + typeof(Trixi.initialize_summary_callback), + typeof(SciMLBase.FINALIZE_DEFAULT)} + @assert Base.precompile(Tuple{typeof(show), Base.TTY, summary_callback_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", + summary_callback_type}) + @assert Base.precompile(Tuple{summary_callback_type, Base.TTY}) + + # TODO: SteadyStateCallback, AnalysisCallback + 
+ alive_callback_type = DiscreteCallback{AliveCallback, AliveCallback, + typeof(Trixi.initialize!), + typeof(SciMLBase.FINALIZE_DEFAULT)} + @assert Base.precompile(Tuple{typeof(show), Base.TTY, alive_callback_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", + alive_callback_type}) + + restart_callback_type = DiscreteCallback{SaveRestartCallback, SaveRestartCallback, + typeof(Trixi.initialize!), + typeof(SciMLBase.FINALIZE_DEFAULT)} + @assert Base.precompile(Tuple{typeof(show), Base.TTY, restart_callback_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", + restart_callback_type}) + + for solution_variables in (cons2cons, cons2prim) + save_solution_callback_type = DiscreteCallback{ + SaveSolutionCallback{ + typeof(solution_variables) + }, + SaveSolutionCallback{ + typeof(solution_variables) + }, + typeof(Trixi.initialize!), + typeof(SciMLBase.FINALIZE_DEFAULT) + } + @assert Base.precompile(Tuple{typeof(show), Base.TTY, + save_solution_callback_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, + MIME"text/plain", save_solution_callback_type}) + end + + # TODO: AMRCallback + + stepsize_callback_type = DiscreteCallback{StepsizeCallback{RealT}, + StepsizeCallback{RealT}, + typeof(Trixi.initialize!), + typeof(SciMLBase.FINALIZE_DEFAULT)} + @assert Base.precompile(Tuple{typeof(show), Base.TTY, stepsize_callback_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", + stepsize_callback_type}) + + glm_speed_callback_type = DiscreteCallback{GlmSpeedCallback{RealT}, + GlmSpeedCallback{RealT}, + typeof(Trixi.initialize!), + typeof(SciMLBase.FINALIZE_DEFAULT)} + @assert Base.precompile(Tuple{typeof(show), Base.TTY, glm_speed_callback_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", + glm_speed_callback_type}) + + lbm_collision_callback_type = DiscreteCallback{typeof(Trixi.lbm_collision_callback), + typeof(Trixi.lbm_collision_callback), + typeof(Trixi.initialize!), + typeof(SciMLBase.FINALIZE_DEFAULT)} + @assert Base.precompile(Tuple{typeof(show), Base.TTY, lbm_collision_callback_type}) + @assert Base.precompile(Tuple{typeof(show), IOContext{Base.TTY}, MIME"text/plain", + lbm_collision_callback_type}) + + # infrastructure, special elixirs + @assert Base.precompile(Tuple{typeof(trixi_include), String}) + end - # The following precompile statements do not seem to be taken - # # `multiply_dimensionwise!` as used in the analysis callback - # for RealT in (Float64,) - # # 1D version - # @assert Base.precompile(Tuple{typeof(multiply_dimensionwise!),Array{RealT, 2},Matrix{RealT},SubArray{RealT, 2, Array{RealT, 3}, Tuple{Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Int}, true}}) - # # 2D version - # @assert Base.precompile(Tuple{typeof(multiply_dimensionwise!),Array{RealT, 3},Matrix{RealT},SubArray{RealT, 3, Array{RealT, 4}, Tuple{Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Int}, true},Array{RealT, 3}}) - # # 3D version - # @assert Base.precompile(Tuple{typeof(multiply_dimensionwise!),Array{RealT, 4},Matrix{RealT},SubArray{RealT, 4, Array{RealT, 5}, Tuple{Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Int}, true},Array{RealT, 4},Array{RealT, 4}}) - # end + @assert Base.precompile(Tuple{typeof(init_mpi)}) + @assert Base.precompile(Tuple{typeof(init_p4est)}) + + # The following precompile statements do not 
seem to be taken + # # `multiply_dimensionwise!` as used in the analysis callback + # for RealT in (Float64,) + # # 1D version + # @assert Base.precompile(Tuple{typeof(multiply_dimensionwise!),Array{RealT, 2},Matrix{RealT},SubArray{RealT, 2, Array{RealT, 3}, Tuple{Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Int}, true}}) + # # 2D version + # @assert Base.precompile(Tuple{typeof(multiply_dimensionwise!),Array{RealT, 3},Matrix{RealT},SubArray{RealT, 3, Array{RealT, 4}, Tuple{Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Int}, true},Array{RealT, 3}}) + # # 3D version + # @assert Base.precompile(Tuple{typeof(multiply_dimensionwise!),Array{RealT, 4},Matrix{RealT},SubArray{RealT, 4, Array{RealT, 5}, Tuple{Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Base.Slice{Base.OneTo{Int}}, Int}, true},Array{RealT, 4},Array{RealT, 4}}) + # end - return nothing + return nothing end - # Explicit precompilation running code only on Julia v1.9 and newer using PrecompileTools: @setup_workload, @compile_workload @static if VERSION >= v"1.9.0-beta4" - @setup_workload begin - # Setup code can go here - - @compile_workload begin - # Everything inside this block will run at precompile time, saving the - # binary code to a cache in newer versions of Julia. - DGSEM(3) + @setup_workload begin + # Setup code can go here + + @compile_workload begin + # Everything inside this block will run at precompile time, saving the + # binary code to a cache in newer versions of Julia. + DGSEM(3) + end end - end end diff --git a/src/auxiliary/special_elixirs.jl b/src/auxiliary/special_elixirs.jl index 0724c62bcba..da73b42e572 100644 --- a/src/auxiliary/special_elixirs.jl +++ b/src/auxiliary/special_elixirs.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Note: We can't call the method below `Trixi.include` since that is created automatically # inside `module Trixi` to `include` source files and evaluate them within the global scope @@ -36,15 +36,16 @@ julia> redirect_stdout(devnull) do ``` """ function trixi_include(mod::Module, elixir::AbstractString; kwargs...) - # Print information on potential wait time only in non-parallel case - if !mpi_isparallel() - @info "You just called `trixi_include`. Julia may now compile the code, please be patient." - end - Base.include(ex -> replace_assignments(insert_maxiters(ex); kwargs...), mod, elixir) + # Print information on potential wait time only in non-parallel case + if !mpi_isparallel() + @info "You just called `trixi_include`. Julia may now compile the code, please be patient." + end + Base.include(ex -> replace_assignments(insert_maxiters(ex); kwargs...), mod, elixir) end -trixi_include(elixir::AbstractString; kwargs...) = trixi_include(Main, elixir; kwargs...) - +function trixi_include(elixir::AbstractString; kwargs...) + trixi_include(Main, elixir; kwargs...) +end """ convergence_test([mod::Module=Main,] elixir::AbstractString, iterations; kwargs...) @@ -60,94 +61,97 @@ This function assumes that the spatial resolution is set via the keywords integers, one per spatial dimension). """ function convergence_test(mod::Module, elixir::AbstractString, iterations; kwargs...) 
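A hedged usage sketch of `convergence_test`; the elixir path below is illustrative, and any elixir that defines `initial_refinement_level` (or `cells_per_dimension`) should work:

```julia
using Trixi

# Run three successively refined simulations of a basic advection setup and
# return the mean experimental orders of convergence (EOCs) per variable.
elixir = joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_basic.jl")
mean_eocs = convergence_test(elixir, 3)
```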
- @assert(iterations > 1, "Number of iterations must be bigger than 1 for a convergence analysis") + @assert(iterations>1, + "Number of iterations must be bigger than 1 for a convergence analysis") - # Types of errors to be calculated - errors = Dict(:l2 => Float64[], :linf => Float64[]) + # Types of errors to be calculated + errors = Dict(:l2 => Float64[], :linf => Float64[]) - initial_resolution = extract_initial_resolution(elixir, kwargs) + initial_resolution = extract_initial_resolution(elixir, kwargs) - # run simulations and extract errors - for iter in 1:iterations - println("Running convtest iteration ", iter, "/", iterations) + # run simulations and extract errors + for iter in 1:iterations + println("Running convtest iteration ", iter, "/", iterations) - include_refined(mod, elixir, initial_resolution, iter; kwargs) + include_refined(mod, elixir, initial_resolution, iter; kwargs) - l2_error, linf_error = mod.analysis_callback(mod.sol) + l2_error, linf_error = mod.analysis_callback(mod.sol) - # collect errors as one vector to reshape later - append!(errors[:l2], l2_error) - append!(errors[:linf], linf_error) + # collect errors as one vector to reshape later + append!(errors[:l2], l2_error) + append!(errors[:linf], linf_error) - println("\n\n") - println("#"^100) - end + println("\n\n") + println("#"^100) + end - # number of variables - _, equations, _, _ = mesh_equations_solver_cache(mod.semi) - variablenames = varnames(cons2cons, equations) - nvariables = length(variablenames) + # number of variables + _, equations, _, _ = mesh_equations_solver_cache(mod.semi) + variablenames = varnames(cons2cons, equations) + nvariables = length(variablenames) - # Reshape errors to get a matrix where the i-th row represents the i-th iteration - # and the j-th column represents the j-th variable - errorsmatrix = Dict(kind => transpose(reshape(error, (nvariables, iterations))) for (kind, error) in errors) + # Reshape errors to get a matrix where the i-th row represents the i-th iteration + # and the j-th column represents the j-th variable + errorsmatrix = Dict(kind => transpose(reshape(error, (nvariables, iterations))) + for (kind, error) in errors) - # Calculate EOCs where the columns represent the variables - # As dx halves in every iteration the denominator needs to be log(1/2) - eocs = Dict(kind => log.(error[2:end, :] ./ error[1:end-1, :]) ./ log(1 / 2) for (kind, error) in errorsmatrix) + # Calculate EOCs where the columns represent the variables + # As dx halves in every iteration the denominator needs to be log(1/2) + eocs = Dict(kind => log.(error[2:end, :] ./ error[1:(end - 1), :]) ./ log(1 / 2) + for (kind, error) in errorsmatrix) - eoc_mean_values = Dict{Symbol,Any}() - eoc_mean_values[:variables] = variablenames + eoc_mean_values = Dict{Symbol, Any}() + eoc_mean_values[:variables] = variablenames - for (kind, error) in errorsmatrix - println(kind) + for (kind, error) in errorsmatrix + println(kind) - for v in variablenames - @printf("%-20s", v) - end - println("") + for v in variablenames + @printf("%-20s", v) + end + println("") - for k = 1:nvariables - @printf("%-10s", "error") - @printf("%-10s", "EOC") - end - println("") + for k in 1:nvariables + @printf("%-10s", "error") + @printf("%-10s", "EOC") + end + println("") - # Print errors for the first iteration - for k = 1:nvariables - @printf("%-10.2e", error[1, k]) - @printf("%-10s", "-") - end - println("") - - # For the following iterations print errors and EOCs - for j = 2:iterations - for k = 1:nvariables - @printf("%-10.2e", 
error[j, k]) - @printf("%-10.2f", eocs[kind][j-1, k]) - end - println("") - end - println("") - - # Print mean EOCs - mean_values = zeros(nvariables) - for v in 1:nvariables - mean_values[v] = sum(eocs[kind][:, v]) ./ length(eocs[kind][:, v]) - @printf("%-10s", "mean") - @printf("%-10.2f", mean_values[v]) + # Print errors for the first iteration + for k in 1:nvariables + @printf("%-10.2e", error[1, k]) + @printf("%-10s", "-") + end + println("") + + # For the following iterations print errors and EOCs + for j in 2:iterations + for k in 1:nvariables + @printf("%-10.2e", error[j, k]) + @printf("%-10.2f", eocs[kind][j - 1, k]) + end + println("") + end + println("") + + # Print mean EOCs + mean_values = zeros(nvariables) + for v in 1:nvariables + mean_values[v] = sum(eocs[kind][:, v]) ./ length(eocs[kind][:, v]) + @printf("%-10s", "mean") + @printf("%-10.2f", mean_values[v]) + end + eoc_mean_values[kind] = mean_values + println("") + println("-"^100) end - eoc_mean_values[kind] = mean_values - println("") - println("-"^100) - end - return eoc_mean_values + return eoc_mean_values end -convergence_test(elixir::AbstractString, iterations; kwargs...) = convergence_test(Main, elixir::AbstractString, iterations; kwargs...) - - +function convergence_test(elixir::AbstractString, iterations; kwargs...) + convergence_test(Main, elixir::AbstractString, iterations; kwargs...) +end # Helper methods used in the functions defined above @@ -158,130 +162,134 @@ walkexpr(f, x) = f(x) # Insert the keyword argument `maxiters` into calls to `solve` and `Trixi.solve` # with default value `10^5` if it is not already present. function insert_maxiters(expr) - maxiters_default = 10^5 - - expr = walkexpr(expr) do x - if x isa Expr - is_plain_solve = x.head === Symbol("call") && x.args[1] === Symbol("solve") - is_trixi_solve = (x.head === Symbol("call") && x.args[1] isa Expr && - x.args[1].head === Symbol(".") && - x.args[1].args[1] === Symbol("Trixi") && - x.args[1].args[2] isa QuoteNode && - x.args[1].args[2].value === Symbol("solve")) - - if is_plain_solve || is_trixi_solve - # Do nothing if `maxiters` is already set as keyword argument... - for arg in x.args - # This detects the case where `maxiters` is set as keyword argument - # without or before a semicolon - if (arg isa Expr && arg.head === Symbol("kw") && arg.args[1] === Symbol("maxiters")) - return x - end - - # This detects the case where maxiters is set as keyword argument - # after a semicolon - if (arg isa Expr && arg.head === Symbol("parameters")) - # We need to check each keyword argument listed here - for nested_arg in arg.args - if (nested_arg isa Expr && nested_arg.head === Symbol("kw") && - nested_arg.args[1] === Symbol("maxiters")) - return x - end + maxiters_default = 10^5 + + expr = walkexpr(expr) do x + if x isa Expr + is_plain_solve = x.head === Symbol("call") && x.args[1] === Symbol("solve") + is_trixi_solve = (x.head === Symbol("call") && x.args[1] isa Expr && + x.args[1].head === Symbol(".") && + x.args[1].args[1] === Symbol("Trixi") && + x.args[1].args[2] isa QuoteNode && + x.args[1].args[2].value === Symbol("solve")) + + if is_plain_solve || is_trixi_solve + # Do nothing if `maxiters` is already set as keyword argument... 
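+                # (Illustration: in `solve(ode, alg, maxiters = 10)` the keyword is
+                # parsed as `Expr(:kw, :maxiters, 10)` directly inside `x.args`,
+                # while in `solve(ode, alg; maxiters = 10)` it is wrapped in an
+                # `Expr(:parameters, ...)` node, hence the two checks below.)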
+ for arg in x.args + # This detects the case where `maxiters` is set as keyword argument + # without or before a semicolon + if (arg isa Expr && arg.head === Symbol("kw") && + arg.args[1] === Symbol("maxiters")) + return x + end + + # This detects the case where maxiters is set as keyword argument + # after a semicolon + if (arg isa Expr && arg.head === Symbol("parameters")) + # We need to check each keyword argument listed here + for nested_arg in arg.args + if (nested_arg isa Expr && + nested_arg.head === Symbol("kw") && + nested_arg.args[1] === Symbol("maxiters")) + return x + end + end + end + end + + # ...and insert it otherwise. + push!(x.args, Expr(Symbol("kw"), Symbol("maxiters"), maxiters_default)) end - end end - return x end - return expr + return expr end # Replace assignments to `key` in `expr` by `key = val` for all `(key,val)` in `kwargs`. function replace_assignments(expr; kwargs...) - # replace explicit and keyword assignments - expr = walkexpr(expr) do x - if x isa Expr - for (key,val) in kwargs - if (x.head === Symbol("=") || x.head === :kw) && x.args[1] === Symbol(key) - x.args[2] = :( $val ) - # dump(x) + # replace explicit and keyword assignments + expr = walkexpr(expr) do x + if x isa Expr + for (key, val) in kwargs + if (x.head === Symbol("=") || x.head === :kw) && + x.args[1] === Symbol(key) + x.args[2] = :($val) + # dump(x) + end + end end - end + return x end - return x - end - return expr + return expr end # find a (keyword or common) assignment to `destination` in `expr` # and return the assigned value function find_assignment(expr, destination) - # declare result to be able to assign to it in the closure - local result - - # find explicit and keyword assignments - walkexpr(expr) do x - if x isa Expr - if (x.head === Symbol("=") || x.head === :kw) && x.args[1] === Symbol(destination) - result = x.args[2] - # dump(x) - end + # declare result to be able to assign to it in the closure + local result + + # find explicit and keyword assignments + walkexpr(expr) do x + if x isa Expr + if (x.head === Symbol("=") || x.head === :kw) && + x.args[1] === Symbol(destination) + result = x.args[2] + # dump(x) + end + end + return x end - return x - end - result + result end # searches the parameter that specifies the mesh resolution in the elixir function extract_initial_resolution(elixir, kwargs) - code = read(elixir, String) - expr = Meta.parse("begin \n$code \nend") + code = read(elixir, String) + expr = Meta.parse("begin \n$code \nend") - try - # get the initial_refinement_level from the elixir - initial_refinement_level = find_assignment(expr, :initial_refinement_level) + try + # get the initial_refinement_level from the elixir + initial_refinement_level = find_assignment(expr, :initial_refinement_level) - if haskey(kwargs, :initial_refinement_level) - return kwargs[:initial_refinement_level] - else - return initial_refinement_level - end - catch e - if isa(e, UndefVarError) - # get cells_per_dimension from the elixir - cells_per_dimension = eval(find_assignment(expr, :cells_per_dimension)) - - if haskey(kwargs, :cells_per_dimension) - return kwargs[:cells_per_dimension] - else - return cells_per_dimension - end - else - throw(e) + if haskey(kwargs, :initial_refinement_level) + return kwargs[:initial_refinement_level] + else + return initial_refinement_level + end + catch e + if isa(e, UndefVarError) + # get cells_per_dimension from the
elixir + cells_per_dimension = eval(find_assignment(expr, :cells_per_dimension)) + + if haskey(kwargs, :cells_per_dimension) + return kwargs[:cells_per_dimension] + else + return cells_per_dimension + end + else + throw(e) + end end - end end # runs the specified elixir with a doubled resolution each time iter is increased by 1 # works for TreeMesh function include_refined(mod, elixir, initial_refinement_level::Int, iter; kwargs) - trixi_include(mod, elixir; kwargs..., initial_refinement_level=initial_refinement_level+iter-1) + trixi_include(mod, elixir; kwargs..., + initial_refinement_level = initial_refinement_level + iter - 1) end # runs the specified elixir with a doubled resolution each time iter is increased by 1 # works for StructuredMesh -function include_refined(mod, elixir, cells_per_dimension::NTuple{NDIMS, Int}, iter; kwargs) where {NDIMS} - new_cells_per_dimension = cells_per_dimension .* 2^(iter - 1) +function include_refined(mod, elixir, cells_per_dimension::NTuple{NDIMS, Int}, iter; + kwargs) where {NDIMS} + new_cells_per_dimension = cells_per_dimension .* 2^(iter - 1) - trixi_include(mod, elixir; kwargs..., cells_per_dimension=new_cells_per_dimension) + trixi_include(mod, elixir; kwargs..., cells_per_dimension = new_cells_per_dimension) end - - end # @muladd diff --git a/src/basic_types.jl b/src/basic_types.jl index 4539e26dea3..ee479a62039 100644 --- a/src/basic_types.jl +++ b/src/basic_types.jl @@ -3,14 +3,13 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # abstract supertype of specific semidiscretizations such as # - SemidiscretizationHyperbolic for hyperbolic conservation laws # - SemidiscretizationEulerGravity for Euler with self-gravity abstract type AbstractSemidiscretization end - """ AbstractEquations{NDIMS, NVARS} @@ -20,7 +19,6 @@ number of primary variables (`NVARS`) of the physics model. """ abstract type AbstractEquations{NDIMS, NVARS} end - """ AbstractMesh{NDIMS} @@ -29,36 +27,30 @@ The type parameters encode the number of spatial dimensions (`NDIMS`). """ abstract type AbstractMesh{NDIMS} end - # abstract supertype of specific SBP bases such as a Lobatto-Legendre nodal basis -abstract type AbstractBasisSBP{RealT<:Real} end - +abstract type AbstractBasisSBP{RealT <: Real} end # abstract supertype of mortar methods, e.g. using L² projections -abstract type AbstractMortar{RealT<:Real} end +abstract type AbstractMortar{RealT <: Real} end # abstract supertype of mortar methods using L² projection # which will be specialized for different SBP bases -abstract type AbstractMortarL2{RealT<:Real} <: AbstractMortar{RealT} end - +abstract type AbstractMortarL2{RealT <: Real} <: AbstractMortar{RealT} end # abstract supertype of functionality related to the analysis of # numerical solutions, e.g. the calculation of errors -abstract type SolutionAnalyzer{RealT<:Real} end - +abstract type SolutionAnalyzer{RealT <: Real} end # abstract supertype of grid-transfer methods used for AMR, # e.g. refinement and coarsening based on L² projections -abstract type AdaptorAMR{RealT<:Real} end +abstract type AdaptorAMR{RealT <: Real} end # abstract supertype of AMR grid-transfer operations using L² projections # which will be specialized for different SBP bases -abstract type AdaptorL2{RealT<:Real} <: AdaptorAMR{RealT} end - +abstract type AdaptorL2{RealT <: Real} <: AdaptorAMR{RealT} end # TODO: Taal decide, which abstract types shall be defined here? 
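# For orientation, a concrete physics model hooks into the abstractions above
# by subtyping `AbstractEquations`; a minimal hypothetical sketch (the type
# `MyLinearAdvection1D` is illustrative and not part of Trixi):
#
#     struct MyLinearAdvection1D{RealT <: Real} <: AbstractEquations{1, 1}
#         advection_velocity::RealT
#     end
#
# i.e., one spatial dimension (`NDIMS = 1`) and one conserved variable (`NVARS = 1`).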
- struct BoundaryConditionPeriodic end """ @@ -68,28 +60,30 @@ A singleton struct indicating periodic boundary conditions. """ const boundary_condition_periodic = BoundaryConditionPeriodic() -Base.show(io::IO, ::BoundaryConditionPeriodic) = print(io, "boundary_condition_periodic") - +function Base.show(io::IO, ::BoundaryConditionPeriodic) + print(io, "boundary_condition_periodic") +end struct BoundaryConditionDoNothing end # This version can be called by hyperbolic solvers on logically Cartesian meshes -@inline function (::BoundaryConditionDoNothing)( - u_inner, orientation_or_normal_direction, direction::Integer, x, t, surface_flux, equations) - - return flux(u_inner, orientation_or_normal_direction, equations) +@inline function (::BoundaryConditionDoNothing)(u_inner, + orientation_or_normal_direction, + direction::Integer, x, t, surface_flux, + equations) + return flux(u_inner, orientation_or_normal_direction, equations) end # This version can be called by hyperbolic solvers on unstructured, curved meshes -@inline function (::BoundaryConditionDoNothing)(u_inner, outward_direction::AbstractVector, +@inline function (::BoundaryConditionDoNothing)(u_inner, + outward_direction::AbstractVector, x, t, surface_flux, equations) - - return flux(u_inner, outward_direction, equations) + return flux(u_inner, outward_direction, equations) end # This version can be called by parabolic solvers @inline function (::BoundaryConditionDoNothing)(inner_flux_or_state, other_args...) - return inner_flux_or_state + return inner_flux_or_state end """ @@ -99,6 +93,7 @@ Imposing no boundary condition just evaluates the flux at the inner state. """ const boundary_condition_do_nothing = BoundaryConditionDoNothing() -Base.show(io::IO, ::BoundaryConditionDoNothing) = print(io, "boundary_condition_do_nothing") - +function Base.show(io::IO, ::BoundaryConditionDoNothing) + print(io, "boundary_condition_do_nothing") +end end # @muladd diff --git a/src/callbacks_stage/callbacks_stage.jl b/src/callbacks_stage/callbacks_stage.jl index f23f96eccf8..7609f9b341d 100644 --- a/src/callbacks_stage/callbacks_stage.jl +++ b/src/callbacks_stage/callbacks_stage.jl @@ -3,9 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent include("positivity_zhang_shu.jl") - - end # @muladd diff --git a/src/callbacks_stage/positivity_zhang_shu.jl b/src/callbacks_stage/positivity_zhang_shu.jl index c3156ae4833..92141c4b26e 100644 --- a/src/callbacks_stage/positivity_zhang_shu.jl +++ b/src/callbacks_stage/positivity_zhang_shu.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ PositivityPreservingLimiterZhangShu(; threshold, variables) @@ -18,24 +18,26 @@ using the associated `thresholds` to determine the minimal acceptable values. The order of the `variables` is important and might have a strong influence on the robustness. 
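For the compressible Euler equations, an illustrative setup limits density and
pressure and is passed as a stage limiter to a suitable time integration method:

    limiter! = PositivityPreservingLimiterZhangShu(thresholds = (5.0e-6, 5.0e-6),
                                                   variables = (Trixi.density, pressure))

Internally, each node value is blended towards the element mean,
u ← θ * u + (1 - θ) * ū, with θ chosen just large enough that all `variables`
stay above their `thresholds` on the limited element.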
""" -struct PositivityPreservingLimiterZhangShu{N, Thresholds<:NTuple{N,<:Real}, Variables<:NTuple{N,Any}} - thresholds::Thresholds - variables::Variables +struct PositivityPreservingLimiterZhangShu{N, Thresholds <: NTuple{N, <:Real}, + Variables <: NTuple{N, Any}} + thresholds::Thresholds + variables::Variables end function PositivityPreservingLimiterZhangShu(; thresholds, variables) - PositivityPreservingLimiterZhangShu(thresholds, variables) + PositivityPreservingLimiterZhangShu(thresholds, variables) end - -function (limiter!::PositivityPreservingLimiterZhangShu)( - u_ode, integrator, semi::AbstractSemidiscretization, t) - u = wrap_array(u_ode, semi) - @trixi_timeit timer() "positivity-preserving limiter" limiter_zhang_shu!( - u, limiter!.thresholds, limiter!.variables, mesh_equations_solver_cache(semi)...) +function (limiter!::PositivityPreservingLimiterZhangShu)(u_ode, integrator, + semi::AbstractSemidiscretization, + t) + u = wrap_array(u_ode, semi) + @trixi_timeit timer() "positivity-preserving limiter" begin + limiter_zhang_shu!(u, limiter!.thresholds, limiter!.variables, + mesh_equations_solver_cache(semi)...) + end end - # Iterate over tuples in a type-stable way using "lispy tuple programming", # similar to https://stackoverflow.com/a/55849398: # Iterating over tuples of different functions isn't type-stable in general @@ -44,28 +46,26 @@ end # Note that you shouldn't use this with too many elements per tuple since the # compile times can increase otherwise - but a handful of elements per tuple # is definitely fine. -function limiter_zhang_shu!(u, thresholds::NTuple{N,<:Real}, variables::NTuple{N,Any}, +function limiter_zhang_shu!(u, thresholds::NTuple{N, <:Real}, variables::NTuple{N, Any}, mesh, equations, solver, cache) where {N} - threshold = first(thresholds) - remaining_thresholds = Base.tail(thresholds) - variable = first(variables) - remaining_variables = Base.tail(variables) + threshold = first(thresholds) + remaining_thresholds = Base.tail(thresholds) + variable = first(variables) + remaining_variables = Base.tail(variables) - limiter_zhang_shu!(u, threshold, variable, mesh, equations, solver, cache) - limiter_zhang_shu!(u, remaining_thresholds, remaining_variables, mesh, equations, solver, cache) - return nothing + limiter_zhang_shu!(u, threshold, variable, mesh, equations, solver, cache) + limiter_zhang_shu!(u, remaining_thresholds, remaining_variables, mesh, equations, + solver, cache) + return nothing end # terminate the type-stable iteration over tuples function limiter_zhang_shu!(u, thresholds::Tuple{}, variables::Tuple{}, mesh, equations, solver, cache) - nothing + nothing end - include("positivity_zhang_shu_dg1d.jl") include("positivity_zhang_shu_dg2d.jl") include("positivity_zhang_shu_dg3d.jl") - - end # @muladd diff --git a/src/callbacks_stage/positivity_zhang_shu_dg1d.jl b/src/callbacks_stage/positivity_zhang_shu_dg1d.jl index 50d6b3f2c31..7797eb95b09 100644 --- a/src/callbacks_stage/positivity_zhang_shu_dg1d.jl +++ b/src/callbacks_stage/positivity_zhang_shu_dg1d.jl @@ -3,45 +3,43 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function limiter_zhang_shu!(u, threshold::Real, variable, mesh::AbstractMesh{1}, equations, dg::DGSEM, cache) - @unpack weights = dg.basis - - @threaded for element in eachelement(dg, cache) - # determine minimum value - value_min = typemax(eltype(u)) - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) - value_min = min(value_min, variable(u_node, equations)) - end - - # detect if limiting is necessary - value_min < threshold || continue - - # compute mean value - u_mean = zero(get_node_vars(u, equations, dg, 1, element)) - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) - u_mean += u_node * weights[i] + @unpack weights = dg.basis + + @threaded for element in eachelement(dg, cache) + # determine minimum value + value_min = typemax(eltype(u)) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + value_min = min(value_min, variable(u_node, equations)) + end + + # detect if limiting is necessary + value_min < threshold || continue + + # compute mean value + u_mean = zero(get_node_vars(u, equations, dg, 1, element)) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + u_mean += u_node * weights[i] + end + # note that the reference element is [-1,1]^ndims(dg), thus the weights sum to 2 + u_mean = u_mean / 2^ndims(mesh) + + # We compute the value directly with the mean values, as we assume that + # Jensen's inequality holds (e.g. pressure for compressible Euler equations). + value_mean = variable(u_mean, equations) + theta = (value_mean - threshold) / (value_mean - value_min) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + set_node_vars!(u, theta * u_node + (1 - theta) * u_mean, + equations, dg, i, element) + end end - # note that the reference element is [-1,1]^ndims(dg), thus the weights sum to 2 - u_mean = u_mean / 2^ndims(mesh) - # We compute the value directly with the mean values, as we assume that - # Jensen's inequality holds (e.g. pressure for compressible Euler equations). - value_mean = variable(u_mean, equations) - theta = (value_mean - threshold) / (value_mean - value_min) - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) - set_node_vars!(u, theta * u_node + (1-theta) * u_mean, - equations, dg, i, element) - end - end - - return nothing + return nothing end - - end # @muladd diff --git a/src/callbacks_stage/positivity_zhang_shu_dg2d.jl b/src/callbacks_stage/positivity_zhang_shu_dg2d.jl index ae5b7371920..b37ed9c49d5 100644 --- a/src/callbacks_stage/positivity_zhang_shu_dg2d.jl +++ b/src/callbacks_stage/positivity_zhang_shu_dg2d.jl @@ -3,45 +3,43 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function limiter_zhang_shu!(u, threshold::Real, variable, mesh::AbstractMesh{2}, equations, dg::DGSEM, cache) - @unpack weights = dg.basis - - @threaded for element in eachelement(dg, cache) - # determine minimum value - value_min = typemax(eltype(u)) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - value_min = min(value_min, variable(u_node, equations)) - end - - # detect if limiting is necessary - value_min < threshold || continue - - # compute mean value - u_mean = zero(get_node_vars(u, equations, dg, 1, 1, element)) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - u_mean += u_node * weights[i] * weights[j] + @unpack weights = dg.basis + + @threaded for element in eachelement(dg, cache) + # determine minimum value + value_min = typemax(eltype(u)) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + value_min = min(value_min, variable(u_node, equations)) + end + + # detect if limiting is necessary + value_min < threshold || continue + + # compute mean value + u_mean = zero(get_node_vars(u, equations, dg, 1, 1, element)) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + u_mean += u_node * weights[i] * weights[j] + end + # note that the reference element is [-1,1]^ndims(dg), thus the weights sum to 2 + u_mean = u_mean / 2^ndims(mesh) + + # We compute the value directly with the mean values, as we assume that + # Jensen's inequality holds (e.g. pressure for compressible Euler equations). + value_mean = variable(u_mean, equations) + theta = (value_mean - threshold) / (value_mean - value_min) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + set_node_vars!(u, theta * u_node + (1 - theta) * u_mean, + equations, dg, i, j, element) + end end - # note that the reference element is [-1,1]^ndims(dg), thus the weights sum to 2 - u_mean = u_mean / 2^ndims(mesh) - # We compute the value directly with the mean values, as we assume that - # Jensen's inequality holds (e.g. pressure for compressible Euler equations). - value_mean = variable(u_mean, equations) - theta = (value_mean - threshold) / (value_mean - value_min) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - set_node_vars!(u, theta * u_node + (1-theta) * u_mean, - equations, dg, i, j, element) - end - end - - return nothing + return nothing end - - end # @muladd diff --git a/src/callbacks_stage/positivity_zhang_shu_dg3d.jl b/src/callbacks_stage/positivity_zhang_shu_dg3d.jl index d2e46dc7d88..773a236d831 100644 --- a/src/callbacks_stage/positivity_zhang_shu_dg3d.jl +++ b/src/callbacks_stage/positivity_zhang_shu_dg3d.jl @@ -3,45 +3,43 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function limiter_zhang_shu!(u, threshold::Real, variable, mesh::AbstractMesh{3}, equations, dg::DGSEM, cache) - @unpack weights = dg.basis - - @threaded for element in eachelement(dg, cache) - # determine minimum value - value_min = typemax(eltype(u)) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - value_min = min(value_min, variable(u_node, equations)) - end - - # detect if limiting is necessary - value_min < threshold || continue - - # compute mean value - u_mean = zero(get_node_vars(u, equations, dg, 1, 1, 1, element)) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - u_mean += u_node * weights[i] * weights[j] * weights[k] + @unpack weights = dg.basis + + @threaded for element in eachelement(dg, cache) + # determine minimum value + value_min = typemax(eltype(u)) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + value_min = min(value_min, variable(u_node, equations)) + end + + # detect if limiting is necessary + value_min < threshold || continue + + # compute mean value + u_mean = zero(get_node_vars(u, equations, dg, 1, 1, 1, element)) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + u_mean += u_node * weights[i] * weights[j] * weights[k] + end + # note that the reference element is [-1,1]^ndims(dg), thus the weights sum to 2 + u_mean = u_mean / 2^ndims(mesh) + + # We compute the value directly with the mean values, as we assume that + # Jensen's inequality holds (e.g. pressure for compressible Euler equations). + value_mean = variable(u_mean, equations) + theta = (value_mean - threshold) / (value_mean - value_min) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + set_node_vars!(u, theta * u_node + (1 - theta) * u_mean, + equations, dg, i, j, k, element) + end end - # note that the reference element is [-1,1]^ndims(dg), thus the weights sum to 2 - u_mean = u_mean / 2^ndims(mesh) - # We compute the value directly with the mean values, as we assume that - # Jensen's inequality holds (e.g. pressure for compressible Euler equations). - value_mean = variable(u_mean, equations) - theta = (value_mean - threshold) / (value_mean - value_min) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - set_node_vars!(u, theta * u_node + (1-theta) * u_mean, - equations, dg, i, j, k, element) - end - end - - return nothing + return nothing end - - end # @muladd diff --git a/src/callbacks_step/alive.jl b/src/callbacks_step/alive.jl index 1417dc3bef7..eeacd9681d8 100644 --- a/src/callbacks_step/alive.jl +++ b/src/callbacks_step/alive.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ AliveCallback(analysis_interval=0, alive_interval=analysis_interval÷10) @@ -14,91 +14,85 @@ time steps. If `analysis_interval ≂̸ 0`, the output is omitted every `analysis_interval` time steps. 
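For example, `AliveCallback(alive_interval = 10)` prints a short status line after
every 10 accepted time steps, while `AliveCallback(analysis_interval = 100)` prints
after every 10 accepted time steps except for those at which an analysis output is
produced anyway.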
""" mutable struct AliveCallback - start_time::Float64 - alive_interval::Int - analysis_interval::Int + start_time::Float64 + alive_interval::Int + analysis_interval::Int end -function AliveCallback(; analysis_interval=0, - alive_interval=analysis_interval÷10) - - alive_callback = AliveCallback(0.0, alive_interval, analysis_interval) +function AliveCallback(; analysis_interval = 0, + alive_interval = analysis_interval ÷ 10) + alive_callback = AliveCallback(0.0, alive_interval, analysis_interval) - DiscreteCallback(alive_callback, alive_callback, # the first one is the condition, the second the affect! - save_positions=(false,false), - initialize=initialize!) + DiscreteCallback(alive_callback, alive_callback, # the first one is the condition, the second the affect! + save_positions = (false, false), + initialize = initialize!) end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:AliveCallback}) - @nospecialize cb # reduce precompilation time - - alive_callback = cb.affect! - print(io, "AliveCallback(alive_interval=", alive_callback.alive_interval, ")") -end - -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:AliveCallback}) - @nospecialize cb # reduce precompilation time + @nospecialize cb # reduce precompilation time - if get(io, :compact, false) - show(io, cb) - else alive_callback = cb.affect! - - setup = [ - "interval" => alive_callback.alive_interval, - ] - summary_box(io, "AliveCallback", setup) - end + print(io, "AliveCallback(alive_interval=", alive_callback.alive_interval, ")") end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:AliveCallback}) + @nospecialize cb # reduce precompilation time + if get(io, :compact, false) + show(io, cb) + else + alive_callback = cb.affect! -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u, t, integrator) where {Condition, Affect!<:AliveCallback} - - alive_callback = cb.affect! - alive_callback.start_time = time_ns() - return nothing + setup = [ + "interval" => alive_callback.alive_interval, + ] + summary_box(io, "AliveCallback", setup) + end end +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, + integrator) where {Condition, Affect! <: AliveCallback} + alive_callback = cb.affect! + alive_callback.start_time = time_ns() + return nothing +end # this method is called to determine whether the callback should be activated function (alive_callback::AliveCallback)(u, t, integrator) - @unpack alive_interval, analysis_interval = alive_callback - - # With error-based step size control, some steps can be rejected. Thus, - # `integrator.iter >= integrator.stats.naccept` - # (total #steps) (#accepted steps) - # We need to check the number of accepted steps since callbacks are not - # activated after a rejected step. - return alive_interval > 0 && ( - (integrator.stats.naccept % alive_interval == 0 && - !(integrator.stats.naccept == 0 && integrator.iter > 0) && - (analysis_interval == 0 || integrator.stats.naccept % analysis_interval != 0)) || - isfinished(integrator)) + @unpack alive_interval, analysis_interval = alive_callback + + # With error-based step size control, some steps can be rejected. Thus, + # `integrator.iter >= integrator.stats.naccept` + # (total #steps) (#accepted steps) + # We need to check the number of accepted steps since callbacks are not + # activated after a rejected step. 
+ return alive_interval > 0 && ((integrator.stats.naccept % alive_interval == 0 && + !(integrator.stats.naccept == 0 && integrator.iter > 0) && + (analysis_interval == 0 || + integrator.stats.naccept % analysis_interval != 0)) || + isfinished(integrator)) end - # this method is called when the callback is activated function (alive_callback::AliveCallback)(integrator) - # Checking for floating point equality is OK here as `DifferentialEquations.jl` - # sets the time exactly to the final time in the last iteration - if isfinished(integrator) && mpi_isroot() - println("─"^100) - println("Trixi.jl simulation finished. Final time: ", integrator.t, - " Time steps: ", integrator.stats.naccept, " (accepted), ", integrator.iter, " (total)") - println("─"^100) - println() - elseif mpi_isroot() - runtime_absolute = 1.0e-9 * (time_ns() - alive_callback.start_time) - @printf("#timesteps: %6d │ Δt: %.4e │ sim. time: %.4e │ run time: %.4e s\n", - integrator.stats.naccept, integrator.dt, integrator.t, runtime_absolute) - end - - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) - return nothing + # Checking for floating point equality is OK here as `DifferentialEquations.jl` + # sets the time exactly to the final time in the last iteration + if isfinished(integrator) && mpi_isroot() + println("─"^100) + println("Trixi.jl simulation finished. Final time: ", integrator.t, + " Time steps: ", integrator.stats.naccept, " (accepted), ", + integrator.iter, " (total)") + println("─"^100) + println() + elseif mpi_isroot() + runtime_absolute = 1.0e-9 * (time_ns() - alive_callback.start_time) + @printf("#timesteps: %6d │ Δt: %.4e │ sim. time: %.4e │ run time: %.4e s\n", + integrator.stats.naccept, integrator.dt, integrator.t, runtime_absolute) + end + + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) + return nothing end - - end # @muladd diff --git a/src/callbacks_step/amr.jl b/src/callbacks_step/amr.jl index 4655a0b9ef6..d6e19b79886 100644 --- a/src/callbacks_step/amr.jl +++ b/src/callbacks_step/amr.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ AMRCallback(semi, controller [,adaptor=AdaptorAMR(semi)]; @@ -16,64 +16,67 @@ Performs adaptive mesh refinement (AMR) every `interval` time steps for a given semidiscretization `semi` using the chosen `controller`. """ struct AMRCallback{Controller, Adaptor, Cache} - controller::Controller - interval::Int - adapt_initial_condition::Bool - adapt_initial_condition_only_refine::Bool - dynamic_load_balancing::Bool - adaptor::Adaptor - amr_cache::Cache + controller::Controller + interval::Int + adapt_initial_condition::Bool + adapt_initial_condition_only_refine::Bool + dynamic_load_balancing::Bool + adaptor::Adaptor + amr_cache::Cache end - function AMRCallback(semi, controller, adaptor; interval, - adapt_initial_condition=true, - adapt_initial_condition_only_refine=true, - dynamic_load_balancing=true) - # check arguments - if !(interval isa Integer && interval >= 0) - throw(ArgumentError("`interval` must be a non-negative integer (provided `interval = $interval`)")) - end - - # AMR every `interval` time steps, but not after the final step - # With error-based step size control, some steps can be rejected. Thus, - # `integrator.iter >= integrator.stats.naccept` - # (total #steps) (#accepted steps) - # We need to check the number of accepted steps since callbacks are not - # activated after a rejected step. 
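# Worked example (illustrative): with `interval = 5`, the mesh is adapted
# after accepted steps 5, 10, 15, ..., but never after the final step,
# since adapting the mesh after the last step would not affect the solution.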
- if interval > 0 - condition = (u, t, integrator) -> ( (integrator.stats.naccept % interval == 0) && - !(integrator.stats.naccept == 0 && integrator.iter > 0) && - !isfinished(integrator) ) - else # disable the AMR callback except possibly for initial refinement during initialization - condition = (u, t, integrator) -> false - end - - to_refine = Int[] - to_coarsen = Int[] - amr_cache = (; to_refine, to_coarsen) - - amr_callback = AMRCallback{typeof(controller), typeof(adaptor), typeof(amr_cache)}( - controller, interval, adapt_initial_condition, adapt_initial_condition_only_refine, - dynamic_load_balancing, adaptor, amr_cache) - - DiscreteCallback(condition, amr_callback, - save_positions=(false,false), - initialize=initialize!) + adapt_initial_condition = true, + adapt_initial_condition_only_refine = true, + dynamic_load_balancing = true) + # check arguments + if !(interval isa Integer && interval >= 0) + throw(ArgumentError("`interval` must be a non-negative integer (provided `interval = $interval`)")) + end + + # AMR every `interval` time steps, but not after the final step + # With error-based step size control, some steps can be rejected. Thus, + # `integrator.iter >= integrator.stats.naccept` + # (total #steps) (#accepted steps) + # We need to check the number of accepted steps since callbacks are not + # activated after a rejected step. + if interval > 0 + condition = (u, t, integrator) -> ((integrator.stats.naccept % interval == 0) && + !(integrator.stats.naccept == 0 && + integrator.iter > 0) && + !isfinished(integrator)) + else # disable the AMR callback except possibly for initial refinement during initialization + condition = (u, t, integrator) -> false + end + + to_refine = Int[] + to_coarsen = Int[] + amr_cache = (; to_refine, to_coarsen) + + amr_callback = AMRCallback{typeof(controller), typeof(adaptor), typeof(amr_cache)}(controller, + interval, + adapt_initial_condition, + adapt_initial_condition_only_refine, + dynamic_load_balancing, + adaptor, + amr_cache) + + DiscreteCallback(condition, amr_callback, + save_positions = (false, false), + initialize = initialize!) end function AMRCallback(semi, controller; kwargs...) - adaptor = AdaptorAMR(semi) - AMRCallback(semi, controller, adaptor; kwargs...) + adaptor = AdaptorAMR(semi) + AMRCallback(semi, controller, adaptor; kwargs...) end function AdaptorAMR(semi; kwargs...) - mesh, _, solver, _ = mesh_equations_solver_cache(semi) - AdaptorAMR(mesh, solver; kwargs...) + mesh, _, solver, _ = mesh_equations_solver_cache(semi) + AdaptorAMR(mesh, solver; kwargs...) end - # TODO: Taal bikeshedding, implement a method with less information and the signature # function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:AMRCallback}) # @nospecialize cb # reduce precompilation time @@ -81,27 +84,30 @@ end # amr_callback = cb.affect! # print(io, "AMRCallback") # end -function Base.show(io::IO, mime::MIME"text/plain", cb::DiscreteCallback{<:Any, <:AMRCallback}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else - amr_callback = cb.affect! +function Base.show(io::IO, mime::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:AMRCallback}) + @nospecialize cb # reduce precompilation time - summary_header(io, "AMRCallback") - summary_line(io, "controller", amr_callback.controller |> typeof |> nameof) - show(increment_indent(io), mime, amr_callback.controller) - summary_line(io, "interval", amr_callback.interval) - summary_line(io, "adapt IC", amr_callback.adapt_initial_condition ? 
"yes" : "no",) - if amr_callback.adapt_initial_condition - summary_line(io, "│ only refine", amr_callback.adapt_initial_condition_only_refine ? "yes" : "no") + if get(io, :compact, false) + show(io, cb) + else + amr_callback = cb.affect! + + summary_header(io, "AMRCallback") + summary_line(io, "controller", amr_callback.controller |> typeof |> nameof) + show(increment_indent(io), mime, amr_callback.controller) + summary_line(io, "interval", amr_callback.interval) + summary_line(io, "adapt IC", + amr_callback.adapt_initial_condition ? "yes" : "no") + if amr_callback.adapt_initial_condition + summary_line(io, "│ only refine", + amr_callback.adapt_initial_condition_only_refine ? "yes" : + "no") + end + summary_footer(io) end - summary_footer(io) - end end - # The function below is used to control the output depending on whether or not AMR is enabled. """ uses_amr(callback) @@ -110,37 +116,39 @@ Checks whether the provided callback or `CallbackSet` is an [`AMRCallback`](@ref or contains one. """ uses_amr(cb) = false -uses_amr(cb::DiscreteCallback{Condition,Affect!}) where {Condition, Affect!<:AMRCallback} = true +function uses_amr(cb::DiscreteCallback{Condition, Affect!}) where {Condition, + Affect! <: + AMRCallback} + true +end uses_amr(callbacks::CallbackSet) = mapreduce(uses_amr, |, callbacks.discrete_callbacks) - function get_element_variables!(element_variables, u, mesh, equations, solver, cache, amr_callback::AMRCallback; kwargs...) - get_element_variables!(element_variables, u, mesh, equations, solver, cache, - amr_callback.controller, amr_callback; kwargs...) + get_element_variables!(element_variables, u, mesh, equations, solver, cache, + amr_callback.controller, amr_callback; kwargs...) end - -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u, t, integrator) where {Condition, Affect!<:AMRCallback} - amr_callback = cb.affect! - semi = integrator.p - - @trixi_timeit timer() "initial condition AMR" if amr_callback.adapt_initial_condition - # iterate until mesh does not change anymore - has_changed = amr_callback(integrator, - only_refine=amr_callback.adapt_initial_condition_only_refine) - while has_changed - compute_coefficients!(integrator.u, t, semi) - u_modified!(integrator, true) - has_changed = amr_callback(integrator, - only_refine=amr_callback.adapt_initial_condition_only_refine) +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, + integrator) where {Condition, Affect! <: AMRCallback} + amr_callback = cb.affect! + semi = integrator.p + + @trixi_timeit timer() "initial condition AMR" if amr_callback.adapt_initial_condition + # iterate until mesh does not change anymore + has_changed = amr_callback(integrator, + only_refine = amr_callback.adapt_initial_condition_only_refine) + while has_changed + compute_coefficients!(integrator.u, t, semi) + u_modified!(integrator, true) + has_changed = amr_callback(integrator, + only_refine = amr_callback.adapt_initial_condition_only_refine) + end end - end - return nothing + return nothing end - # TODO: Taal remove? # function (cb::DiscreteCallback{Condition,Affect!})(ode::ODEProblem) where {Condition, Affect!<:AMRCallback} # amr_callback = cb.affect! @@ -159,35 +167,31 @@ end # return nothing # end - function (amr_callback::AMRCallback)(integrator; kwargs...) - u_ode = integrator.u - semi = integrator.p - - @trixi_timeit timer() "AMR" begin - has_changed = amr_callback(u_ode, semi, - integrator.t, integrator.iter; kwargs...) 
- if has_changed - resize!(integrator, length(u_ode)) - u_modified!(integrator, true) + u_ode = integrator.u + semi = integrator.p + + @trixi_timeit timer() "AMR" begin + has_changed = amr_callback(u_ode, semi, + integrator.t, integrator.iter; kwargs...) + if has_changed + resize!(integrator, length(u_ode)) + u_modified!(integrator, true) + end end - end - return has_changed + return has_changed end - @inline function (amr_callback::AMRCallback)(u_ode::AbstractVector, semi::SemidiscretizationHyperbolic, t, iter; kwargs...) - # Note that we don't `wrap_array` the vector `u_ode` to be able to `resize!` - # it when doing AMR while still dispatching on the `mesh` etc. - amr_callback(u_ode, mesh_equations_solver_cache(semi)..., semi, t, iter; kwargs...) + # Note that we don't `wrap_array` the vector `u_ode` to be able to `resize!` + # it when doing AMR while still dispatching on the `mesh` etc. + amr_callback(u_ode, mesh_equations_solver_cache(semi)..., semi, t, iter; kwargs...) end - - # `passive_args` is currently used for Euler with self-gravity to adapt the gravity solver # passively without querying its indicator, based on the assumption that both solvers use # the same mesh. That's a hack and should be improved in the future once we have more examples @@ -197,292 +201,312 @@ end function (amr_callback::AMRCallback)(u_ode::AbstractVector, mesh::TreeMesh, equations, dg::DG, cache, semi, t, iter; - only_refine=false, only_coarsen=false, - passive_args=()) - @unpack controller, adaptor = amr_callback - - u = wrap_array(u_ode, mesh, equations, dg, cache) - lambda = @trixi_timeit timer() "indicator" controller(u, mesh, equations, dg, cache, - t=t, iter=iter) - - if mpi_isparallel() - # Collect lambda for all elements - lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(dg, cache)) - # Use parent because n_elements_by_rank is an OffsetArray - recvbuf = MPI.VBuffer(lambda_global, parent(cache.mpi_cache.n_elements_by_rank)) - MPI.Allgatherv!(lambda, recvbuf, mpi_comm()) - lambda = lambda_global - end - - leaf_cell_ids = leaf_cells(mesh.tree) - @boundscheck begin - @assert axes(lambda) == axes(leaf_cell_ids) ("Indicator (axes = $(axes(lambda))) and leaf cell (axes = $(axes(leaf_cell_ids))) arrays have different axes") - end - - @unpack to_refine, to_coarsen = amr_callback.amr_cache - empty!(to_refine) - empty!(to_coarsen) - for element in 1:length(lambda) - controller_value = lambda[element] - if controller_value > 0 - push!(to_refine, leaf_cell_ids[element]) - elseif controller_value < 0 - push!(to_coarsen, leaf_cell_ids[element]) + only_refine = false, only_coarsen = false, + passive_args = ()) + @unpack controller, adaptor = amr_callback + + u = wrap_array(u_ode, mesh, equations, dg, cache) + lambda = @trixi_timeit timer() "indicator" controller(u, mesh, equations, dg, cache, + t = t, iter = iter) + + if mpi_isparallel() + # Collect lambda for all elements + lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(dg, cache)) + # Use parent because n_elements_by_rank is an OffsetArray + recvbuf = MPI.VBuffer(lambda_global, parent(cache.mpi_cache.n_elements_by_rank)) + MPI.Allgatherv!(lambda, recvbuf, mpi_comm()) + lambda = lambda_global end - end - - - @trixi_timeit timer() "refine" if !only_coarsen && !isempty(to_refine) - # refine mesh - refined_original_cells = @trixi_timeit timer() "mesh" refine!(mesh.tree, to_refine) - # Find all indices of elements whose cell ids are in refined_original_cells - elements_to_refine = findall(in(refined_original_cells), 
cache.elements.cell_ids) - - # refine solver - @trixi_timeit timer() "solver" refine!(u_ode, adaptor, mesh, equations, dg, cache, elements_to_refine) - for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args - @trixi_timeit timer() "passive solver" refine!(p_u_ode, adaptor, p_mesh, p_equations, p_dg, p_cache, elements_to_refine) + leaf_cell_ids = leaf_cells(mesh.tree) + @boundscheck begin + @assert axes(lambda)==axes(leaf_cell_ids) ("Indicator (axes = $(axes(lambda))) and leaf cell (axes = $(axes(leaf_cell_ids))) arrays have different axes") end - else - # If there is nothing to refine, create empty array for later use - refined_original_cells = Int[] - end - - @trixi_timeit timer() "coarsen" if !only_refine && !isempty(to_coarsen) - # Since the cells may have been shifted due to refinement, first we need to - # translate the old cell ids to the new cell ids - if !isempty(to_coarsen) - to_coarsen = original2refined(to_coarsen, refined_original_cells, mesh) + @unpack to_refine, to_coarsen = amr_callback.amr_cache + empty!(to_refine) + empty!(to_coarsen) + for element in 1:length(lambda) + controller_value = lambda[element] + if controller_value > 0 + push!(to_refine, leaf_cell_ids[element]) + elseif controller_value < 0 + push!(to_coarsen, leaf_cell_ids[element]) + end end - # Next, determine the parent cells from which the fine cells are to be - # removed, since these are needed for the coarsen! function. However, since - # we only want to coarsen if *all* child cells are marked for coarsening, - # we count the coarsening indicators for each parent cell and only coarsen - # if all children are marked as such (i.e., where the count is 2^ndims). At - # the same time, check if a cell is marked for coarsening even though it is - # *not* a leaf cell -> this can only happen if it was refined due to 2:1 - # smoothing during the preceding refinement operation. 
- parents_to_coarsen = zeros(Int, length(mesh.tree)) - for cell_id in to_coarsen - # If cell has no parent, it cannot be coarsened - if !has_parent(mesh.tree, cell_id) - continue - end - - # If cell is not leaf (anymore), it cannot be coarsened - if !is_leaf(mesh.tree, cell_id) - continue - end - - # Increase count for parent cell - parent_id = mesh.tree.parent_ids[cell_id] - parents_to_coarsen[parent_id] += 1 + @trixi_timeit timer() "refine" if !only_coarsen && !isempty(to_refine) + # refine mesh + refined_original_cells = @trixi_timeit timer() "mesh" refine!(mesh.tree, + to_refine) + + # Find all indices of elements whose cell ids are in refined_original_cells + elements_to_refine = findall(in(refined_original_cells), + cache.elements.cell_ids) + + # refine solver + @trixi_timeit timer() "solver" refine!(u_ode, adaptor, mesh, equations, dg, + cache, elements_to_refine) + for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args + @trixi_timeit timer() "passive solver" refine!(p_u_ode, adaptor, p_mesh, + p_equations, p_dg, p_cache, + elements_to_refine) + end + else + # If there is nothing to refine, create empty array for later use + refined_original_cells = Int[] end - # Extract only those parent cells for which all children should be coarsened - to_coarsen = collect(1:length(parents_to_coarsen))[parents_to_coarsen .== 2^ndims(mesh)] - - # Finally, coarsen mesh - coarsened_original_cells = @trixi_timeit timer() "mesh" coarsen!(mesh.tree, to_coarsen) + @trixi_timeit timer() "coarsen" if !only_refine && !isempty(to_coarsen) + # Since the cells may have been shifted due to refinement, first we need to + # translate the old cell ids to the new cell ids + if !isempty(to_coarsen) + to_coarsen = original2refined(to_coarsen, refined_original_cells, mesh) + end + + # Next, determine the parent cells from which the fine cells are to be + # removed, since these are needed for the coarsen! function. However, since + # we only want to coarsen if *all* child cells are marked for coarsening, + # we count the coarsening indicators for each parent cell and only coarsen + # if all children are marked as such (i.e., where the count is 2^ndims). At + # the same time, check if a cell is marked for coarsening even though it is + # *not* a leaf cell -> this can only happen if it was refined due to 2:1 + # smoothing during the preceding refinement operation. 
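# Worked example (illustrative): in 2D, a parent cell is coarsened only if
# all 2^2 = 4 of its children are flagged for coarsening; if, say, only
# three children are flagged, the count below stays at 3 and the parent
# is not coarsened.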
+ parents_to_coarsen = zeros(Int, length(mesh.tree)) + for cell_id in to_coarsen + # If cell has no parent, it cannot be coarsened + if !has_parent(mesh.tree, cell_id) + continue + end + + # If cell is not leaf (anymore), it cannot be coarsened + if !is_leaf(mesh.tree, cell_id) + continue + end + + # Increase count for parent cell + parent_id = mesh.tree.parent_ids[cell_id] + parents_to_coarsen[parent_id] += 1 + end + + # Extract only those parent cells for which all children should be coarsened + to_coarsen = collect(1:length(parents_to_coarsen))[parents_to_coarsen .== 2^ndims(mesh)] + + # Finally, coarsen mesh + coarsened_original_cells = @trixi_timeit timer() "mesh" coarsen!(mesh.tree, + to_coarsen) + + # Convert coarsened parent cell ids to the list of child cell ids that have + # been removed, since this is the information that is expected by the solver + removed_child_cells = zeros(Int, + n_children_per_cell(mesh.tree) * + length(coarsened_original_cells)) + for (index, coarse_cell_id) in enumerate(coarsened_original_cells) + for child in 1:n_children_per_cell(mesh.tree) + removed_child_cells[n_children_per_cell(mesh.tree) * (index - 1) + child] = coarse_cell_id + + child + end + end + + # Find all indices of elements whose cell ids are in removed_child_cells + elements_to_remove = findall(in(removed_child_cells), cache.elements.cell_ids) + + # coarsen solver + @trixi_timeit timer() "solver" coarsen!(u_ode, adaptor, mesh, equations, dg, + cache, elements_to_remove) + for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args + @trixi_timeit timer() "passive solver" coarsen!(p_u_ode, adaptor, p_mesh, + p_equations, p_dg, p_cache, + elements_to_remove) + end + else + # If there is nothing to coarsen, create empty array for later use + coarsened_original_cells = Int[] + end - # Convert coarsened parent cell ids to the list of child cell ids that have - # been removed, since this is the information that is expected by the solver - removed_child_cells = zeros(Int, n_children_per_cell(mesh.tree) * length(coarsened_original_cells)) - for (index, coarse_cell_id) in enumerate(coarsened_original_cells) - for child in 1:n_children_per_cell(mesh.tree) - removed_child_cells[n_children_per_cell(mesh.tree) * (index-1) + child] = coarse_cell_id + child - end + # Store whether there were any cells coarsened or refined + has_changed = !isempty(refined_original_cells) || !isempty(coarsened_original_cells) + if has_changed # TODO: Taal decide, where shall we set this? 
+ # don't set it to has_changed since there can be changes from earlier calls + mesh.unsaved_changes = true end - # Find all indices of elements whose cell ids are in removed_child_cells - elements_to_remove = findall(in(removed_child_cells), cache.elements.cell_ids) + # Dynamically balance computational load by first repartitioning the mesh and then redistributing the cells/elements + if has_changed && mpi_isparallel() && amr_callback.dynamic_load_balancing + @trixi_timeit timer() "dynamic load balancing" begin + old_mpi_ranks_per_cell = copy(mesh.tree.mpi_ranks) - # coarsen solver - @trixi_timeit timer() "solver" coarsen!(u_ode, adaptor, mesh, equations, dg, cache, elements_to_remove) - for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args - @trixi_timeit timer() "passive solver" coarsen!(p_u_ode, adaptor, p_mesh, p_equations, p_dg, p_cache, elements_to_remove) - end - else - # If there is nothing to coarsen, create empty array for later use - coarsened_original_cells = Int[] - end - - # Store whether there were any cells coarsened or refined - has_changed = !isempty(refined_original_cells) || !isempty(coarsened_original_cells) - if has_changed # TODO: Taal decide, where shall we set this? - # don't set it to has_changed since there can be changes from earlier calls - mesh.unsaved_changes = true - end - - # Dynamically balance computational load by first repartitioning the mesh and then redistributing the cells/elements - if has_changed && mpi_isparallel() && amr_callback.dynamic_load_balancing - @trixi_timeit timer() "dynamic load balancing" begin - old_mpi_ranks_per_cell = copy(mesh.tree.mpi_ranks) - - partition!(mesh) - - rebalance_solver!(u_ode, mesh, equations, dg, cache, old_mpi_ranks_per_cell) + partition!(mesh) + + rebalance_solver!(u_ode, mesh, equations, dg, cache, old_mpi_ranks_per_cell) + end end - end - # Return true if there were any cells coarsened or refined, otherwise false - return has_changed + # Return true if there were any cells coarsened or refined, otherwise false + return has_changed end - # Copy controller values to quad user data storage, will be called below function copy_to_quad_iter_volume(info, user_data) - info_obj = unsafe_load(info) - - # Load tree from global trees array, one-based indexing - tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) - # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset - # Global quad ID - quad_id = offset + info_obj.quadid - - # Access user_data = lambda - user_data_ptr = Ptr{Int}(user_data) - # Load controller_value = lambda[quad_id + 1] - controller_value = unsafe_load(user_data_ptr, quad_id + 1) - - # Access quadrant's user data ([global quad ID, controller_value]) - quad_data_ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) - # Save controller value to quadrant's user data. 
- unsafe_store!(quad_data_ptr, controller_value, 2) - - return nothing + info_obj = unsafe_load(info) + + # Load tree from global trees array, one-based indexing + tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) + # Quadrant numbering offset of this quadrant + offset = tree.quadrants_offset + # Global quad ID + quad_id = offset + info_obj.quadid + + # Access user_data = lambda + user_data_ptr = Ptr{Int}(user_data) + # Load controller_value = lambda[quad_id + 1] + controller_value = unsafe_load(user_data_ptr, quad_id + 1) + + # Access quadrant's user data ([global quad ID, controller_value]) + quad_data_ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) + # Save controller value to quadrant's user data. + unsafe_store!(quad_data_ptr, controller_value, 2) + + return nothing end # 2D -cfunction(::typeof(copy_to_quad_iter_volume), ::Val{2}) = @cfunction(copy_to_quad_iter_volume, Cvoid, (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(copy_to_quad_iter_volume), ::Val{2}) + @cfunction(copy_to_quad_iter_volume, Cvoid, + (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(copy_to_quad_iter_volume), ::Val{3}) = @cfunction(copy_to_quad_iter_volume, Cvoid, (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(copy_to_quad_iter_volume), ::Val{3}) + @cfunction(copy_to_quad_iter_volume, Cvoid, + (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +end function (amr_callback::AMRCallback)(u_ode::AbstractVector, mesh::P4estMesh, equations, dg::DG, cache, semi, t, iter; - only_refine=false, only_coarsen=false, - passive_args=()) - @unpack controller, adaptor = amr_callback - - u = wrap_array(u_ode, mesh, equations, dg, cache) - lambda = @trixi_timeit timer() "indicator" controller(u, mesh, equations, dg, cache, - t=t, iter=iter) - - @boundscheck begin - @assert axes(lambda) == (Base.OneTo(ncells(mesh)),) ( - "Indicator array (axes = $(axes(lambda))) and mesh cells (axes = $(Base.OneTo(ncells(mesh)))) have different axes" - ) - end - - # Copy controller value of each quad to the quad's user data storage - iter_volume_c = cfunction(copy_to_quad_iter_volume, Val(ndims(mesh))) - - # The pointer to lambda will be interpreted as Ptr{Int} above - @assert lambda isa Vector{Int} - iterate_p4est(mesh.p4est, lambda; iter_volume_c=iter_volume_c) - - @trixi_timeit timer() "refine" if !only_coarsen - # Refine mesh - refined_original_cells = @trixi_timeit timer() "mesh" refine!(mesh) - - # Refine solver - @trixi_timeit timer() "solver" refine!(u_ode, adaptor, mesh, equations, dg, cache, - refined_original_cells) - for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args - @trixi_timeit timer() "passive solver" refine!(p_u_ode, adaptor, p_mesh, p_equations, - p_dg, p_cache, refined_original_cells) + only_refine = false, only_coarsen = false, + passive_args = ()) + @unpack controller, adaptor = amr_callback + + u = wrap_array(u_ode, mesh, equations, dg, cache) + lambda = @trixi_timeit timer() "indicator" controller(u, mesh, equations, dg, cache, + t = t, iter = iter) + + @boundscheck begin + @assert axes(lambda)==(Base.OneTo(ncells(mesh)),) ("Indicator array (axes = $(axes(lambda))) and mesh cells (axes = $(Base.OneTo(ncells(mesh)))) have different axes") end - else - # If there is nothing to refine, create empty array for later use - refined_original_cells = Int[] - end - - @trixi_timeit timer() "coarsen" if !only_refine - # Coarsen mesh - coarsened_original_cells = @trixi_timeit timer() "mesh" coarsen!(mesh) - - # coarsen solver - 
@trixi_timeit timer() "solver" coarsen!(u_ode, adaptor, mesh, equations, dg, cache, - coarsened_original_cells) - for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args - @trixi_timeit timer() "passive solver" coarsen!(p_u_ode, adaptor, p_mesh, p_equations, - p_dg, p_cache, coarsened_original_cells) + + # Copy controller value of each quad to the quad's user data storage + iter_volume_c = cfunction(copy_to_quad_iter_volume, Val(ndims(mesh))) + + # The pointer to lambda will be interpreted as Ptr{Int} above + @assert lambda isa Vector{Int} + iterate_p4est(mesh.p4est, lambda; iter_volume_c = iter_volume_c) + + @trixi_timeit timer() "refine" if !only_coarsen + # Refine mesh + refined_original_cells = @trixi_timeit timer() "mesh" refine!(mesh) + + # Refine solver + @trixi_timeit timer() "solver" refine!(u_ode, adaptor, mesh, equations, dg, + cache, + refined_original_cells) + for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args + @trixi_timeit timer() "passive solver" refine!(p_u_ode, adaptor, p_mesh, + p_equations, + p_dg, p_cache, + refined_original_cells) + end + else + # If there is nothing to refine, create empty array for later use + refined_original_cells = Int[] end - else - # If there is nothing to coarsen, create empty array for later use - coarsened_original_cells = Int[] - end - - # Store whether there were any cells coarsened or refined and perform load balancing - has_changed = !isempty(refined_original_cells) || !isempty(coarsened_original_cells) - # Check if mesh changed on other processes - if mpi_isparallel() - has_changed = MPI.Allreduce!(Ref(has_changed), |, mpi_comm())[] - end - - if has_changed # TODO: Taal decide, where shall we set this? - # don't set it to has_changed since there can be changes from earlier calls - mesh.unsaved_changes = true - - if mpi_isparallel() && amr_callback.dynamic_load_balancing - @trixi_timeit timer() "dynamic load balancing" begin - global_first_quadrant = unsafe_wrap(Array, unsafe_load(mesh.p4est).global_first_quadrant, mpi_nranks() + 1) - old_global_first_quadrant = copy(global_first_quadrant) - partition!(mesh) - rebalance_solver!(u_ode, mesh, equations, dg, cache, old_global_first_quadrant) - end + + @trixi_timeit timer() "coarsen" if !only_refine + # Coarsen mesh + coarsened_original_cells = @trixi_timeit timer() "mesh" coarsen!(mesh) + + # coarsen solver + @trixi_timeit timer() "solver" coarsen!(u_ode, adaptor, mesh, equations, dg, + cache, + coarsened_original_cells) + for (p_u_ode, p_mesh, p_equations, p_dg, p_cache) in passive_args + @trixi_timeit timer() "passive solver" coarsen!(p_u_ode, adaptor, p_mesh, + p_equations, + p_dg, p_cache, + coarsened_original_cells) + end + else + # If there is nothing to coarsen, create empty array for later use + coarsened_original_cells = Int[] end - reinitialize_boundaries!(semi.boundary_conditions, cache) - end + # Store whether there were any cells coarsened or refined and perform load balancing + has_changed = !isempty(refined_original_cells) || !isempty(coarsened_original_cells) + # Check if mesh changed on other processes + if mpi_isparallel() + has_changed = MPI.Allreduce!(Ref(has_changed), |, mpi_comm())[] + end - # Return true if there were any cells coarsened or refined, otherwise false - return has_changed + if has_changed # TODO: Taal decide, where shall we set this? 
+ # don't set it to has_changed since there can be changes from earlier calls + mesh.unsaved_changes = true + + if mpi_isparallel() && amr_callback.dynamic_load_balancing + @trixi_timeit timer() "dynamic load balancing" begin + global_first_quadrant = unsafe_wrap(Array, + unsafe_load(mesh.p4est).global_first_quadrant, + mpi_nranks() + 1) + old_global_first_quadrant = copy(global_first_quadrant) + partition!(mesh) + rebalance_solver!(u_ode, mesh, equations, dg, cache, + old_global_first_quadrant) + end + end + + reinitialize_boundaries!(semi.boundary_conditions, cache) + end + + # Return true if there were any cells coarsened or refined, otherwise false + return has_changed end -function reinitialize_boundaries!(boundary_conditions::UnstructuredSortedBoundaryTypes, cache) - # Reinitialize boundary types container because boundaries may have changed. - initialize!(boundary_conditions, cache) +function reinitialize_boundaries!(boundary_conditions::UnstructuredSortedBoundaryTypes, + cache) + # Reinitialize boundary types container because boundaries may have changed. + initialize!(boundary_conditions, cache) end function reinitialize_boundaries!(boundary_conditions, cache) - return boundary_conditions + return boundary_conditions end - # After refining cells, shift original cell ids to match new locations # Note: Assumes sorted lists of original and refined cell ids! # Note: `mesh` is only required to extract ndims function original2refined(original_cell_ids, refined_original_cells, mesh) - # Sanity check - @assert issorted(original_cell_ids) "`original_cell_ids` not sorted" - @assert issorted(refined_original_cells) "`refined_cell_ids` not sorted" - - # Create array with original cell ids (not yet shifted) - shifted_cell_ids = collect(1:original_cell_ids[end]) - - # Loop over refined original cells and apply shift for all following cells - for cell_id in refined_original_cells - # Only calculate shifts for cell ids that are relevant - if cell_id > length(shifted_cell_ids) - break + # Sanity check + @assert issorted(original_cell_ids) "`original_cell_ids` not sorted" + @assert issorted(refined_original_cells) "`refined_cell_ids` not sorted" + + # Create array with original cell ids (not yet shifted) + shifted_cell_ids = collect(1:original_cell_ids[end]) + + # Loop over refined original cells and apply shift for all following cells + for cell_id in refined_original_cells + # Only calculate shifts for cell ids that are relevant + if cell_id > length(shifted_cell_ids) + break + end + + # Shift all subsequent cells by 2^ndims ids + shifted_cell_ids[(cell_id + 1):end] .+= 2^ndims(mesh) end - # Shift all subsequent cells by 2^ndims ids - shifted_cell_ids[(cell_id + 1):end] .+= 2^ndims(mesh) - end - - # Convert original cell ids to their shifted values - return shifted_cell_ids[original_cell_ids] + # Convert original cell ids to their shifted values + return shifted_cell_ids[original_cell_ids] end - - """ ControllerThreeLevel(semi, indicator; base_level=1, med_level=base_level, med_threshold=0.0, @@ -494,161 +518,169 @@ An AMR controller based on three levels (in descending order of precedence): if `med_level < 0`, set the target level to the current level - set the target level to `base_level` otherwise """ -struct ControllerThreeLevel{RealT<:Real, Indicator, Cache} - base_level::Int - med_level ::Int - max_level ::Int - med_threshold::RealT - max_threshold::RealT - indicator::Indicator - cache::Cache +struct ControllerThreeLevel{RealT <: Real, Indicator, Cache} + base_level::Int + med_level::Int + 
max_level::Int + med_threshold::RealT + max_threshold::RealT + indicator::Indicator + cache::Cache end -function ControllerThreeLevel(semi, indicator; base_level=1, - med_level=base_level, med_threshold=0.0, - max_level=base_level, max_threshold=1.0) - med_threshold, max_threshold = promote(med_threshold, max_threshold) - cache = create_cache(ControllerThreeLevel, semi) - ControllerThreeLevel{typeof(max_threshold), typeof(indicator), typeof(cache)}( - base_level, med_level, max_level, med_threshold, max_threshold, indicator, cache) +function ControllerThreeLevel(semi, indicator; base_level = 1, + med_level = base_level, med_threshold = 0.0, + max_level = base_level, max_threshold = 1.0) + med_threshold, max_threshold = promote(med_threshold, max_threshold) + cache = create_cache(ControllerThreeLevel, semi) + ControllerThreeLevel{typeof(max_threshold), typeof(indicator), typeof(cache)}(base_level, + med_level, + max_level, + med_threshold, + max_threshold, + indicator, + cache) end -create_cache(indicator_type::Type{ControllerThreeLevel}, semi) = create_cache(indicator_type, mesh_equations_solver_cache(semi)...) - +function create_cache(indicator_type::Type{ControllerThreeLevel}, semi) + create_cache(indicator_type, mesh_equations_solver_cache(semi)...) +end function Base.show(io::IO, controller::ControllerThreeLevel) - @nospecialize controller # reduce precompilation time - - print(io, "ControllerThreeLevel(") - print(io, controller.indicator) - print(io, ", base_level=", controller.base_level) - print(io, ", med_level=", controller.med_level) - print(io, ", max_level=", controller.max_level) - print(io, ", med_threshold=", controller.med_threshold) - print(io, ", max_threshold=", controller.max_threshold) - print(io, ")") + @nospecialize controller # reduce precompilation time + + print(io, "ControllerThreeLevel(") + print(io, controller.indicator) + print(io, ", base_level=", controller.base_level) + print(io, ", med_level=", controller.med_level) + print(io, ", max_level=", controller.max_level) + print(io, ", med_threshold=", controller.med_threshold) + print(io, ", max_threshold=", controller.max_threshold) + print(io, ")") end function Base.show(io::IO, mime::MIME"text/plain", controller::ControllerThreeLevel) - @nospecialize controller # reduce precompilation time + @nospecialize controller # reduce precompilation time - if get(io, :compact, false) - show(io, controller) - else - summary_header(io, "ControllerThreeLevel") - summary_line(io, "indicator", controller.indicator |> typeof |> nameof) - show(increment_indent(io), mime, controller.indicator) - summary_line(io, "base_level", controller.base_level) - summary_line(io, "med_level", controller.med_level) - summary_line(io, "max_level", controller.max_level) - summary_line(io, "med_threshold", controller.med_threshold) - summary_line(io, "max_threshold", controller.max_threshold) - summary_footer(io) - end + if get(io, :compact, false) + show(io, controller) + else + summary_header(io, "ControllerThreeLevel") + summary_line(io, "indicator", controller.indicator |> typeof |> nameof) + show(increment_indent(io), mime, controller.indicator) + summary_line(io, "base_level", controller.base_level) + summary_line(io, "med_level", controller.med_level) + summary_line(io, "max_level", controller.max_level) + summary_line(io, "med_threshold", controller.med_threshold) + summary_line(io, "max_threshold", controller.max_threshold) + summary_footer(io) + end end - function get_element_variables!(element_variables, u, mesh, equations, solver, 
cache, - controller::ControllerThreeLevel, amr_callback::AMRCallback; + controller::ControllerThreeLevel, + amr_callback::AMRCallback; kwargs...) - # call the indicator to get up-to-date values for IO - controller.indicator(u, mesh, equations, solver, cache; kwargs...) - get_element_variables!(element_variables, controller.indicator, amr_callback) + # call the indicator to get up-to-date values for IO + controller.indicator(u, mesh, equations, solver, cache; kwargs...) + get_element_variables!(element_variables, controller.indicator, amr_callback) end -function get_element_variables!(element_variables, indicator::AbstractIndicator, ::AMRCallback) - element_variables[:indicator_amr] = indicator.cache.alpha - return nothing +function get_element_variables!(element_variables, indicator::AbstractIndicator, + ::AMRCallback) + element_variables[:indicator_amr] = indicator.cache.alpha + return nothing end - function current_element_levels(mesh::TreeMesh, solver, cache) - cell_ids = cache.elements.cell_ids[eachelement(solver, cache)] + cell_ids = cache.elements.cell_ids[eachelement(solver, cache)] - return mesh.tree.levels[cell_ids] + return mesh.tree.levels[cell_ids] end - function extract_levels_iter_volume(info, user_data) - info_obj = unsafe_load(info) + info_obj = unsafe_load(info) - # Load tree from global trees array, one-based indexing - tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) - # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset - # Global quad ID - quad_id = offset + info_obj.quadid - # Julia element ID - element_id = quad_id + 1 + # Load tree from global trees array, one-based indexing + tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) + # Quadrant numbering offset of this quadrant + offset = tree.quadrants_offset + # Global quad ID + quad_id = offset + info_obj.quadid + # Julia element ID + element_id = quad_id + 1 - current_level = unsafe_load(info_obj.quad.level) + current_level = unsafe_load(info_obj.quad.level) - # Unpack user_data = current_levels and save current element level - ptr = Ptr{Int}(user_data) - unsafe_store!(ptr, current_level, element_id) + # Unpack user_data = current_levels and save current element level + ptr = Ptr{Int}(user_data) + unsafe_store!(ptr, current_level, element_id) - return nothing + return nothing end # 2D -cfunction(::typeof(extract_levels_iter_volume), ::Val{2}) = @cfunction(extract_levels_iter_volume, Cvoid, (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(extract_levels_iter_volume), ::Val{2}) + @cfunction(extract_levels_iter_volume, Cvoid, + (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(extract_levels_iter_volume), ::Val{3}) = @cfunction(extract_levels_iter_volume, Cvoid, (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(extract_levels_iter_volume), ::Val{3}) + @cfunction(extract_levels_iter_volume, Cvoid, + (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +end function current_element_levels(mesh::P4estMesh, solver, cache) - current_levels = Vector{Int}(undef, nelements(solver, cache)) + current_levels = Vector{Int}(undef, nelements(solver, cache)) - iter_volume_c = cfunction(extract_levels_iter_volume, Val(ndims(mesh))) - iterate_p4est(mesh.p4est, current_levels; iter_volume_c=iter_volume_c) + iter_volume_c = cfunction(extract_levels_iter_volume, Val(ndims(mesh))) + iterate_p4est(mesh.p4est, current_levels; iter_volume_c = iter_volume_c) - return current_levels + return current_levels end - # TODO: Taal refactor, 
merge the two loops of ControllerThreeLevel and IndicatorLöhner etc.? # But that would remove the simplest possibility to write that stuff to a file... # We could of course implement some additional logic and workarounds, but is it worth the effort? function (controller::ControllerThreeLevel)(u::AbstractArray{<:Any}, mesh, equations, dg::DG, cache; kwargs...) - - @unpack controller_value = controller.cache - resize!(controller_value, nelements(dg, cache)) - - alpha = controller.indicator(u, mesh, equations, dg, cache; kwargs...) - current_levels = current_element_levels(mesh, dg, cache) - - @threaded for element in eachelement(dg, cache) - current_level = current_levels[element] - - # set target level - target_level = current_level - if alpha[element] > controller.max_threshold - target_level = controller.max_level - elseif alpha[element] > controller.med_threshold - if controller.med_level > 0 - target_level = controller.med_level - # otherwise, target_level = current_level - # set med_level = -1 to implicitly use med_level = current_level - end - else - target_level = controller.base_level - end - - # compare target level with actual level to set controller - if current_level < target_level - controller_value[element] = 1 # refine! - elseif current_level > target_level - controller_value[element] = -1 # coarsen! - else - controller_value[element] = 0 # we're good + @unpack controller_value = controller.cache + resize!(controller_value, nelements(dg, cache)) + + alpha = controller.indicator(u, mesh, equations, dg, cache; kwargs...) + current_levels = current_element_levels(mesh, dg, cache) + + @threaded for element in eachelement(dg, cache) + current_level = current_levels[element] + + # set target level + target_level = current_level + if alpha[element] > controller.max_threshold + target_level = controller.max_level + elseif alpha[element] > controller.med_threshold + if controller.med_level > 0 + target_level = controller.med_level + # otherwise, target_level = current_level + # set med_level = -1 to implicitly use med_level = current_level + end + else + target_level = controller.base_level + end + + # compare target level with actual level to set controller + if current_level < target_level + controller_value[element] = 1 # refine! + elseif current_level > target_level + controller_value[element] = -1 # coarsen! + else + controller_value[element] = 0 # we're good + end end - end - return controller_value + return controller_value end - """ ControllerThreeLevelCombined(semi, indicator_primary, indicator_secondary; base_level=1, @@ -664,129 +696,139 @@ An AMR controller based on three levels (in descending order of precedence): If `indicator_secondary >= max_threshold_secondary`, set the target level to `max_level`. 
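For example, a minimal sketch of a possible setup (the threshold values here are
hypothetical; `semi`, `indicator_primary`, and `indicator_secondary` are assumed
to be constructed beforehand):

    # refine to `max_level` wherever `indicator_primary > max_threshold`
    # or `indicator_secondary >= max_threshold_secondary`
    controller = ControllerThreeLevelCombined(semi, indicator_primary,
                                              indicator_secondary;
                                              base_level=4,
                                              med_level=5, med_threshold=0.1,
                                              max_level=6, max_threshold=0.5,
                                              max_threshold_secondary=0.3)

The resulting controller is then used with an `AMRCallback`.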
""" -struct ControllerThreeLevelCombined{RealT<:Real, IndicatorPrimary, IndicatorSecondary, Cache} - base_level::Int - med_level ::Int - max_level ::Int - med_threshold::RealT - max_threshold::RealT - max_threshold_secondary::RealT - indicator_primary::IndicatorPrimary - indicator_secondary::IndicatorSecondary - cache::Cache +struct ControllerThreeLevelCombined{RealT <: Real, IndicatorPrimary, IndicatorSecondary, + Cache} + base_level::Int + med_level::Int + max_level::Int + med_threshold::RealT + max_threshold::RealT + max_threshold_secondary::RealT + indicator_primary::IndicatorPrimary + indicator_secondary::IndicatorSecondary + cache::Cache end function ControllerThreeLevelCombined(semi, indicator_primary, indicator_secondary; - base_level=1, - med_level=base_level, med_threshold=0.0, - max_level=base_level, max_threshold=1.0, - max_threshold_secondary=1.0) - med_threshold, max_threshold, max_threshold_secondary = promote(med_threshold, max_threshold, max_threshold_secondary) - cache = create_cache(ControllerThreeLevelCombined, semi) - ControllerThreeLevelCombined{typeof(max_threshold), typeof(indicator_primary), typeof(indicator_secondary), typeof(cache)}( - base_level, med_level, max_level, med_threshold, max_threshold, - max_threshold_secondary, indicator_primary, indicator_secondary, cache) + base_level = 1, + med_level = base_level, med_threshold = 0.0, + max_level = base_level, max_threshold = 1.0, + max_threshold_secondary = 1.0) + med_threshold, max_threshold, max_threshold_secondary = promote(med_threshold, + max_threshold, + max_threshold_secondary) + cache = create_cache(ControllerThreeLevelCombined, semi) + ControllerThreeLevelCombined{typeof(max_threshold), typeof(indicator_primary), + typeof(indicator_secondary), typeof(cache)}(base_level, + med_level, + max_level, + med_threshold, + max_threshold, + max_threshold_secondary, + indicator_primary, + indicator_secondary, + cache) end -create_cache(indicator_type::Type{ControllerThreeLevelCombined}, semi) = create_cache(indicator_type, mesh_equations_solver_cache(semi)...) - +function create_cache(indicator_type::Type{ControllerThreeLevelCombined}, semi) + create_cache(indicator_type, mesh_equations_solver_cache(semi)...) 
+end function Base.show(io::IO, controller::ControllerThreeLevelCombined) - @nospecialize controller # reduce precompilation time - - print(io, "ControllerThreeLevelCombined(") - print(io, controller.indicator_primary) - print(io, ", ", controller.indicator_secondary) - print(io, ", base_level=", controller.base_level) - print(io, ", med_level=", controller.med_level) - print(io, ", max_level=", controller.max_level) - print(io, ", med_threshold=", controller.med_threshold) - print(io, ", max_threshold_secondary=", controller.max_threshold_secondary) - print(io, ")") -end - -function Base.show(io::IO, mime::MIME"text/plain", controller::ControllerThreeLevelCombined) - @nospecialize controller # reduce precompilation time - - if get(io, :compact, false) - show(io, controller) - else - summary_header(io, "ControllerThreeLevelCombined") - summary_line(io, "primary indicator", controller.indicator_primary |> typeof |> nameof) - show(increment_indent(io), mime, controller.indicator_primary) - summary_line(io, "secondary indicator", controller.indicator_secondary |> typeof |> nameof) - show(increment_indent(io), mime, controller.indicator_secondary) - summary_line(io, "base_level", controller.base_level) - summary_line(io, "med_level", controller.med_level) - summary_line(io, "max_level", controller.max_level) - summary_line(io, "med_threshold", controller.med_threshold) - summary_line(io, "max_threshold", controller.max_threshold) - summary_line(io, "max_threshold_secondary", controller.max_threshold_secondary) - summary_footer(io) - end + @nospecialize controller # reduce precompilation time + + print(io, "ControllerThreeLevelCombined(") + print(io, controller.indicator_primary) + print(io, ", ", controller.indicator_secondary) + print(io, ", base_level=", controller.base_level) + print(io, ", med_level=", controller.med_level) + print(io, ", max_level=", controller.max_level) + print(io, ", med_threshold=", controller.med_threshold) + print(io, ", max_threshold_secondary=", controller.max_threshold_secondary) + print(io, ")") end +function Base.show(io::IO, mime::MIME"text/plain", + controller::ControllerThreeLevelCombined) + @nospecialize controller # reduce precompilation time + + if get(io, :compact, false) + show(io, controller) + else + summary_header(io, "ControllerThreeLevelCombined") + summary_line(io, "primary indicator", + controller.indicator_primary |> typeof |> nameof) + show(increment_indent(io), mime, controller.indicator_primary) + summary_line(io, "secondary indicator", + controller.indicator_secondary |> typeof |> nameof) + show(increment_indent(io), mime, controller.indicator_secondary) + summary_line(io, "base_level", controller.base_level) + summary_line(io, "med_level", controller.med_level) + summary_line(io, "max_level", controller.max_level) + summary_line(io, "med_threshold", controller.med_threshold) + summary_line(io, "max_threshold", controller.max_threshold) + summary_line(io, "max_threshold_secondary", controller.max_threshold_secondary) + summary_footer(io) + end +end function get_element_variables!(element_variables, u, mesh, equations, solver, cache, - controller::ControllerThreeLevelCombined, amr_callback::AMRCallback; + controller::ControllerThreeLevelCombined, + amr_callback::AMRCallback; kwargs...) - # call the indicator to get up-to-date values for IO - controller.indicator_primary(u, mesh, equations, solver, cache; kwargs...) 
- get_element_variables!(element_variables, controller.indicator_primary, amr_callback) + # call the indicator to get up-to-date values for IO + controller.indicator_primary(u, mesh, equations, solver, cache; kwargs...) + get_element_variables!(element_variables, controller.indicator_primary, + amr_callback) end - function (controller::ControllerThreeLevelCombined)(u::AbstractArray{<:Any}, mesh, equations, dg::DG, cache; kwargs...) - - @unpack controller_value = controller.cache - resize!(controller_value, nelements(dg, cache)) - - alpha = controller.indicator_primary(u, mesh, equations, dg, cache; kwargs...) - alpha_secondary = controller.indicator_secondary(u, mesh, equations, dg, cache) - - current_levels = current_element_levels(mesh, dg, cache) - - @threaded for element in eachelement(dg, cache) - current_level = current_levels[element] - - # set target level - target_level = current_level - if alpha[element] > controller.max_threshold - target_level = controller.max_level - elseif alpha[element] > controller.med_threshold - if controller.med_level > 0 - target_level = controller.med_level - # otherwise, target_level = current_level - # set med_level = -1 to implicitly use med_level = current_level - end - else - target_level = controller.base_level + @unpack controller_value = controller.cache + resize!(controller_value, nelements(dg, cache)) + + alpha = controller.indicator_primary(u, mesh, equations, dg, cache; kwargs...) + alpha_secondary = controller.indicator_secondary(u, mesh, equations, dg, cache) + + current_levels = current_element_levels(mesh, dg, cache) + + @threaded for element in eachelement(dg, cache) + current_level = current_levels[element] + + # set target level + target_level = current_level + if alpha[element] > controller.max_threshold + target_level = controller.max_level + elseif alpha[element] > controller.med_threshold + if controller.med_level > 0 + target_level = controller.med_level + # otherwise, target_level = current_level + # set med_level = -1 to implicitly use med_level = current_level + end + else + target_level = controller.base_level + end + + if alpha_secondary[element] >= controller.max_threshold_secondary + target_level = controller.max_level + end + + # compare target level with actual level to set controller + if current_level < target_level + controller_value[element] = 1 # refine! + elseif current_level > target_level + controller_value[element] = -1 # coarsen! + else + controller_value[element] = 0 # we're good + end end - if alpha_secondary[element] >= controller.max_threshold_secondary - target_level = controller.max_level - end - - # compare target level with actual level to set controller - if current_level < target_level - controller_value[element] = 1 # refine! - elseif current_level > target_level - controller_value[element] = -1 # coarsen! - else - controller_value[element] = 0 # we're good - end - end - - return controller_value + return controller_value end - include("amr_dg.jl") include("amr_dg1d.jl") include("amr_dg2d.jl") include("amr_dg3d.jl") - - end # @muladd diff --git a/src/callbacks_step/amr_dg.jl b/src/callbacks_step/amr_dg.jl index 239b83cb562..19bbebd9254 100644 --- a/src/callbacks_step/amr_dg.jl +++ b/src/callbacks_step/amr_dg.jl @@ -3,75 +3,89 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Redistribute data for load balancing after partitioning the mesh function rebalance_solver!(u_ode::AbstractVector, mesh::ParallelP4estMesh, equations, dg::DGSEM, cache, old_global_first_quadrant) - # mpi ranks are 0-based, this array uses 1-based indices - global_first_quadrant = unsafe_wrap(Array, unsafe_load(mesh.p4est).global_first_quadrant, mpi_nranks() + 1) - if global_first_quadrant[mpi_rank()+1] == old_global_first_quadrant[mpi_rank()+1] && - global_first_quadrant[mpi_rank()+2] == old_global_first_quadrant[mpi_rank()+2] - # Global ids of first and last local quadrants are the same for newly partitioned mesh so the - # solver does not need to be rebalanced on this rank. - # Container init uses all-to-all communication -> reinitialize even if there is nothing to do - # locally (there are other MPI ranks that need to be rebalanced if this function is called) - reinitialize_containers!(mesh, equations, dg, cache) - return - end - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_u_ode = copy(u_ode) - GC.@preserve old_u_ode begin # OBS! If we don't GC.@preserve old_u_ode, it might be GC'ed - # Use `wrap_array_native` instead of `wrap_array` since MPI might not interact - # nicely with non-base array types - old_u = wrap_array_native(old_u_ode, mesh, equations, dg, cache) - - @trixi_timeit timer() "reinitialize data structures" reinitialize_containers!(mesh, equations, dg, cache) - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array_native(u_ode, mesh, equations, dg, cache) + # mpi ranks are 0-based, this array uses 1-based indices + global_first_quadrant = unsafe_wrap(Array, + unsafe_load(mesh.p4est).global_first_quadrant, + mpi_nranks() + 1) + if global_first_quadrant[mpi_rank() + 1] == + old_global_first_quadrant[mpi_rank() + 1] && + global_first_quadrant[mpi_rank() + 2] == + old_global_first_quadrant[mpi_rank() + 2] + # Global ids of first and last local quadrants are the same for newly partitioned mesh so the + # solver does not need to be rebalanced on this rank. + # Container init uses all-to-all communication -> reinitialize even if there is nothing to do + # locally (there are other MPI ranks that need to be rebalanced if this function is called) + reinitialize_containers!(mesh, equations, dg, cache) + return + end + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed + # Use `wrap_array_native` instead of `wrap_array` since MPI might not interact + # nicely with non-base array types + old_u = wrap_array_native(old_u_ode, mesh, equations, dg, cache) - @trixi_timeit timer() "exchange data" begin - # Collect MPI requests for MPI_Waitall - requests = Vector{MPI.Request}() - # Find elements that will change their rank and send their data to the new rank - for old_element_id in 1:old_n_elements - # Get global quad ID of old element; local quad id is element id - 1 - global_quad_id = old_global_first_quadrant[mpi_rank()+1] + old_element_id - 1 - if !(global_first_quadrant[mpi_rank()+1] <= global_quad_id < global_first_quadrant[mpi_rank()+2]) - # Send element data to new rank, use global_quad_id as tag (non-blocking) - dest = findfirst(r -> global_first_quadrant[r] <= global_quad_id < global_first_quadrant[r+1], - 1:mpi_nranks()) - 1 # mpi ranks 0-based - request = MPI.Isend(@view(old_u[:, .., old_element_id]), dest, global_quad_id, mpi_comm()) - push!(requests, request) + @trixi_timeit timer() "reinitialize data structures" begin + reinitialize_containers!(mesh, equations, dg, cache) end - end - # Loop over all elements in new container and either copy them from old container - # or receive them with MPI - for element in eachelement(dg, cache) - # Get global quad ID of element; local quad id is element id - 1 - global_quad_id = global_first_quadrant[mpi_rank()+1] + element - 1 - if old_global_first_quadrant[mpi_rank()+1] <= global_quad_id < old_global_first_quadrant[mpi_rank()+2] - # Quad ids are 0-based, element ids are 1-based, hence add 1 - old_element_id = global_quad_id - old_global_first_quadrant[mpi_rank()+1] + 1 - # Copy old element data to new element container - @views u[:, .., element] .= old_u[:, .., old_element_id] - else - # Receive old element data - src = findfirst(r -> old_global_first_quadrant[r] <= global_quad_id < old_global_first_quadrant[r+1], - 1:mpi_nranks()) - 1 # mpi ranks 0-based - request = MPI.Irecv!(@view(u[:, .., element]), src, global_quad_id, mpi_comm()) - push!(requests, request) - end - end + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array_native(u_ode, mesh, equations, dg, cache) - # Wait for all non-blocking MPI send/receive operations to finish - MPI.Waitall(requests, MPI.Status) - end - end # GC.@preserve old_u_ode -end + @trixi_timeit timer() "exchange data" begin + # Collect MPI requests for MPI_Waitall + requests = Vector{MPI.Request}() + # Find elements that will change their rank and send their data to the new rank + for old_element_id in 1:old_n_elements + # Get global quad ID of old element; local quad id is element id - 1 + global_quad_id = old_global_first_quadrant[mpi_rank() + 1] + + old_element_id - 1 + if !(global_first_quadrant[mpi_rank() + 1] <= global_quad_id < + global_first_quadrant[mpi_rank() + 2]) + # Send element data to new rank, use global_quad_id as tag (non-blocking) + dest = findfirst(r -> global_first_quadrant[r] <= global_quad_id < + global_first_quadrant[r + 1], + 1:mpi_nranks()) - 1 # mpi ranks 0-based + request = MPI.Isend(@view(old_u[:, .., old_element_id]), dest, + global_quad_id, mpi_comm()) + push!(requests, request) + end + end + # Loop over all elements in new container and either copy them from old container + # or receive them with MPI + for element in eachelement(dg, cache) + # Get global quad ID of element; local quad id is element id - 1 + global_quad_id = 
global_first_quadrant[mpi_rank() + 1] + element - 1 + if old_global_first_quadrant[mpi_rank() + 1] <= global_quad_id < + old_global_first_quadrant[mpi_rank() + 2] + # Quad ids are 0-based, element ids are 1-based, hence add 1 + old_element_id = global_quad_id - + old_global_first_quadrant[mpi_rank() + 1] + 1 + # Copy old element data to new element container + @views u[:, .., element] .= old_u[:, .., old_element_id] + else + # Receive old element data + src = findfirst(r -> old_global_first_quadrant[r] <= + global_quad_id < + old_global_first_quadrant[r + 1], + 1:mpi_nranks()) - 1 # mpi ranks 0-based + request = MPI.Irecv!(@view(u[:, .., element]), src, global_quad_id, + mpi_comm()) + push!(requests, request) + end + end -end # @muladd \ No newline at end of file + # Wait for all non-blocking MPI send/receive operations to finish + MPI.Waitall(requests, MPI.Status) + end + end # GC.@preserve old_u_ode +end +end # @muladd diff --git a/src/callbacks_step/amr_dg1d.jl b/src/callbacks_step/amr_dg1d.jl index b16b349189c..e31a74730ea 100644 --- a/src/callbacks_step/amr_dg1d.jl +++ b/src/callbacks_step/amr_dg1d.jl @@ -3,255 +3,255 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Refine elements in the DG solver based on a list of cell_ids that should be refined function refine!(u_ode::AbstractVector, adaptor, mesh::TreeMesh{1}, equations, dg::DGSEM, cache, elements_to_refine) - # Return early if there is nothing to do - if isempty(elements_to_refine) - return - end - - # Determine for each existing element whether it needs to be refined - needs_refinement = falses(nelements(dg, cache)) - needs_refinement[elements_to_refine] .= true - - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_u_ode = copy(u_ode) - GC.@preserve old_u_ode begin # OBS! If we don't GC.@preserve old_u_ode, it might be GC'ed - old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) - - # Get new list of leaf cells - leaf_cell_ids = local_leaf_cells(mesh.tree) - - # re-initialize elements container - @unpack elements = cache - resize!(elements, length(leaf_cell_ids)) - init_elements!(elements, leaf_cell_ids, mesh, dg.basis) - @assert nelements(dg, cache) > old_n_elements - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array(u_ode, mesh, equations, dg, cache) - - # Loop over all elements in old container and either copy them or refine them - element_id = 1 - for old_element_id in 1:old_n_elements - if needs_refinement[old_element_id] - # Refine element and store solution directly in new data structure - refine_element!(u, element_id, old_u, old_element_id, - adaptor, equations, dg) - element_id += 2^ndims(mesh) - else - # Copy old element data to new element container - @views u[:, .., element_id] .= old_u[:, .., old_element_id] - element_id += 1 - end + # Return early if there is nothing to do + if isempty(elements_to_refine) + return + end + + # Determine for each existing element whether it needs to be refined + needs_refinement = falses(nelements(dg, cache)) + needs_refinement[elements_to_refine] .= true + + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed + old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) + + # Get new list of leaf cells + leaf_cell_ids = local_leaf_cells(mesh.tree) + + # re-initialize elements container + @unpack elements = cache + resize!(elements, length(leaf_cell_ids)) + init_elements!(elements, leaf_cell_ids, mesh, dg.basis) + @assert nelements(dg, cache) > old_n_elements + + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array(u_ode, mesh, equations, dg, cache) + + # Loop over all elements in old container and either copy them or refine them + element_id = 1 + for old_element_id in 1:old_n_elements + if needs_refinement[old_element_id] + # Refine element and store solution directly in new data structure + refine_element!(u, element_id, old_u, old_element_id, + adaptor, equations, dg) + element_id += 2^ndims(mesh) + else + # Copy old element data to new element container + @views u[:, .., element_id] .= old_u[:, .., old_element_id] + element_id += 1 + end + end + # If everything is correct, we should have processed all elements. + # Depending on whether the last element processed above had to be refined or not, + # the counter `element_id` can have two different values at the end. + @assert element_id == + nelements(dg, cache) + + 1||element_id == nelements(dg, cache) + 2^ndims(mesh) "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" + end # GC.@preserve old_u_ode + + # re-initialize interfaces container + @unpack interfaces = cache + resize!(interfaces, count_required_interfaces(mesh, leaf_cell_ids)) + init_interfaces!(interfaces, elements, mesh) + + # re-initialize boundaries container + @unpack boundaries = cache + resize!(boundaries, count_required_boundaries(mesh, leaf_cell_ids)) + init_boundaries!(boundaries, elements, mesh) + + # Sanity check + if isperiodic(mesh.tree) + @assert ninterfaces(interfaces)==1 * nelements(dg, cache) ("For 1D and periodic domains, the number of interfaces must be the same as the number of elements") end - # If everything is correct, we should have processed all elements. - # Depending on whether the last element processed above had to be refined or not, - # the counter `element_id` can have two different values at the end. 
- @assert element_id == nelements(dg, cache) + 1 || element_id == nelements(dg, cache) + 2^ndims(mesh) "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" - end # GC.@preserve old_u_ode - - # re-initialize interfaces container - @unpack interfaces = cache - resize!(interfaces, count_required_interfaces(mesh, leaf_cell_ids)) - init_interfaces!(interfaces, elements, mesh) - - # re-initialize boundaries container - @unpack boundaries = cache - resize!(boundaries, count_required_boundaries(mesh, leaf_cell_ids)) - init_boundaries!(boundaries, elements, mesh) - - # Sanity check - if isperiodic(mesh.tree) - @assert ninterfaces(interfaces) == 1 * nelements(dg, cache) ("For 1D and periodic domains, the number of interfaces must be the same as the number of elements") - end - - return nothing -end + return nothing +end # TODO: Taal compare performance of different implementations # Refine solution data u for an element, using L2 projection (interpolation) -function refine_element!(u::AbstractArray{<:Any,3}, element_id, +function refine_element!(u::AbstractArray{<:Any, 3}, element_id, old_u, old_element_id, adaptor::LobattoLegendreAdaptorL2, equations, dg) - @unpack forward_upper, forward_lower = adaptor - - # Store new element ids - left_id = element_id - right_id = element_id + 1 - - @boundscheck begin - @assert old_element_id >= 1 - @assert size(old_u, 1) == nvariables(equations) - @assert size(old_u, 2) == nnodes(dg) - @assert size(old_u, 3) >= old_element_id - @assert element_id >= 1 - @assert size( u, 1) == nvariables(equations) - @assert size( u, 2) == nnodes(dg) - @assert size( u, 3) >= element_id + 1 - end - - # Interpolate to left element - for i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, element_id)) - for k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, old_element_id) * forward_lower[i, k] - end - set_node_vars!(u, acc, equations, dg, i, left_id) - end - - # Interpolate to right element - for i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, element_id)) - for k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, old_element_id) * forward_upper[i, k] + @unpack forward_upper, forward_lower = adaptor + + # Store new element ids + left_id = element_id + right_id = element_id + 1 + + @boundscheck begin + @assert old_element_id >= 1 + @assert size(old_u, 1) == nvariables(equations) + @assert size(old_u, 2) == nnodes(dg) + @assert size(old_u, 3) >= old_element_id + @assert element_id >= 1 + @assert size(u, 1) == nvariables(equations) + @assert size(u, 2) == nnodes(dg) + @assert size(u, 3) >= element_id + 1 end - set_node_vars!(u, acc, equations, dg, i, right_id) - end - return nothing -end + # Interpolate to left element + for i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, element_id)) + for k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, old_element_id) * + forward_lower[i, k] + end + set_node_vars!(u, acc, equations, dg, i, left_id) + end + # Interpolate to right element + for i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, element_id)) + for k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, old_element_id) * + forward_upper[i, k] + end + set_node_vars!(u, acc, equations, dg, i, right_id) + end + return nothing +end # Coarsen elements in the DG solver based on a list of cell_ids that should be removed function coarsen!(u_ode::AbstractVector, adaptor, mesh::TreeMesh{1}, equations, dg::DGSEM, cache, elements_to_remove) - # Return early 
if there is nothing to do - if isempty(elements_to_remove) - return - end - - # Determine for each old element whether it needs to be removed - to_be_removed = falses(nelements(dg, cache)) - to_be_removed[elements_to_remove] .= true - - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_u_ode = copy(u_ode) - GC.@preserve old_u_ode begin # OBS! If we don't GC.@preserve old_u_ode, it might be GC'ed - old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) - - # Get new list of leaf cells - leaf_cell_ids = local_leaf_cells(mesh.tree) - - # re-initialize elements container - @unpack elements = cache - resize!(elements, length(leaf_cell_ids)) - init_elements!(elements, leaf_cell_ids, mesh, dg.basis) - @assert nelements(dg, cache) < old_n_elements - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array(u_ode, mesh, equations, dg, cache) - - # Loop over all elements in old container and either copy them or coarsen them - skip = 0 - element_id = 1 - for old_element_id in 1:old_n_elements - # If skip is non-zero, we just coarsened 2^ndims elements and need to omit the following elements - if skip > 0 - skip -= 1 - continue - end - - if to_be_removed[old_element_id] - # If an element is to be removed, sanity check if the following elements - # are also marked - otherwise there would be an error in the way the - # cells/elements are sorted - @assert all(to_be_removed[old_element_id:(old_element_id+2^ndims(mesh)-1)]) "bad cell/element order" - - # Coarsen elements and store solution directly in new data structure - coarsen_elements!(u, element_id, old_u, old_element_id, - adaptor, equations, dg) - element_id += 1 - skip = 2^ndims(mesh) - 1 - else - # Copy old element data to new element container - @views u[:, .., element_id] .= old_u[:, .., old_element_id] - element_id += 1 - end + # Return early if there is nothing to do + if isempty(elements_to_remove) + return end - # If everything is correct, we should have processed all elements. - @assert element_id == nelements(dg, cache) + 1 "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" - end # GC.@preserve old_u_ode - - # re-initialize interfaces container - @unpack interfaces = cache - resize!(interfaces, count_required_interfaces(mesh, leaf_cell_ids)) - init_interfaces!(interfaces, elements, mesh) - - # re-initialize boundaries container - @unpack boundaries = cache - resize!(boundaries, count_required_boundaries(mesh, leaf_cell_ids)) - init_boundaries!(boundaries, elements, mesh) - - # Sanity check - if isperiodic(mesh.tree) - @assert ninterfaces(interfaces) == 1 * nelements(dg, cache) ("For 1D and periodic domains, the number of interfaces must be the same as the number of elements") - end - - return nothing -end + # Determine for each old element whether it needs to be removed + to_be_removed = falses(nelements(dg, cache)) + to_be_removed[elements_to_remove] .= true + + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed + old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) + + # Get new list of leaf cells + leaf_cell_ids = local_leaf_cells(mesh.tree) + + # re-initialize elements container + @unpack elements = cache + resize!(elements, length(leaf_cell_ids)) + init_elements!(elements, leaf_cell_ids, mesh, dg.basis) + @assert nelements(dg, cache) < old_n_elements + + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array(u_ode, mesh, equations, dg, cache) + + # Loop over all elements in old container and either copy them or coarsen them + skip = 0 + element_id = 1 + for old_element_id in 1:old_n_elements + # If skip is non-zero, we just coarsened 2^ndims elements and need to omit the following elements + if skip > 0 + skip -= 1 + continue + end + + if to_be_removed[old_element_id] + # If an element is to be removed, sanity check if the following elements + # are also marked - otherwise there would be an error in the way the + # cells/elements are sorted + @assert all(to_be_removed[old_element_id:(old_element_id + 2^ndims(mesh) - 1)]) "bad cell/element order" + + # Coarsen elements and store solution directly in new data structure + coarsen_elements!(u, element_id, old_u, old_element_id, + adaptor, equations, dg) + element_id += 1 + skip = 2^ndims(mesh) - 1 + else + # Copy old element data to new element container + @views u[:, .., element_id] .= old_u[:, .., old_element_id] + element_id += 1 + end + end + # If everything is correct, we should have processed all elements. + @assert element_id==nelements(dg, cache) + 1 "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" + end # GC.@preserve old_u_ode + + # re-initialize interfaces container + @unpack interfaces = cache + resize!(interfaces, count_required_interfaces(mesh, leaf_cell_ids)) + init_interfaces!(interfaces, elements, mesh) + + # re-initialize boundaries container + @unpack boundaries = cache + resize!(boundaries, count_required_boundaries(mesh, leaf_cell_ids)) + init_boundaries!(boundaries, elements, mesh) + + # Sanity check + if isperiodic(mesh.tree) + @assert ninterfaces(interfaces)==1 * nelements(dg, cache) ("For 1D and periodic domains, the number of interfaces must be the same as the number of elements") + end + + return nothing +end # TODO: Taal compare performance of different implementations # Coarsen solution data u for two elements, using L2 projection -function coarsen_elements!(u::AbstractArray{<:Any,3}, element_id, +function coarsen_elements!(u::AbstractArray{<:Any, 3}, element_id, old_u, old_element_id, adaptor::LobattoLegendreAdaptorL2, equations, dg) - @unpack reverse_upper, reverse_lower = adaptor - - # Store old element ids - left_id = old_element_id - right_id = old_element_id + 1 - - @boundscheck begin - @assert old_element_id >= 1 - @assert size(old_u, 1) == nvariables(equations) - @assert size(old_u, 2) == nnodes(dg) - @assert size(old_u, 3) >= old_element_id + 1 - @assert element_id >= 1 - @assert size( u, 1) == nvariables(equations) - @assert size( u, 2) == nnodes(dg) - @assert size( u, 3) >= element_id - end - - for i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, element_id)) - - # Project from lower left element - for k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, left_id) * reverse_lower[i, k] - end - - # Project from lower right element - for k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, right_id) * reverse_upper[i, k] + @unpack 
reverse_upper, reverse_lower = adaptor + + # Store old element ids + left_id = old_element_id + right_id = old_element_id + 1 + + @boundscheck begin + @assert old_element_id >= 1 + @assert size(old_u, 1) == nvariables(equations) + @assert size(old_u, 2) == nnodes(dg) + @assert size(old_u, 3) >= old_element_id + 1 + @assert element_id >= 1 + @assert size(u, 1) == nvariables(equations) + @assert size(u, 2) == nnodes(dg) + @assert size(u, 3) >= element_id end - # Update value - set_node_vars!(u, acc, equations, dg, i, element_id) - end -end + for i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, element_id)) + # Project from lower left element + for k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, left_id) * reverse_lower[i, k] + end -# this method is called when an `ControllerThreeLevel` is constructed -function create_cache(::Type{ControllerThreeLevel}, mesh::TreeMesh{1}, equations, dg::DG, cache) + # Project from lower right element + for k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, right_id) * + reverse_upper[i, k] + end - controller_value = Vector{Int}(undef, nelements(dg, cache)) - return (; controller_value) + # Update value + set_node_vars!(u, acc, equations, dg, i, element_id) + end end -function create_cache(::Type{ControllerThreeLevelCombined}, mesh::TreeMesh{1}, equations, dg::DG, cache) - - controller_value = Vector{Int}(undef, nelements(dg, cache)) - return (; controller_value) +# this method is called when an `ControllerThreeLevel` is constructed +function create_cache(::Type{ControllerThreeLevel}, mesh::TreeMesh{1}, equations, + dg::DG, cache) + controller_value = Vector{Int}(undef, nelements(dg, cache)) + return (; controller_value) end - +function create_cache(::Type{ControllerThreeLevelCombined}, mesh::TreeMesh{1}, + equations, dg::DG, cache) + controller_value = Vector{Int}(undef, nelements(dg, cache)) + return (; controller_value) +end end # @muladd diff --git a/src/callbacks_step/amr_dg2d.jl b/src/callbacks_step/amr_dg2d.jl index 9f677d1dc4d..400d16347d5 100644 --- a/src/callbacks_step/amr_dg2d.jl +++ b/src/callbacks_step/amr_dg2d.jl @@ -3,334 +3,346 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Redistribute data for load balancing after partitioning the mesh function rebalance_solver!(u_ode::AbstractVector, mesh::TreeMesh{2}, equations, dg::DGSEM, cache, old_mpi_ranks_per_cell) - if cache.elements.cell_ids == local_leaf_cells(mesh.tree) - # Cell ids of the current elements are the same as the local leaf cells of the - # newly partitioned mesh, so the solver doesn't need to be rebalanced on this rank. - # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do - # locally (there are other MPI ranks that need to be rebalanced if this function is called) - reinitialize_containers!(mesh, equations, dg, cache) - return - end - - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_cell_ids = copy(cache.elements.cell_ids) - old_u_ode = copy(u_ode) - GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed - # Use `wrap_array_native` instead of `wrap_array` since MPI might not interact - # nicely with non-base array types - old_u = wrap_array_native(old_u_ode, mesh, equations, dg, cache) - - @trixi_timeit timer() "reinitialize data structures" reinitialize_containers!(mesh, equations, dg, cache) - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array_native(u_ode, mesh, equations, dg, cache) - - # Get new list of leaf cells - leaf_cell_ids = local_leaf_cells(mesh.tree) - - @trixi_timeit timer() "exchange data" begin - # Collect MPI requests for MPI_Waitall - requests = Vector{MPI.Request}() - - # Find elements that will change their rank and send their data to the new rank - for old_element_id in 1:old_n_elements - cell_id = old_cell_ids[old_element_id] - if !(cell_id in leaf_cell_ids) - # Send element data to new rank, use cell_id as tag (non-blocking) - dest = mesh.tree.mpi_ranks[cell_id] - request = MPI.Isend(@view(old_u[:, .., old_element_id]), dest, cell_id, mpi_comm()) - push!(requests, request) - end - end - - # Loop over all elements in new container and either copy them from old container - # or receive them with MPI - for element in eachelement(dg, cache) - cell_id = cache.elements.cell_ids[element] - if cell_id in old_cell_ids - old_element_id = searchsortedfirst(old_cell_ids, cell_id) - # Copy old element data to new element container - @views u[:, .., element] .= old_u[:, .., old_element_id] - else - # Receive old element data - src = old_mpi_ranks_per_cell[cell_id] - request = MPI.Irecv!(@view(u[:, .., element]), src, cell_id, mpi_comm()) - push!(requests, request) + if cache.elements.cell_ids == local_leaf_cells(mesh.tree) + # Cell ids of the current elements are the same as the local leaf cells of the + # newly partitioned mesh, so the solver doesn't need to be rebalanced on this rank. + # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do + # locally (there are other MPI ranks that need to be rebalanced if this function is called) + reinitialize_containers!(mesh, equations, dg, cache) + return + end + + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_cell_ids = copy(cache.elements.cell_ids) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed + # Use `wrap_array_native` instead of `wrap_array` since MPI might not interact + # nicely with non-base array types + old_u = wrap_array_native(old_u_ode, mesh, equations, dg, cache) + + @trixi_timeit timer() "reinitialize data structures" begin + reinitialize_containers!(mesh, equations, dg, cache) end - end - # Wait for all non-blocking MPI send/receive operations to finish - MPI.Waitall(requests, MPI.Status) - end - end # GC.@preserve old_u_ode + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array_native(u_ode, mesh, equations, dg, cache) + + # Get new list of leaf cells + leaf_cell_ids = local_leaf_cells(mesh.tree) + + @trixi_timeit timer() "exchange data" begin + # Collect MPI requests for MPI_Waitall + requests = Vector{MPI.Request}() + + # Find elements that will change their rank and send their data to the new rank + for old_element_id in 1:old_n_elements + cell_id = old_cell_ids[old_element_id] + if !(cell_id in leaf_cell_ids) + # Send element data to new rank, use cell_id as tag (non-blocking) + dest = mesh.tree.mpi_ranks[cell_id] + request = MPI.Isend(@view(old_u[:, .., old_element_id]), dest, + cell_id, mpi_comm()) + push!(requests, request) + end + end + + # Loop over all elements in new container and either copy them from old container + # or receive them with MPI + for element in eachelement(dg, cache) + cell_id = cache.elements.cell_ids[element] + if cell_id in old_cell_ids + old_element_id = searchsortedfirst(old_cell_ids, cell_id) + # Copy old element data to new element container + @views u[:, .., element] .= old_u[:, .., old_element_id] + else + # Receive old element data + src = old_mpi_ranks_per_cell[cell_id] + request = MPI.Irecv!(@view(u[:, .., element]), src, cell_id, + mpi_comm()) + push!(requests, request) + end + end + + # Wait for all non-blocking MPI send/receive operations to finish + MPI.Waitall(requests, MPI.Status) + end + end # GC.@preserve old_u_ode end - # Refine elements in the DG solver based on a list of cell_ids that should be refined function refine!(u_ode::AbstractVector, adaptor, mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations, dg::DGSEM, cache, elements_to_refine) - # Return early if there is nothing to do - if isempty(elements_to_refine) - if mpi_isparallel() - # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do - # locally (there still might be other MPI ranks that have refined elements) - reinitialize_containers!(mesh, equations, dg, cache) + # Return early if there is nothing to do + if isempty(elements_to_refine) + if mpi_isparallel() + # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do + # locally (there still might be other MPI ranks that have refined elements) + reinitialize_containers!(mesh, equations, dg, cache) + end + return end - return - end - - # Determine for each existing element whether it needs to be refined - needs_refinement = falses(nelements(dg, cache)) - needs_refinement[elements_to_refine] .= true - - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_u_ode = copy(u_ode) - GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed - old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) - - reinitialize_containers!(mesh, equations, dg, cache) - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array(u_ode, mesh, equations, dg, cache) - - # Loop over all elements in old container and either copy them or refine them - element_id = 1 - for old_element_id in 1:old_n_elements - if needs_refinement[old_element_id] - # Refine element and store solution directly in new data structure - refine_element!(u, element_id, old_u, old_element_id, - adaptor, equations, dg) - element_id += 2^ndims(mesh) - else - # Copy old element data to new element container - @views u[:, .., element_id] .= old_u[:, .., old_element_id] - element_id += 1 - end + + # Determine for each existing element whether it needs to be refined + needs_refinement = falses(nelements(dg, cache)) + needs_refinement[elements_to_refine] .= true + + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! If we don't GC.@preserve old_u_ode, it might be GC'ed + old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) + + reinitialize_containers!(mesh, equations, dg, cache) + + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array(u_ode, mesh, equations, dg, cache) + + # Loop over all elements in old container and either copy them or refine them + element_id = 1 + for old_element_id in 1:old_n_elements + if needs_refinement[old_element_id] + # Refine element and store solution directly in new data structure + refine_element!(u, element_id, old_u, old_element_id, + adaptor, equations, dg) + element_id += 2^ndims(mesh) + else + # Copy old element data to new element container + @views u[:, .., element_id] .= old_u[:, .., old_element_id] + element_id += 1 + end + end + # If everything is correct, we should have processed all elements. + # Depending on whether the last element processed above had to be refined or not, + # the counter `element_id` can have two different values at the end. + @assert element_id == + nelements(dg, cache) + + 1||element_id == nelements(dg, cache) + 2^ndims(mesh) "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" + end # GC.@preserve old_u_ode + + # Sanity check + if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0 && + !mpi_isparallel() + @assert ninterfaces(cache.interfaces)==ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements") end - # If everything is correct, we should have processed all elements. - # Depending on whether the last element processed above had to be refined or not, - # the counter `element_id` can have two different values at the end. 
- @assert element_id == nelements(dg, cache) + 1 || element_id == nelements(dg, cache) + 2^ndims(mesh) "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" - end # GC.@preserve old_u_ode - - # Sanity check - if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0 && !mpi_isparallel() - @assert ninterfaces(cache.interfaces) == ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements") - end - - return nothing -end + return nothing +end # TODO: Taal compare performance of different implementations # Refine solution data u for an element, using L2 projection (interpolation) -function refine_element!(u::AbstractArray{<:Any,4}, element_id, +function refine_element!(u::AbstractArray{<:Any, 4}, element_id, old_u, old_element_id, adaptor::LobattoLegendreAdaptorL2, equations, dg) - @unpack forward_upper, forward_lower = adaptor - - # Store new element ids - lower_left_id = element_id - lower_right_id = element_id + 1 - upper_left_id = element_id + 2 - upper_right_id = element_id + 3 - - @boundscheck begin - @assert old_element_id >= 1 - @assert size(old_u, 1) == nvariables(equations) - @assert size(old_u, 2) == nnodes(dg) - @assert size(old_u, 3) == nnodes(dg) - @assert size(old_u, 4) >= old_element_id - @assert element_id >= 1 - @assert size( u, 1) == nvariables(equations) - @assert size( u, 2) == nnodes(dg) - @assert size( u, 3) == nnodes(dg) - @assert size( u, 4) >= element_id + 3 - end - - # Interpolate to lower left element - for j in eachnode(dg), i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * forward_lower[i, k] * forward_lower[j, l] - end - set_node_vars!(u, acc, equations, dg, i, j, lower_left_id) - end - - # Interpolate to lower right element - for j in eachnode(dg), i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * forward_upper[i, k] * forward_lower[j, l] + @unpack forward_upper, forward_lower = adaptor + + # Store new element ids + lower_left_id = element_id + lower_right_id = element_id + 1 + upper_left_id = element_id + 2 + upper_right_id = element_id + 3 + + @boundscheck begin + @assert old_element_id >= 1 + @assert size(old_u, 1) == nvariables(equations) + @assert size(old_u, 2) == nnodes(dg) + @assert size(old_u, 3) == nnodes(dg) + @assert size(old_u, 4) >= old_element_id + @assert element_id >= 1 + @assert size(u, 1) == nvariables(equations) + @assert size(u, 2) == nnodes(dg) + @assert size(u, 3) == nnodes(dg) + @assert size(u, 4) >= element_id + 3 end - set_node_vars!(u, acc, equations, dg, i, j, lower_right_id) - end - - # Interpolate to upper left element - for j in eachnode(dg), i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * forward_lower[i, k] * forward_upper[j, l] + + # Interpolate to lower left element + for j in eachnode(dg), i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * + forward_lower[i, k] * forward_lower[j, l] + end + 
set_node_vars!(u, acc, equations, dg, i, j, lower_left_id) end - set_node_vars!(u, acc, equations, dg, i, j, upper_left_id) - end - - # Interpolate to upper right element - for j in eachnode(dg), i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * forward_upper[i, k] * forward_upper[j, l] + + # Interpolate to lower right element + for j in eachnode(dg), i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * + forward_upper[i, k] * forward_lower[j, l] + end + set_node_vars!(u, acc, equations, dg, i, j, lower_right_id) end - set_node_vars!(u, acc, equations, dg, i, j, upper_right_id) - end - return nothing -end + # Interpolate to upper left element + for j in eachnode(dg), i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * + forward_lower[i, k] * forward_upper[j, l] + end + set_node_vars!(u, acc, equations, dg, i, j, upper_left_id) + end + # Interpolate to upper right element + for j in eachnode(dg), i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, old_element_id) * + forward_upper[i, k] * forward_upper[j, l] + end + set_node_vars!(u, acc, equations, dg, i, j, upper_right_id) + end + return nothing +end # Coarsen elements in the DG solver based on a list of cell_ids that should be removed -function coarsen!(u_ode::AbstractVector, adaptor, mesh::Union{TreeMesh{2}, P4estMesh{2}}, +function coarsen!(u_ode::AbstractVector, adaptor, + mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations, dg::DGSEM, cache, elements_to_remove) - # Return early if there is nothing to do - if isempty(elements_to_remove) - if mpi_isparallel() - # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do - # locally (there still might be other MPI ranks that have coarsened elements) - reinitialize_containers!(mesh, equations, dg, cache) - end - return - end - - # Determine for each old element whether it needs to be removed - to_be_removed = falses(nelements(dg, cache)) - to_be_removed[elements_to_remove] .= true - - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_u_ode = copy(u_ode) - GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed - old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) - - reinitialize_containers!(mesh, equations, dg, cache) - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array(u_ode, mesh, equations, dg, cache) - - # Loop over all elements in old container and either copy them or coarsen them - skip = 0 - element_id = 1 - for old_element_id in 1:old_n_elements - # If skip is non-zero, we just coarsened 2^ndims elements and need to omit the following elements - if skip > 0 - skip -= 1 - continue - end - - if to_be_removed[old_element_id] - # If an element is to be removed, sanity check if the following elements - # are also marked - otherwise there would be an error in the way the - # cells/elements are sorted - @assert all(to_be_removed[old_element_id:(old_element_id+2^ndims(mesh)-1)]) "bad cell/element order" - - # Coarsen elements and store solution directly in new data structure - coarsen_elements!(u, element_id, old_u, old_element_id, - adaptor, equations, dg) - element_id += 1 - skip = 2^ndims(mesh) - 1 - else - # Copy old element data to new element container - @views u[:, .., element_id] .= old_u[:, .., old_element_id] - element_id += 1 - end + # Return early if there is nothing to do + if isempty(elements_to_remove) + if mpi_isparallel() + # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do + # locally (there still might be other MPI ranks that have coarsened elements) + reinitialize_containers!(mesh, equations, dg, cache) + end + return end - # If everything is correct, we should have processed all elements. - @assert element_id == nelements(dg, cache) + 1 "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" - end # GC.@preserve old_u_ode - # Sanity check - if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0 && !mpi_isparallel() - @assert ninterfaces(cache.interfaces) == ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements") - end + # Determine for each old element whether it needs to be removed + to_be_removed = falses(nelements(dg, cache)) + to_be_removed[elements_to_remove] .= true + + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed + old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) + + reinitialize_containers!(mesh, equations, dg, cache) + + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array(u_ode, mesh, equations, dg, cache) + + # Loop over all elements in old container and either copy them or coarsen them + skip = 0 + element_id = 1 + for old_element_id in 1:old_n_elements + # If skip is non-zero, we just coarsened 2^ndims elements and need to omit the following elements + if skip > 0 + skip -= 1 + continue + end + + if to_be_removed[old_element_id] + # If an element is to be removed, sanity check if the following elements + # are also marked - otherwise there would be an error in the way the + # cells/elements are sorted + @assert all(to_be_removed[old_element_id:(old_element_id + 2^ndims(mesh) - 1)]) "bad cell/element order" + + # Coarsen elements and store solution directly in new data structure + coarsen_elements!(u, element_id, old_u, old_element_id, + adaptor, equations, dg) + element_id += 1 + skip = 2^ndims(mesh) - 1 + else + # Copy old element data to new element container + @views u[:, .., element_id] .= old_u[:, .., old_element_id] + element_id += 1 + end + end + # If everything is correct, we should have processed all elements. + @assert element_id==nelements(dg, cache) + 1 "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" + end # GC.@preserve old_u_ode + + # Sanity check + if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0 && + !mpi_isparallel() + @assert ninterfaces(cache.interfaces)==ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements") + end - return nothing + return nothing end - # TODO: Taal compare performance of different implementations # Coarsen solution data u for four elements, using L2 projection -function coarsen_elements!(u::AbstractArray{<:Any,4}, element_id, +function coarsen_elements!(u::AbstractArray{<:Any, 4}, element_id, old_u, old_element_id, adaptor::LobattoLegendreAdaptorL2, equations, dg) - @unpack reverse_upper, reverse_lower = adaptor - - # Store old element ids - lower_left_id = old_element_id - lower_right_id = old_element_id + 1 - upper_left_id = old_element_id + 2 - upper_right_id = old_element_id + 3 - - @boundscheck begin - @assert old_element_id >= 1 - @assert size(old_u, 1) == nvariables(equations) - @assert size(old_u, 2) == nnodes(dg) - @assert size(old_u, 3) == nnodes(dg) - @assert size(old_u, 4) >= old_element_id + 3 - @assert element_id >= 1 - @assert size( u, 1) == nvariables(equations) - @assert size( u, 2) == nnodes(dg) - @assert size( u, 3) == nnodes(dg) - @assert size( u, 4) >= element_id - end - - for j in eachnode(dg), i in eachnode(dg) - acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) - - # Project from lower left element - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, lower_left_id) * reverse_lower[i, k] * reverse_lower[j, l] + @unpack reverse_upper, reverse_lower = adaptor + + # Store old element ids + lower_left_id = old_element_id + lower_right_id = old_element_id + 1 + upper_left_id = old_element_id + 2 + upper_right_id = old_element_id + 3 + + @boundscheck begin + @assert old_element_id >= 1 + @assert size(old_u, 1) == nvariables(equations) + @assert size(old_u, 2) == nnodes(dg) + @assert size(old_u, 
3) == nnodes(dg) + @assert size(old_u, 4) >= old_element_id + 3 + @assert element_id >= 1 + @assert size(u, 1) == nvariables(equations) + @assert size(u, 2) == nnodes(dg) + @assert size(u, 3) == nnodes(dg) + @assert size(u, 4) >= element_id end - # Project from lower right element - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, lower_right_id) * reverse_upper[i, k] * reverse_lower[j, l] - end - - # Project from upper left element - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, upper_left_id) * reverse_lower[i, k] * reverse_upper[j, l] - end + for j in eachnode(dg), i in eachnode(dg) + acc = zero(get_node_vars(u, equations, dg, i, j, element_id)) - # Project from upper right element - for l in eachnode(dg), k in eachnode(dg) - acc += get_node_vars(old_u, equations, dg, k, l, upper_right_id) * reverse_upper[i, k] * reverse_upper[j, l] - end + # Project from lower left element + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, lower_left_id) * + reverse_lower[i, k] * reverse_lower[j, l] + end - # Update value - set_node_vars!(u, acc, equations, dg, i, j, element_id) - end -end + # Project from lower right element + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, lower_right_id) * + reverse_upper[i, k] * reverse_lower[j, l] + end + # Project from upper left element + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, upper_left_id) * + reverse_lower[i, k] * reverse_upper[j, l] + end -# this method is called when an `ControllerThreeLevel` is constructed -function create_cache(::Type{ControllerThreeLevel}, mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations, dg::DG, cache) + # Project from upper right element + for l in eachnode(dg), k in eachnode(dg) + acc += get_node_vars(old_u, equations, dg, k, l, upper_right_id) * + reverse_upper[i, k] * reverse_upper[j, l] + end - controller_value = Vector{Int}(undef, nelements(dg, cache)) - return (; controller_value) + # Update value + set_node_vars!(u, acc, equations, dg, i, j, element_id) + end end -function create_cache(::Type{ControllerThreeLevelCombined}, mesh::TreeMesh{2}, equations, dg::DG, cache) - - controller_value = Vector{Int}(undef, nelements(dg, cache)) - return (; controller_value) +# this method is called when an `ControllerThreeLevel` is constructed +function create_cache(::Type{ControllerThreeLevel}, + mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations, dg::DG, cache) + controller_value = Vector{Int}(undef, nelements(dg, cache)) + return (; controller_value) end - +function create_cache(::Type{ControllerThreeLevelCombined}, mesh::TreeMesh{2}, + equations, dg::DG, cache) + controller_value = Vector{Int}(undef, nelements(dg, cache)) + return (; controller_value) +end end # @muladd diff --git a/src/callbacks_step/amr_dg3d.jl b/src/callbacks_step/amr_dg3d.jl index 44f73547efc..c8abe6fdb05 100644 --- a/src/callbacks_step/amr_dg3d.jl +++ b/src/callbacks_step/amr_dg3d.jl @@ -3,302 +3,310 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Refine elements in the DG solver based on a list of cell_ids that should be refined function refine!(u_ode::AbstractVector, adaptor, mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations, dg::DGSEM, cache, elements_to_refine) - # Return early if there is nothing to do - if isempty(elements_to_refine) - if mpi_isparallel() - # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do - # locally (there still might be other MPI ranks that have refined elements) - reinitialize_containers!(mesh, equations, dg, cache) + # Return early if there is nothing to do + if isempty(elements_to_refine) + if mpi_isparallel() + # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do + # locally (there still might be other MPI ranks that have refined elements) + reinitialize_containers!(mesh, equations, dg, cache) + end + return end - return - end - - # Determine for each existing element whether it needs to be refined - needs_refinement = falses(nelements(dg, cache)) - needs_refinement[elements_to_refine] .= true - - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_u_ode = copy(u_ode) - GC.@preserve old_u_ode begin # OBS! If we don't GC.@preserve old_u_ode, it might be GC'ed - old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) - - reinitialize_containers!(mesh, equations, dg, cache) - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array(u_ode, mesh, equations, dg, cache) - - # Loop over all elements in old container and either copy them or refine them - u_tmp1 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)) - u_tmp2 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)) - element_id = 1 - for old_element_id in 1:old_n_elements - if needs_refinement[old_element_id] - # Refine element and store solution directly in new data structure - refine_element!(u, element_id, old_u, old_element_id, - adaptor, equations, dg, u_tmp1, u_tmp2) - element_id += 2^ndims(mesh) - else - # Copy old element data to new element container - @views u[:, .., element_id] .= old_u[:, .., old_element_id] - element_id += 1 - end + + # Determine for each existing element whether it needs to be refined + needs_refinement = falses(nelements(dg, cache)) + needs_refinement[elements_to_refine] .= true + + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed + old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) + + reinitialize_containers!(mesh, equations, dg, cache) + + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array(u_ode, mesh, equations, dg, cache) + + # Loop over all elements in old container and either copy them or refine them + u_tmp1 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg), + nnodes(dg), nnodes(dg)) + u_tmp2 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg), + nnodes(dg), nnodes(dg)) + element_id = 1 + for old_element_id in 1:old_n_elements + if needs_refinement[old_element_id] + # Refine element and store solution directly in new data structure + refine_element!(u, element_id, old_u, old_element_id, + adaptor, equations, dg, u_tmp1, u_tmp2) + element_id += 2^ndims(mesh) + else + # Copy old element data to new element container + @views u[:, .., element_id] .= old_u[:, .., old_element_id] + element_id += 1 + end + end + # If everything is correct, we should have processed all elements. + # Depending on whether the last element processed above had to be refined or not, + # the counter `element_id` can have two different values at the end. + @assert element_id == + nelements(dg, cache) + + 1||element_id == nelements(dg, cache) + 2^ndims(mesh) "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" + end # GC.@preserve old_u_ode + + # Sanity check + if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0 + @assert ninterfaces(cache.interfaces)==ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements") end - # If everything is correct, we should have processed all elements. - # Depending on whether the last element processed above had to be refined or not, - # the counter `element_id` can have two different values at the end. 
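The analogous `coarsen!` loops (above for 2D, below for 3D) drive the same copy-or-transform traversal with an extra `skip` counter. A minimal standalone Julia sketch of that bookkeeping, using hypothetical flags rather than the Trixi.jl containers:

    # Count the new elements produced by one coarsening pass: the first of
    # 2^ndims marked sibling elements triggers a merge, the remaining siblings
    # are skipped, and unmarked elements are copied through unchanged.
    function new_element_count(to_be_removed, ndims)
        skip = 0
        count = 0
        for flag in to_be_removed
            if skip > 0              # sibling already consumed by a merge
                skip -= 1
                continue
            end
            count += 1               # one new element either way
            if flag
                skip = 2^ndims - 1   # skip the remaining siblings
            end
        end
        return count
    end

    # Eight marked siblings collapse into one element and two unmarked
    # elements are copied: 10 old elements -> 3 new elements.
    @assert new_element_count(vcat(trues(8), falses(2)), 3) == 3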
- @assert element_id == nelements(dg, cache) + 1 || element_id == nelements(dg, cache) + 2^ndims(mesh) "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" - end # GC.@preserve old_u_ode - - # Sanity check - if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0 - @assert ninterfaces(cache.interfaces) == ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements") - end - - return nothing -end + return nothing +end # TODO: Taal compare performance of different implementations # Refine solution data u for an element, using L2 projection (interpolation) -function refine_element!(u::AbstractArray{<:Any,5}, element_id, +function refine_element!(u::AbstractArray{<:Any, 5}, element_id, old_u, old_element_id, adaptor::LobattoLegendreAdaptorL2, equations, dg, u_tmp1, u_tmp2) - @unpack forward_upper, forward_lower = adaptor - - # Store new element ids - bottom_lower_left_id = element_id - bottom_lower_right_id = element_id + 1 - bottom_upper_left_id = element_id + 2 - bottom_upper_right_id = element_id + 3 - top_lower_left_id = element_id + 4 - top_lower_right_id = element_id + 5 - top_upper_left_id = element_id + 6 - top_upper_right_id = element_id + 7 - - @boundscheck begin - @assert old_element_id >= 1 - @assert size(old_u, 1) == nvariables(equations) - @assert size(old_u, 2) == nnodes(dg) - @assert size(old_u, 3) == nnodes(dg) - @assert size(old_u, 4) == nnodes(dg) - @assert size(old_u, 5) >= old_element_id - @assert element_id >= 1 - @assert size( u, 1) == nvariables(equations) - @assert size( u, 2) == nnodes(dg) - @assert size( u, 3) == nnodes(dg) - @assert size( u, 4) == nnodes(dg) - @assert size( u, 5) >= element_id + 7 - end - - # Interpolate to bottom lower left element - multiply_dimensionwise!( - view(u, :, :, :, :, bottom_lower_left_id), forward_lower, forward_lower, forward_lower, - view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) - - # Interpolate to bottom lower right element - multiply_dimensionwise!( - view(u, :, :, :, :, bottom_lower_right_id), forward_upper, forward_lower, forward_lower, - view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) - - # Interpolate to bottom upper left element - multiply_dimensionwise!( - view(u, :, :, :, :, bottom_upper_left_id), forward_lower, forward_upper, forward_lower, - view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) - - # Interpolate to bottom upper right element - multiply_dimensionwise!( - view(u, :, :, :, :, bottom_upper_right_id), forward_upper, forward_upper, forward_lower, - view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) - - # Interpolate to top lower left element - multiply_dimensionwise!( - view(u, :, :, :, :, top_lower_left_id), forward_lower, forward_lower, forward_upper, - view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) - - # Interpolate to top lower right element - multiply_dimensionwise!( - view(u, :, :, :, :, top_lower_right_id), forward_upper, forward_lower, forward_upper, - view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) - - # Interpolate to top upper left element - multiply_dimensionwise!( - view(u, :, :, :, :, top_upper_left_id), forward_lower, forward_upper, forward_upper, - view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) - - # Interpolate to top upper right element - multiply_dimensionwise!( - view(u, :, :, :, :, top_upper_right_id), forward_upper, forward_upper, forward_upper, - view(old_u, :, :, :, :, 
old_element_id), u_tmp1, u_tmp2) - - return nothing -end - + @unpack forward_upper, forward_lower = adaptor + + # Store new element ids + bottom_lower_left_id = element_id + bottom_lower_right_id = element_id + 1 + bottom_upper_left_id = element_id + 2 + bottom_upper_right_id = element_id + 3 + top_lower_left_id = element_id + 4 + top_lower_right_id = element_id + 5 + top_upper_left_id = element_id + 6 + top_upper_right_id = element_id + 7 + + @boundscheck begin + @assert old_element_id >= 1 + @assert size(old_u, 1) == nvariables(equations) + @assert size(old_u, 2) == nnodes(dg) + @assert size(old_u, 3) == nnodes(dg) + @assert size(old_u, 4) == nnodes(dg) + @assert size(old_u, 5) >= old_element_id + @assert element_id >= 1 + @assert size(u, 1) == nvariables(equations) + @assert size(u, 2) == nnodes(dg) + @assert size(u, 3) == nnodes(dg) + @assert size(u, 4) == nnodes(dg) + @assert size(u, 5) >= element_id + 7 + end + # Interpolate to bottom lower left element + multiply_dimensionwise!(view(u, :, :, :, :, bottom_lower_left_id), forward_lower, + forward_lower, forward_lower, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + # Interpolate to bottom lower right element + multiply_dimensionwise!(view(u, :, :, :, :, bottom_lower_right_id), forward_upper, + forward_lower, forward_lower, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + # Interpolate to bottom upper left element + multiply_dimensionwise!(view(u, :, :, :, :, bottom_upper_left_id), forward_lower, + forward_upper, forward_lower, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + # Interpolate to bottom upper right element + multiply_dimensionwise!(view(u, :, :, :, :, bottom_upper_right_id), forward_upper, + forward_upper, forward_lower, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + # Interpolate to top lower left element + multiply_dimensionwise!(view(u, :, :, :, :, top_lower_left_id), forward_lower, + forward_lower, forward_upper, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + # Interpolate to top lower right element + multiply_dimensionwise!(view(u, :, :, :, :, top_lower_right_id), forward_upper, + forward_lower, forward_upper, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + # Interpolate to top upper left element + multiply_dimensionwise!(view(u, :, :, :, :, top_upper_left_id), forward_lower, + forward_upper, forward_upper, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + # Interpolate to top upper right element + multiply_dimensionwise!(view(u, :, :, :, :, top_upper_right_id), forward_upper, + forward_upper, forward_upper, + view(old_u, :, :, :, :, old_element_id), u_tmp1, u_tmp2) + + return nothing +end # Coarsen elements in the DG solver based on a list of cell_ids that should be removed function coarsen!(u_ode::AbstractVector, adaptor, mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations, dg::DGSEM, cache, elements_to_remove) - # Return early if there is nothing to do - if isempty(elements_to_remove) - if mpi_isparallel() - # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do - # locally (there still might be other MPI ranks that have coarsened elements) - reinitialize_containers!(mesh, equations, dg, cache) - end - return - end - - # Determine for each old element whether it needs to be removed - to_be_removed = falses(nelements(dg, cache)) - to_be_removed[elements_to_remove] .= true - - # Retain current solution data - old_n_elements = nelements(dg, cache) - old_u_ode = copy(u_ode) - 
GC.@preserve old_u_ode begin # OBS! If we don't GC.@preserve old_u_ode, it might be GC'ed - old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) - - reinitialize_containers!(mesh, equations, dg, cache) - - resize!(u_ode, nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) - u = wrap_array(u_ode, mesh, equations, dg, cache) - - # Loop over all elements in old container and either copy them or coarsen them - u_tmp1 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)) - u_tmp2 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)) - skip = 0 - element_id = 1 - for old_element_id in 1:old_n_elements - # If skip is non-zero, we just coarsened 2^ndims elements and need to omit the following elements - if skip > 0 - skip -= 1 - continue - end - - if to_be_removed[old_element_id] - # If an element is to be removed, sanity check if the following elements - # are also marked - otherwise there would be an error in the way the - # cells/elements are sorted - @assert all(to_be_removed[old_element_id:(old_element_id+2^ndims(mesh)-1)]) "bad cell/element order" - - # Coarsen elements and store solution directly in new data structure - coarsen_elements!(u, element_id, old_u, old_element_id, - adaptor, equations, dg, u_tmp1, u_tmp2) - element_id += 1 - skip = 2^ndims(mesh) - 1 - else - # Copy old element data to new element container - @views u[:, .., element_id] .= old_u[:, .., old_element_id] - element_id += 1 - end + # Return early if there is nothing to do + if isempty(elements_to_remove) + if mpi_isparallel() + # MPICache init uses all-to-all communication -> reinitialize even if there is nothing to do + # locally (there still might be other MPI ranks that have coarsened elements) + reinitialize_containers!(mesh, equations, dg, cache) + end + return end - # If everything is correct, we should have processed all elements. - @assert element_id == nelements(dg, cache) + 1 "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))" - end # GC.@preserve old_u_ode - # Sanity check - if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0 - @assert ninterfaces(cache.interfaces) == ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements") - end + # Determine for each old element whether it needs to be removed + to_be_removed = falses(nelements(dg, cache)) + to_be_removed[elements_to_remove] .= true + + # Retain current solution data + old_n_elements = nelements(dg, cache) + old_u_ode = copy(u_ode) + GC.@preserve old_u_ode begin # OBS! 
If we don't GC.@preserve old_u_ode, it might be GC'ed
+        old_u = wrap_array(old_u_ode, mesh, equations, dg, cache)
+
+        reinitialize_containers!(mesh, equations, dg, cache)
+
+        resize!(u_ode,
+                nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache))
+        u = wrap_array(u_ode, mesh, equations, dg, cache)
+
+        # Loop over all elements in old container and either copy them or coarsen them
+        u_tmp1 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg),
+                                     nnodes(dg), nnodes(dg))
+        u_tmp2 = Array{eltype(u), 4}(undef, nvariables(equations), nnodes(dg),
+                                     nnodes(dg), nnodes(dg))
+        skip = 0
+        element_id = 1
+        for old_element_id in 1:old_n_elements
+            # If skip is non-zero, we just coarsened 2^ndims elements and need to omit the following elements
+            if skip > 0
+                skip -= 1
+                continue
+            end
+
+            if to_be_removed[old_element_id]
+                # If an element is to be removed, sanity check if the following elements
+                # are also marked - otherwise there would be an error in the way the
+                # cells/elements are sorted
+                @assert all(to_be_removed[old_element_id:(old_element_id + 2^ndims(mesh) - 1)]) "bad cell/element order"
+
+                # Coarsen elements and store solution directly in new data structure
+                coarsen_elements!(u, element_id, old_u, old_element_id,
+                                  adaptor, equations, dg, u_tmp1, u_tmp2)
+                element_id += 1
+                skip = 2^ndims(mesh) - 1
+            else
+                # Copy old element data to new element container
+                @views u[:, .., element_id] .= old_u[:, .., old_element_id]
+                element_id += 1
+            end
+        end
+        # If everything is correct, we should have processed all elements.
+        @assert element_id==nelements(dg, cache) + 1 "element_id = $element_id, nelements(dg, cache) = $(nelements(dg, cache))"
+    end # GC.@preserve old_u_ode
+
+    # Sanity check
+    if mesh isa TreeMesh && isperiodic(mesh.tree) && nmortars(cache.mortars) == 0
+        @assert ninterfaces(cache.interfaces)==ndims(mesh) * nelements(dg, cache) ("For $(ndims(mesh))D and periodic domains and conforming elements, the number of interfaces must be $(ndims(mesh)) times the number of elements")
+    end

-  return nothing
+    return nothing
end

-
 # TODO: Taal compare performance of different implementations
 # Coarsen solution data u for eight elements, using L2 projection
-function coarsen_elements!(u::AbstractArray{<:Any,5}, element_id,
+function coarsen_elements!(u::AbstractArray{<:Any, 5}, element_id,
                            old_u, old_element_id,
                            adaptor::LobattoLegendreAdaptorL2,
                            equations, dg, u_tmp1, u_tmp2)
-  @unpack reverse_upper, reverse_lower = adaptor
-
-  # Store old element ids
-  bottom_lower_left_id = old_element_id
-  bottom_lower_right_id = old_element_id + 1
-  bottom_upper_left_id = old_element_id + 2
-  bottom_upper_right_id = old_element_id + 3
-  top_lower_left_id = old_element_id + 4
-  top_lower_right_id = old_element_id + 5
-  top_upper_left_id = old_element_id + 6
-  top_upper_right_id = old_element_id + 7
-
-  @boundscheck begin
-    @assert old_element_id >= 1
-    @assert size(old_u, 1) == nvariables(equations)
-    @assert size(old_u, 2) == nnodes(dg)
-    @assert size(old_u, 3) == nnodes(dg)
-    @assert size(old_u, 4) == nnodes(dg)
-    @assert size(old_u, 5) >= old_element_id + 7
-    @assert element_id >= 1
-    @assert size( u, 1) == nvariables(equations)
-    @assert size( u, 2) == nnodes(dg)
-    @assert size( u, 3) == nnodes(dg)
-    @assert size( u, 4) == nnodes(dg)
-    @assert size( u, 5) >= element_id
-  end
-
-  # Project from bottom lower left element
-  multiply_dimensionwise!(
-    view(u, :, :, :, :, element_id), reverse_lower, reverse_lower, reverse_lower,
-    view(old_u, :, :, :, :, bottom_lower_left_id), u_tmp1,
u_tmp2)
-
-  # Project from bottom lower right element
-  add_multiply_dimensionwise!(
-    view(u, :, :, :, :, element_id), reverse_upper, reverse_lower, reverse_lower,
-    view(old_u, :, :, :, :, bottom_lower_right_id), u_tmp1, u_tmp2)
-
-  # Project from bottom upper left element
-  add_multiply_dimensionwise!(
-    view(u, :, :, :, :, element_id), reverse_lower, reverse_upper, reverse_lower,
-    view(old_u, :, :, :, :, bottom_upper_left_id), u_tmp1, u_tmp2)
-
-  # Project from bottom upper right element
-  add_multiply_dimensionwise!(
-    view(u, :, :, :, :, element_id), reverse_upper, reverse_upper, reverse_lower,
-    view(old_u, :, :, :, :, bottom_upper_right_id), u_tmp1, u_tmp2)
-
-  # Project from top lower left element
-  add_multiply_dimensionwise!(
-    view(u, :, :, :, :, element_id), reverse_lower, reverse_lower, reverse_upper,
-    view(old_u, :, :, :, :, top_lower_left_id), u_tmp1, u_tmp2)
-
-  # Project from top lower right element
-  add_multiply_dimensionwise!(
-    view(u, :, :, :, :, element_id), reverse_upper, reverse_lower, reverse_upper,
-    view(old_u, :, :, :, :, top_lower_right_id), u_tmp1, u_tmp2)
-
-  # Project from top upper left element
-  add_multiply_dimensionwise!(
-    view(u, :, :, :, :, element_id), reverse_lower, reverse_upper, reverse_upper,
-    view(old_u, :, :, :, :, top_upper_left_id), u_tmp1, u_tmp2)
-
-  # Project from top upper right element
-  add_multiply_dimensionwise!(
-    view(u, :, :, :, :, element_id), reverse_upper, reverse_upper, reverse_upper,
-    view(old_u, :, :, :, :, top_upper_right_id), u_tmp1, u_tmp2)
-
-  return nothing
-end
+    @unpack reverse_upper, reverse_lower = adaptor
+
+    # Store old element ids
+    bottom_lower_left_id = old_element_id
+    bottom_lower_right_id = old_element_id + 1
+    bottom_upper_left_id = old_element_id + 2
+    bottom_upper_right_id = old_element_id + 3
+    top_lower_left_id = old_element_id + 4
+    top_lower_right_id = old_element_id + 5
+    top_upper_left_id = old_element_id + 6
+    top_upper_right_id = old_element_id + 7
+
+    @boundscheck begin
+        @assert old_element_id >= 1
+        @assert size(old_u, 1) == nvariables(equations)
+        @assert size(old_u, 2) == nnodes(dg)
+        @assert size(old_u, 3) == nnodes(dg)
+        @assert size(old_u, 4) == nnodes(dg)
+        @assert size(old_u, 5) >= old_element_id + 7
+        @assert element_id >= 1
+        @assert size(u, 1) == nvariables(equations)
+        @assert size(u, 2) == nnodes(dg)
+        @assert size(u, 3) == nnodes(dg)
+        @assert size(u, 4) == nnodes(dg)
+        @assert size(u, 5) >= element_id
    end
    # Project from bottom lower left element
+    multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_lower,
+                            reverse_lower, reverse_lower,
+                            view(old_u, :, :, :, :, bottom_lower_left_id), u_tmp1,
+                            u_tmp2)
+
+    # Project from bottom lower right element
+    add_multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_upper,
+                                reverse_lower, reverse_lower,
+                                view(old_u, :, :, :, :, bottom_lower_right_id), u_tmp1,
+                                u_tmp2)
+
+    # Project from bottom upper left element
+    add_multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_lower,
+                                reverse_upper, reverse_lower,
+                                view(old_u, :, :, :, :, bottom_upper_left_id), u_tmp1,
+                                u_tmp2)
+
+    # Project from bottom upper right element
+    add_multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_upper,
+                                reverse_upper, reverse_lower,
+                                view(old_u, :, :, :, :, bottom_upper_right_id), u_tmp1,
+                                u_tmp2)
+
+    # Project from top lower left element
+    add_multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_lower,
+                                reverse_lower, reverse_upper,
+                                view(old_u, :, :, :, :,
top_lower_left_id), u_tmp1, + u_tmp2) + + # Project from top lower right element + add_multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_upper, + reverse_lower, reverse_upper, + view(old_u, :, :, :, :, top_lower_right_id), u_tmp1, + u_tmp2) + + # Project from top upper left element + add_multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_lower, + reverse_upper, reverse_upper, + view(old_u, :, :, :, :, top_upper_left_id), u_tmp1, + u_tmp2) + + # Project from top upper right element + add_multiply_dimensionwise!(view(u, :, :, :, :, element_id), reverse_upper, + reverse_upper, reverse_upper, + view(old_u, :, :, :, :, top_upper_right_id), u_tmp1, + u_tmp2) + + return nothing +end # this method is called when an `ControllerThreeLevel` is constructed function create_cache(::Type{ControllerThreeLevel}, mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations, dg::DG, cache) - - controller_value = Vector{Int}(undef, nelements(dg, cache)) - return (; controller_value) + controller_value = Vector{Int}(undef, nelements(dg, cache)) + return (; controller_value) end - - end # @muladd diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl index c6a2ee6fb95..2e038401df7 100644 --- a/src/callbacks_step/analysis.jl +++ b/src/callbacks_step/analysis.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # TODO: Taal refactor # - analysis_interval part as PeriodicCallback called after a certain amount of simulation time @@ -36,539 +36,557 @@ evaluating the computational performance, such as the total runtime, the perform (time/DOF/rhs!), the time spent in garbage collection (GC), or the current memory usage (alloc'd memory). """ -mutable struct AnalysisCallback{Analyzer, AnalysisIntegrals, InitialStateIntegrals, Cache} - start_time::Float64 - start_time_last_analysis::Float64 - ncalls_rhs_last_analysis::Int - start_gc_time::Float64 - interval::Int - save_analysis::Bool - output_directory::String - analysis_filename::String - analyzer::Analyzer - analysis_errors::Vector{Symbol} - analysis_integrals::AnalysisIntegrals - initial_state_integrals::InitialStateIntegrals - cache::Cache +mutable struct AnalysisCallback{Analyzer, AnalysisIntegrals, InitialStateIntegrals, + Cache} + start_time::Float64 + start_time_last_analysis::Float64 + ncalls_rhs_last_analysis::Int + start_gc_time::Float64 + interval::Int + save_analysis::Bool + output_directory::String + analysis_filename::String + analyzer::Analyzer + analysis_errors::Vector{Symbol} + analysis_integrals::AnalysisIntegrals + initial_state_integrals::InitialStateIntegrals + cache::Cache end - # TODO: Taal bikeshedding, implement a method with less information and the signature # function Base.show(io::IO, analysis_callback::AnalysisCallback) # end -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:AnalysisCallback}) - @nospecialize cb # reduce precompilation time +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:AnalysisCallback}) + @nospecialize cb # reduce precompilation time - if get(io, :compact, false) - show(io, cb) - else - analysis_callback = cb.affect! + if get(io, :compact, false) + show(io, cb) + else + analysis_callback = cb.affect! 
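The `create_cache` methods in this hunk return their storage via the NamedTuple field shorthand `(; controller_value)`. A standalone illustration of what that shorthand does (plain Julia, no Trixi.jl types involved):

    # `(; x)` builds a NamedTuple whose field name is taken from the variable.
    controller_value = Vector{Int}(undef, 4)
    cache = (; controller_value)   # same as (controller_value = controller_value,)
    @assert cache.controller_value === controller_value
    @assert keys(cache) == (:controller_value,)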
- setup = Pair{String,Any}[ - "interval" => analysis_callback.interval, - "analyzer" => analysis_callback.analyzer, - ] - for (idx, error) in enumerate(analysis_callback.analysis_errors) - push!(setup, "│ error " * string(idx) => error) - end - for (idx, integral) in enumerate(analysis_callback.analysis_integrals) - push!(setup, "│ integral " * string(idx) => integral) - end - push!(setup, "save analysis to file" => analysis_callback.save_analysis ? "yes" : "no") - if analysis_callback.save_analysis - push!(setup, "│ filename" => analysis_callback.analysis_filename) - push!(setup, "│ output directory" => abspath(normpath(analysis_callback.output_directory))) + setup = Pair{String, Any}["interval" => analysis_callback.interval, + "analyzer" => analysis_callback.analyzer] + for (idx, error) in enumerate(analysis_callback.analysis_errors) + push!(setup, "│ error " * string(idx) => error) + end + for (idx, integral) in enumerate(analysis_callback.analysis_integrals) + push!(setup, "│ integral " * string(idx) => integral) + end + push!(setup, + "save analysis to file" => analysis_callback.save_analysis ? "yes" : "no") + if analysis_callback.save_analysis + push!(setup, "│ filename" => analysis_callback.analysis_filename) + push!(setup, + "│ output directory" => abspath(normpath(analysis_callback.output_directory))) + end + summary_box(io, "AnalysisCallback", setup) end - summary_box(io, "AnalysisCallback", setup) - end end - function AnalysisCallback(semi::AbstractSemidiscretization; kwargs...) - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - AnalysisCallback(mesh, equations, solver, cache; kwargs...) + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + AnalysisCallback(mesh, equations, solver, cache; kwargs...) end function AnalysisCallback(mesh, equations::AbstractEquations, solver, cache; - interval=0, - save_analysis=false, - output_directory="out", - analysis_filename="analysis.dat", - extra_analysis_errors=Symbol[], - analysis_errors=union(default_analysis_errors(equations), extra_analysis_errors), - extra_analysis_integrals=(), - analysis_integrals=union(default_analysis_integrals(equations), extra_analysis_integrals), - RealT=real(solver), - uEltype=eltype(cache.elements), + interval = 0, + save_analysis = false, + output_directory = "out", + analysis_filename = "analysis.dat", + extra_analysis_errors = Symbol[], + analysis_errors = union(default_analysis_errors(equations), + extra_analysis_errors), + extra_analysis_integrals = (), + analysis_integrals = union(default_analysis_integrals(equations), + extra_analysis_integrals), + RealT = real(solver), + uEltype = eltype(cache.elements), kwargs...) - # Decide when the callback is activated. - # With error-based step size control, some steps can be rejected. Thus, - # `integrator.iter >= integrator.stats.naccept` - # (total #steps) (#accepted steps) - # We need to check the number of accepted steps since callbacks are not - # activated after a rejected step. - condition = (u, t, integrator) -> interval > 0 && ( (integrator.stats.naccept % interval == 0 && - !(integrator.stats.naccept == 0 && integrator.iter > 0)) || - isfinished(integrator)) - - analyzer = SolutionAnalyzer(solver; kwargs...) 
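The `analysis_errors` and `analysis_integrals` keyword defaults above merge the equation-specific defaults with user-supplied extras via `union`, which keeps the order of first occurrence and drops duplicates. A small sketch with hypothetical inputs:

    # Hypothetical stand-ins for default_analysis_errors(equations) and the
    # extra_analysis_errors keyword argument.
    defaults = [:l2_error, :linf_error]
    extras = [:conservation_error, :l2_error]   # :l2_error is already a default
    @assert union(defaults, extras) == [:l2_error, :linf_error, :conservation_error]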
- cache_analysis = create_cache_analysis(analyzer, mesh, equations, solver, cache, RealT, uEltype) - - analysis_callback = AnalysisCallback(0.0, 0.0, 0, 0.0, - interval, save_analysis, output_directory, analysis_filename, - analyzer, - analysis_errors, Tuple(analysis_integrals), - SVector(ntuple(_ -> zero(uEltype), Val(nvariables(equations)))), - cache_analysis) - - DiscreteCallback(condition, analysis_callback, - save_positions=(false,false), - initialize=initialize!) + # Decide when the callback is activated. + # With error-based step size control, some steps can be rejected. Thus, + # `integrator.iter >= integrator.stats.naccept` + # (total #steps) (#accepted steps) + # We need to check the number of accepted steps since callbacks are not + # activated after a rejected step. + condition = (u, t, integrator) -> interval > 0 && + ((integrator.stats.naccept % interval == 0 && + !(integrator.stats.naccept == 0 && integrator.iter > 0)) || + isfinished(integrator)) + + analyzer = SolutionAnalyzer(solver; kwargs...) + cache_analysis = create_cache_analysis(analyzer, mesh, equations, solver, cache, + RealT, uEltype) + + analysis_callback = AnalysisCallback(0.0, 0.0, 0, 0.0, + interval, save_analysis, output_directory, + analysis_filename, + analyzer, + analysis_errors, Tuple(analysis_integrals), + SVector(ntuple(_ -> zero(uEltype), + Val(nvariables(equations)))), + cache_analysis) + + DiscreteCallback(condition, analysis_callback, + save_positions = (false, false), + initialize = initialize!) end +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u_ode, t, + integrator) where {Condition, Affect! <: AnalysisCallback} + semi = integrator.p + initial_state_integrals = integrate(u_ode, semi) + _, equations, _, _ = mesh_equations_solver_cache(semi) -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u_ode, t, integrator) where {Condition, Affect!<:AnalysisCallback} - semi = integrator.p - initial_state_integrals = integrate(u_ode, semi) - _, equations, _, _ = mesh_equations_solver_cache(semi) - - analysis_callback = cb.affect! - analysis_callback.initial_state_integrals = initial_state_integrals - @unpack save_analysis, output_directory, analysis_filename, analysis_errors, analysis_integrals = analysis_callback - - if save_analysis && mpi_isroot() - mkpath(output_directory) - - # write header of output file - open(joinpath(output_directory, analysis_filename), "w") do io - @printf(io, "#%-8s", "timestep") - @printf(io, " %-14s", "time") - @printf(io, " %-14s", "dt") - if :l2_error in analysis_errors - for v in varnames(cons2cons, equations) - @printf(io, " %-14s", "l2_" * v) - end - end - if :linf_error in analysis_errors - for v in varnames(cons2cons, equations) - @printf(io, " %-14s", "linf_" * v) - end - end - if :conservation_error in analysis_errors - for v in varnames(cons2cons, equations) - @printf(io, " %-14s", "cons_" * v) - end - end - if :residual in analysis_errors - for v in varnames(cons2cons, equations) - @printf(io, " %-14s", "res_" * v) - end - end - if :l2_error_primitive in analysis_errors - for v in varnames(cons2prim, equations) - @printf(io, " %-14s", "l2_" * v) - end - end - if :linf_error_primitive in analysis_errors - for v in varnames(cons2prim, equations) - @printf(io, " %-14s", "linf_" * v) + analysis_callback = cb.affect! 
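The activation `condition` above combines three cases: regular activation every `interval` accepted steps, suppression right after a rejected first step, and forced activation at the final time. A simplified standalone version of the same Boolean logic, with a plain `finished` flag standing in for `isfinished(integrator)`:

    # Simplified sketch of the callback activation condition.
    analysis_due(naccept, iter, interval; finished = false) =
        interval > 0 &&
        ((naccept % interval == 0 && !(naccept == 0 && iter > 0)) || finished)

    @assert analysis_due(0, 0, 10)                    # fires once at the very start
    @assert !analysis_due(0, 1, 10)                   # but not again after a rejected first step
    @assert analysis_due(10, 12, 10)                  # every `interval` accepted steps
    @assert analysis_due(7, 9, 10, finished = true)   # and always at the final time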
+ analysis_callback.initial_state_integrals = initial_state_integrals + @unpack save_analysis, output_directory, analysis_filename, analysis_errors, analysis_integrals = analysis_callback + + if save_analysis && mpi_isroot() + mkpath(output_directory) + + # write header of output file + open(joinpath(output_directory, analysis_filename), "w") do io + @printf(io, "#%-8s", "timestep") + @printf(io, " %-14s", "time") + @printf(io, " %-14s", "dt") + if :l2_error in analysis_errors + for v in varnames(cons2cons, equations) + @printf(io, " %-14s", "l2_"*v) + end + end + if :linf_error in analysis_errors + for v in varnames(cons2cons, equations) + @printf(io, " %-14s", "linf_"*v) + end + end + if :conservation_error in analysis_errors + for v in varnames(cons2cons, equations) + @printf(io, " %-14s", "cons_"*v) + end + end + if :residual in analysis_errors + for v in varnames(cons2cons, equations) + @printf(io, " %-14s", "res_"*v) + end + end + if :l2_error_primitive in analysis_errors + for v in varnames(cons2prim, equations) + @printf(io, " %-14s", "l2_"*v) + end + end + if :linf_error_primitive in analysis_errors + for v in varnames(cons2prim, equations) + @printf(io, " %-14s", "linf_"*v) + end + end + + for quantity in analysis_integrals + @printf(io, " %-14s", pretty_form_ascii(quantity)) + end + + println(io) end - end - - for quantity in analysis_integrals - @printf(io, " %-14s", pretty_form_ascii(quantity)) - end - - println(io) end - end - - # Record current time using a high-resolution clock - analysis_callback.start_time = time_ns() + # Record current time using a high-resolution clock + analysis_callback.start_time = time_ns() - # Record current time for performance index computation - analysis_callback.start_time_last_analysis = time_ns() + # Record current time for performance index computation + analysis_callback.start_time_last_analysis = time_ns() - # Record current number of `rhs!` calls for performance index computation - analysis_callback.ncalls_rhs_last_analysis = ncalls(semi.performance_counter) + # Record current number of `rhs!` calls for performance index computation + analysis_callback.ncalls_rhs_last_analysis = ncalls(semi.performance_counter) - # Record total time spent in garbage collection so far using a high-resolution clock - # Note: For details see the actual callback function below - analysis_callback.start_gc_time = Base.gc_time_ns() + # Record total time spent in garbage collection so far using a high-resolution clock + # Note: For details see the actual callback function below + analysis_callback.start_gc_time = Base.gc_time_ns() - analysis_callback(integrator) - return nothing + analysis_callback(integrator) + return nothing end - # TODO: Taal refactor, allow passing an IO object (which could be devnull to avoid cluttering the console) function (analysis_callback::AnalysisCallback)(integrator) - semi = integrator.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - @unpack dt, t = integrator - iter = integrator.stats.naccept - - # Record performance measurements and compute performance index (PID) - runtime_since_last_analysis = 1.0e-9 * (time_ns() - analysis_callback.start_time_last_analysis) - # PID is an MPI-aware measure of how much time per global degree of freedom (i.e., over all ranks) - # and per `rhs!` evaluation is required. MPI-aware means that it essentially adds up the time - # spent on each MPI rank. 
Thus, in an ideally parallelized program, the PID should be constant - # independent of the number of MPI ranks used, since, e.g., using 4x the number of ranks should - # divide the runtime on each rank by 4. See also the Trixi.jl docs ("Performance" section) for - # more information. - ncalls_rhs_since_last_analysis = (ncalls(semi.performance_counter) - - analysis_callback.ncalls_rhs_last_analysis) - performance_index = runtime_since_last_analysis * mpi_nranks() / (ndofsglobal(mesh, solver, cache) - * ncalls_rhs_since_last_analysis) - - # Compute the total runtime since the analysis callback has been initialized, in seconds - runtime_absolute = 1.0e-9 * (time_ns() - analysis_callback.start_time) - - # Compute the relative runtime as time spent in `rhs!` divided by the number of calls to `rhs!` - # and the number of local degrees of freedom - # OBS! This computation must happen *after* the PID computation above, since `take!(...)` - # will reset the number of calls to `rhs!` - runtime_relative = 1.0e-9 * take!(semi.performance_counter) / ndofs(semi) - - # Compute the total time spent in garbage collection since the analysis callback has been - # initialized, in seconds - # Note: `Base.gc_time_ns()` is not part of the public Julia API but has been available at least - # since Julia 1.6. Should this function be removed without replacement in a future Julia - # release, just delete this analysis quantity from the callback. - # Source: https://github.com/JuliaLang/julia/blob/b540315cb4bd91e6f3a3e4ab8129a58556947628/base/timing.jl#L83-L84 - gc_time_absolute = 1.0e-9 * (Base.gc_time_ns() - analysis_callback.start_gc_time) - - # Compute the percentage of total time that was spent in garbage collection - gc_time_percentage = gc_time_absolute / runtime_absolute - - # Obtain the current memory usage of the Julia garbage collector, in MiB, i.e., the total size of - # objects in memory that have been allocated by the JIT compiler or the user code. - # Note: `Base.gc_live_bytes()` is not part of the public Julia API but has been available at least - # since Julia 1.6. Should this function be removed without replacement in a future Julia - # release, just delete this analysis quantity from the callback. - # Source: https://github.com/JuliaLang/julia/blob/b540315cb4bd91e6f3a3e4ab8129a58556947628/base/timing.jl#L86-L97 - memory_use = Base.gc_live_bytes() / 2^20 # bytes -> MiB - - @trixi_timeit timer() "analyze solution" begin - # General information - mpi_println() - mpi_println("─"^100) - # TODO: Taal refactor, polydeg is specific to DGSEM - mpi_println(" Simulation running '", get_name(equations), "' with ", summary(solver)) - mpi_println("─"^100) - mpi_println(" #timesteps: " * @sprintf("% 14d", iter) * - " " * - " run time: " * @sprintf("%10.8e s", runtime_absolute)) - mpi_println(" Δt: " * @sprintf("%10.8e", dt) * - " " * - " └── GC time: " * @sprintf("%10.8e s (%5.3f%%)", gc_time_absolute, gc_time_percentage)) - mpi_println(" sim. 
time: " * @sprintf("%10.8e", t) * - " " * - " time/DOF/rhs!: " * @sprintf("%10.8e s", runtime_relative)) - mpi_println(" " * " " * - " " * - " PID: " * @sprintf("%10.8e s", performance_index)) - mpi_println(" #DOF: " * @sprintf("% 14d", ndofs(semi)) * - " " * - " alloc'd memory: " * @sprintf("%14.3f MiB", memory_use)) - mpi_println(" #elements: " * @sprintf("% 14d", nelements(mesh, solver, cache))) - - # Level information (only show for AMR) - print_amr_information(integrator.opts.callback, mesh, solver, cache) - mpi_println() - - # Open file for appending and store time step and time information - if mpi_isroot() && analysis_callback.save_analysis - io = open(joinpath(analysis_callback.output_directory, analysis_callback.analysis_filename), "a") - @printf(io, "% 9d", iter) - @printf(io, " %10.8e", t) - @printf(io, " %10.8e", dt) - else - io = devnull - end - - # Calculate current time derivative (needed for semidiscrete entropy time derivative, residual, etc.) - du_ode = first(get_tmp_cache(integrator)) - # `integrator.f` is usually just a call to `rhs!` - # However, we want to allow users to modify the ODE RHS outside of Trixi.jl - # and allow us to pass a combined ODE RHS to OrdinaryDiffEq, e.g., for - # hyperbolic-parabolic systems. - @notimeit timer() integrator.f(du_ode, integrator.u, semi, t) - u = wrap_array(integrator.u, mesh, equations, solver, cache) - du = wrap_array(du_ode, mesh, equations, solver, cache) - l2_error, linf_error = analysis_callback(io, du, u, integrator.u, t, semi) - - mpi_println("─"^100) - mpi_println() + semi = integrator.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + @unpack dt, t = integrator + iter = integrator.stats.naccept + + # Record performance measurements and compute performance index (PID) + runtime_since_last_analysis = 1.0e-9 * (time_ns() - + analysis_callback.start_time_last_analysis) + # PID is an MPI-aware measure of how much time per global degree of freedom (i.e., over all ranks) + # and per `rhs!` evaluation is required. MPI-aware means that it essentially adds up the time + # spent on each MPI rank. Thus, in an ideally parallelized program, the PID should be constant + # independent of the number of MPI ranks used, since, e.g., using 4x the number of ranks should + # divide the runtime on each rank by 4. See also the Trixi.jl docs ("Performance" section) for + # more information. + ncalls_rhs_since_last_analysis = (ncalls(semi.performance_counter) + - + analysis_callback.ncalls_rhs_last_analysis) + performance_index = runtime_since_last_analysis * mpi_nranks() / + (ndofsglobal(mesh, solver, cache) + * + ncalls_rhs_since_last_analysis) + + # Compute the total runtime since the analysis callback has been initialized, in seconds + runtime_absolute = 1.0e-9 * (time_ns() - analysis_callback.start_time) + + # Compute the relative runtime as time spent in `rhs!` divided by the number of calls to `rhs!` + # and the number of local degrees of freedom + # OBS! This computation must happen *after* the PID computation above, since `take!(...)` + # will reset the number of calls to `rhs!` + runtime_relative = 1.0e-9 * take!(semi.performance_counter) / ndofs(semi) + + # Compute the total time spent in garbage collection since the analysis callback has been + # initialized, in seconds + # Note: `Base.gc_time_ns()` is not part of the public Julia API but has been available at least + # since Julia 1.6. 
Should this function be removed without replacement in a future Julia + # release, just delete this analysis quantity from the callback. + # Source: https://github.com/JuliaLang/julia/blob/b540315cb4bd91e6f3a3e4ab8129a58556947628/base/timing.jl#L83-L84 + gc_time_absolute = 1.0e-9 * (Base.gc_time_ns() - analysis_callback.start_gc_time) + + # Compute the percentage of total time that was spent in garbage collection + gc_time_percentage = gc_time_absolute / runtime_absolute + + # Obtain the current memory usage of the Julia garbage collector, in MiB, i.e., the total size of + # objects in memory that have been allocated by the JIT compiler or the user code. + # Note: `Base.gc_live_bytes()` is not part of the public Julia API but has been available at least + # since Julia 1.6. Should this function be removed without replacement in a future Julia + # release, just delete this analysis quantity from the callback. + # Source: https://github.com/JuliaLang/julia/blob/b540315cb4bd91e6f3a3e4ab8129a58556947628/base/timing.jl#L86-L97 + memory_use = Base.gc_live_bytes() / 2^20 # bytes -> MiB + + @trixi_timeit timer() "analyze solution" begin + # General information + mpi_println() + mpi_println("─"^100) + mpi_println(" Simulation running '", get_name(equations), "' with ", + summary(solver)) + mpi_println("─"^100) + mpi_println(" #timesteps: " * @sprintf("% 14d", iter) * + " " * + " run time: " * @sprintf("%10.8e s", runtime_absolute)) + mpi_println(" Δt: " * @sprintf("%10.8e", dt) * + " " * + " └── GC time: " * + @sprintf("%10.8e s (%5.3f%%)", gc_time_absolute, gc_time_percentage)) + mpi_println(" sim. time: " * @sprintf("%10.8e", t) * + " " * + " time/DOF/rhs!: " * @sprintf("%10.8e s", runtime_relative)) + mpi_println(" " * " " * + " " * + " PID: " * @sprintf("%10.8e s", performance_index)) + mpi_println(" #DOF: " * @sprintf("% 14d", ndofs(semi)) * + " " * + " alloc'd memory: " * @sprintf("%14.3f MiB", memory_use)) + mpi_println(" #elements: " * + @sprintf("% 14d", nelements(mesh, solver, cache))) + + # Level information (only show for AMR) + print_amr_information(integrator.opts.callback, mesh, solver, cache) + mpi_println() + + # Open file for appending and store time step and time information + if mpi_isroot() && analysis_callback.save_analysis + io = open(joinpath(analysis_callback.output_directory, + analysis_callback.analysis_filename), "a") + @printf(io, "% 9d", iter) + @printf(io, " %10.8e", t) + @printf(io, " %10.8e", dt) + else + io = devnull + end - # Add line break and close analysis file if it was opened - if mpi_isroot() && analysis_callback.save_analysis - # This resolves a possible type instability introduced above, since `io` - # can either be an `IOStream` or `devnull`, but we know that it must be - # an `IOStream here`. - println(io::IOStream) - close(io::IOStream) + # Calculate current time derivative (needed for semidiscrete entropy time derivative, residual, etc.) + du_ode = first(get_tmp_cache(integrator)) + # `integrator.f` is usually just a call to `rhs!` + # However, we want to allow users to modify the ODE RHS outside of Trixi.jl + # and allow us to pass a combined ODE RHS to OrdinaryDiffEq, e.g., for + # hyperbolic-parabolic systems. 
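To make the PID formula above concrete, here is a worked numeric sketch with invented measurements; the real values come from `time_ns()`, `mpi_nranks()`, `ndofsglobal`, and the performance counter:

    # Hypothetical measurements for one analysis window.
    runtime_since_last_analysis = 2.0  # seconds of wall time on each rank
    nranks = 4                         # MPI ranks
    ndofs_global = 100_000             # degrees of freedom over all ranks
    ncalls_rhs = 50                    # rhs! evaluations since the last analysis

    # The PID sums the time over ranks, then normalizes by DOFs and rhs! calls.
    pid = runtime_since_last_analysis * nranks / (ndofs_global * ncalls_rhs)
    @assert pid ≈ 1.6e-6               # seconds per DOF per rhs! evaluation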
+ @notimeit timer() integrator.f(du_ode, integrator.u, semi, t) + u = wrap_array(integrator.u, mesh, equations, solver, cache) + du = wrap_array(du_ode, mesh, equations, solver, cache) + l2_error, linf_error = analysis_callback(io, du, u, integrator.u, t, semi) + + mpi_println("─"^100) + mpi_println() + + # Add line break and close analysis file if it was opened + if mpi_isroot() && analysis_callback.save_analysis + # This resolves a possible type instability introduced above, since `io` + # can either be an `IOStream` or `devnull`, but we know that it must be + # an `IOStream here`. + println(io::IOStream) + close(io::IOStream) + end end - end - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) - # Reset performance measurements - analysis_callback.start_time_last_analysis = time_ns() - analysis_callback.ncalls_rhs_last_analysis = ncalls(semi.performance_counter) + # Reset performance measurements + analysis_callback.start_time_last_analysis = time_ns() + analysis_callback.ncalls_rhs_last_analysis = ncalls(semi.performance_counter) - # Return errors for EOC analysis - return l2_error, linf_error + # Return errors for EOC analysis + return l2_error, linf_error end - # This method is just called internally from `(analysis_callback::AnalysisCallback)(integrator)` # and serves as a function barrier. Additionally, it makes the code easier to profile and optimize. function (analysis_callback::AnalysisCallback)(io, du, u, u_ode, t, semi) - @unpack analyzer, analysis_errors, analysis_integrals = analysis_callback - cache_analysis = analysis_callback.cache - _, equations, _, _ = mesh_equations_solver_cache(semi) - - # Calculate and print derived quantities (error norms, entropy etc.) - # Variable names required for L2 error, Linf error, and conservation error - if any(q in analysis_errors for q in - (:l2_error, :linf_error, :conservation_error, :residual)) && mpi_isroot() - print(" Variable: ") - for v in eachvariable(equations) - @printf(" %-14s", varnames(cons2cons, equations)[v]) - end - println() - end - - # Calculate L2/Linf errors, which are also returned - l2_error, linf_error = calc_error_norms(u_ode, t, analyzer, semi, cache_analysis) - - if mpi_isroot() - # L2 error - if :l2_error in analysis_errors - print(" L2 error: ") - for v in eachvariable(equations) - @printf(" % 10.8e", l2_error[v]) - @printf(io, " % 10.8e", l2_error[v]) - end - println() - end - - # Linf error - if :linf_error in analysis_errors - print(" Linf error: ") - for v in eachvariable(equations) - @printf(" % 10.8e", linf_error[v]) - @printf(io, " % 10.8e", linf_error[v]) - end - println() + @unpack analyzer, analysis_errors, analysis_integrals = analysis_callback + cache_analysis = analysis_callback.cache + _, equations, _, _ = mesh_equations_solver_cache(semi) + + # Calculate and print derived quantities (error norms, entropy etc.) 
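The comment above calls the two-argument method a function barrier: the fields unpacked from `semi` are not concretely typed from the caller's perspective, so the outer method extracts them once and hands them to an inner method that the compiler can specialize. A generic standalone sketch of the pattern (not the Trixi.jl types):

    struct Container
        data::Any                      # deliberately untyped field
    end

    process(c::Container) = process_inner(c.data)   # barrier: one dynamic dispatch
    process_inner(x) = sum(abs2, x)                 # compiled for the concrete type of x

    @assert process(Container([1.0, 2.0])) == 5.0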
+ # Variable names required for L2 error, Linf error, and conservation error + if any(q in analysis_errors + for q in (:l2_error, :linf_error, :conservation_error, :residual)) && + mpi_isroot() + print(" Variable: ") + for v in eachvariable(equations) + @printf(" %-14s", varnames(cons2cons, equations)[v]) + end + println() end - end - - # Conservation error - if :conservation_error in analysis_errors - @unpack initial_state_integrals = analysis_callback - state_integrals = integrate(u_ode, semi) + # Calculate L2/Linf errors, which are also returned + l2_error, linf_error = calc_error_norms(u_ode, t, analyzer, semi, cache_analysis) if mpi_isroot() - print(" |∑U - ∑U₀|: ") - for v in eachvariable(equations) - err = abs(state_integrals[v] - initial_state_integrals[v]) - @printf(" % 10.8e", err) - @printf(io, " % 10.8e", err) - end - println() + # L2 error + if :l2_error in analysis_errors + print(" L2 error: ") + for v in eachvariable(equations) + @printf(" % 10.8e", l2_error[v]) + @printf(io, " % 10.8e", l2_error[v]) + end + println() + end + + # Linf error + if :linf_error in analysis_errors + print(" Linf error: ") + for v in eachvariable(equations) + @printf(" % 10.8e", linf_error[v]) + @printf(io, " % 10.8e", linf_error[v]) + end + println() + end end - end - - # Residual (defined here as the vector maximum of the absolute values of the time derivatives) - if :residual in analysis_errors - mpi_print(" max(|Uₜ|): ") - for v in eachvariable(equations) - # Calculate maximum absolute value of Uₜ - res = maximum(abs, view(du, v, ..)) - if mpi_isparallel() - # TODO: Debugging, here is a type instability - global_res = MPI.Reduce!(Ref(res), max, mpi_root(), mpi_comm()) + + # Conservation error + if :conservation_error in analysis_errors + @unpack initial_state_integrals = analysis_callback + state_integrals = integrate(u_ode, semi) + if mpi_isroot() - res::eltype(du) = global_res[] + print(" |∑U - ∑U₀|: ") + for v in eachvariable(equations) + err = abs(state_integrals[v] - initial_state_integrals[v]) + @printf(" % 10.8e", err) + @printf(io, " % 10.8e", err) + end + println() end - end - if mpi_isroot() - @printf(" % 10.8e", res) - @printf(io, " % 10.8e", res) - end end - mpi_println() - end - - # L2/L∞ errors of the primitive variables - if :l2_error_primitive in analysis_errors || :linf_error_primitive in analysis_errors - l2_error_prim, linf_error_prim = calc_error_norms(cons2prim, u_ode, t, analyzer, semi, cache_analysis) - if mpi_isroot() - print(" Variable: ") - for v in eachvariable(equations) - @printf(" %-14s", varnames(cons2prim, equations)[v]) - end - println() - - # L2 error - if :l2_error_primitive in analysis_errors - print(" L2 error prim.: ") + # Residual (defined here as the vector maximum of the absolute values of the time derivatives) + if :residual in analysis_errors + mpi_print(" max(|Uₜ|): ") for v in eachvariable(equations) - @printf("%10.8e ", l2_error_prim[v]) - @printf(io, " % 10.8e", l2_error_prim[v]) + # Calculate maximum absolute value of Uₜ + res = maximum(abs, view(du, v, ..)) + if mpi_isparallel() + # TODO: Debugging, here is a type instability + global_res = MPI.Reduce!(Ref(res), max, mpi_root(), mpi_comm()) + if mpi_isroot() + res::eltype(du) = global_res[] + end + end + if mpi_isroot() + @printf(" % 10.8e", res) + @printf(io, " % 10.8e", res) + end end - println() - end + mpi_println() + end - # L∞ error - if :linf_error_primitive in analysis_errors - print(" Linf error pri.:") - for v in eachvariable(equations) - @printf("%10.8e ", linf_error_prim[v]) - 
@printf(io, " % 10.8e", linf_error_prim[v]) + # L2/L∞ errors of the primitive variables + if :l2_error_primitive in analysis_errors || + :linf_error_primitive in analysis_errors + l2_error_prim, linf_error_prim = calc_error_norms(cons2prim, u_ode, t, analyzer, + semi, cache_analysis) + + if mpi_isroot() + print(" Variable: ") + for v in eachvariable(equations) + @printf(" %-14s", varnames(cons2prim, equations)[v]) + end + println() + + # L2 error + if :l2_error_primitive in analysis_errors + print(" L2 error prim.: ") + for v in eachvariable(equations) + @printf("%10.8e ", l2_error_prim[v]) + @printf(io, " % 10.8e", l2_error_prim[v]) + end + println() + end + + # L∞ error + if :linf_error_primitive in analysis_errors + print(" Linf error pri.:") + for v in eachvariable(equations) + @printf("%10.8e ", linf_error_prim[v]) + @printf(io, " % 10.8e", linf_error_prim[v]) + end + println() + end end - println() - end end - end - # additional integrals - analyze_integrals(analysis_integrals, io, du, u, t, semi) + # additional integrals + analyze_integrals(analysis_integrals, io, du, u, t, semi) - return l2_error, linf_error + return l2_error, linf_error end - # Print level information only if AMR is enabled function print_amr_information(callbacks, mesh, solver, cache) - # Return early if there is nothing to print - uses_amr(callbacks) || return nothing - - levels = Vector{Int}(undef, nelements(solver, cache)) - min_level = typemax(Int) - max_level = typemin(Int) - for element in eachelement(solver, cache) - current_level = mesh.tree.levels[cache.elements.cell_ids[element]] - levels[element] = current_level - min_level = min(min_level, current_level) - max_level = max(max_level, current_level) - end - - for level = max_level:-1:min_level+1 - mpi_println(" ├── level $level: " * @sprintf("% 14d", count(==(level), levels))) - end - mpi_println(" └── level $min_level: " * @sprintf("% 14d", count(==(min_level), levels))) - - return nothing + # Return early if there is nothing to print + uses_amr(callbacks) || return nothing + + levels = Vector{Int}(undef, nelements(solver, cache)) + min_level = typemax(Int) + max_level = typemin(Int) + for element in eachelement(solver, cache) + current_level = mesh.tree.levels[cache.elements.cell_ids[element]] + levels[element] = current_level + min_level = min(min_level, current_level) + max_level = max(max_level, current_level) + end + + for level in max_level:-1:(min_level + 1) + mpi_println(" ├── level $level: " * + @sprintf("% 14d", count(==(level), levels))) + end + mpi_println(" └── level $min_level: " * + @sprintf("% 14d", count(==(min_level), levels))) + + return nothing end # Print level information only if AMR is enabled function print_amr_information(callbacks, mesh::P4estMesh, solver, cache) - # Return early if there is nothing to print - uses_amr(callbacks) || return nothing + # Return early if there is nothing to print + uses_amr(callbacks) || return nothing - elements_per_level = zeros(P4EST_MAXLEVEL + 1) + elements_per_level = zeros(P4EST_MAXLEVEL + 1) - for tree in unsafe_wrap_sc(p4est_tree_t, unsafe_load(mesh.p4est).trees) - elements_per_level .+= tree.quadrants_per_level - end + for tree in unsafe_wrap_sc(p4est_tree_t, unsafe_load(mesh.p4est).trees) + elements_per_level .+= tree.quadrants_per_level + end - # levels start at zero but Julia's standard indexing starts at 1 - min_level_1 = findfirst(i -> i > 0, elements_per_level) - max_level_1 = findlast(i -> i > 0, elements_per_level) + # levels start at zero but Julia's standard indexing starts at 1 
+ min_level_1 = findfirst(i -> i > 0, elements_per_level) + max_level_1 = findlast(i -> i > 0, elements_per_level) - # Check if there is at least one level with an element - if isnothing(min_level_1) || isnothing(max_level_1) - return nothing - end + # Check if there is at least one level with an element + if isnothing(min_level_1) || isnothing(max_level_1) + return nothing + end - min_level = min_level_1 - 1 - max_level = max_level_1 - 1 + min_level = min_level_1 - 1 + max_level = max_level_1 - 1 - for level = max_level:-1:min_level+1 - mpi_println(" ├── level $level: " * @sprintf("% 14d", elements_per_level[level + 1])) - end - mpi_println(" └── level $min_level: " * @sprintf("% 14d", elements_per_level[min_level + 1])) + for level in max_level:-1:(min_level + 1) + mpi_println(" ├── level $level: " * + @sprintf("% 14d", elements_per_level[level + 1])) + end + mpi_println(" └── level $min_level: " * + @sprintf("% 14d", elements_per_level[min_level + 1])) - return nothing + return nothing end - # Iterate over tuples of analysis integrals in a type-stable way using "lispy tuple programming". -function analyze_integrals(analysis_integrals::NTuple{N,Any}, io, du, u, t, semi) where {N} - - # Extract the first analysis integral and process it; keep the remaining to be processed later - quantity = first(analysis_integrals) - remaining_quantities = Base.tail(analysis_integrals) - - res = analyze(quantity, du, u, t, semi) - if mpi_isroot() - @printf(" %-12s:", pretty_form_utf(quantity)) - @printf(" % 10.8e", res) - @printf(io, " % 10.8e", res) - end - mpi_println() - - # Recursively call this method with the unprocessed integrals - analyze_integrals(remaining_quantities, io, du, u, t, semi) - return nothing +function analyze_integrals(analysis_integrals::NTuple{N, Any}, io, du, u, t, + semi) where {N} + + # Extract the first analysis integral and process it; keep the remaining to be processed later + quantity = first(analysis_integrals) + remaining_quantities = Base.tail(analysis_integrals) + + res = analyze(quantity, du, u, t, semi) + if mpi_isroot() + @printf(" %-12s:", pretty_form_utf(quantity)) + @printf(" % 10.8e", res) + @printf(io, " % 10.8e", res) + end + mpi_println() + + # Recursively call this method with the unprocessed integrals + analyze_integrals(remaining_quantities, io, du, u, t, semi) + return nothing end # terminate the type-stable iteration over tuples function analyze_integrals(analysis_integrals::Tuple{}, io, du, u, t, semi) - nothing + nothing end - # used for error checks and EOC analysis -function (cb::DiscreteCallback{Condition,Affect!})(sol) where {Condition, Affect!<:AnalysisCallback} - analysis_callback = cb.affect! - semi = sol.prob.p - @unpack analyzer = analysis_callback - cache_analysis = analysis_callback.cache - - l2_error, linf_error = calc_error_norms(sol.u[end], sol.t[end], analyzer, semi, cache_analysis) - (; l2=l2_error, linf=linf_error) -end +function (cb::DiscreteCallback{Condition, Affect!})(sol) where {Condition, + Affect! <: + AnalysisCallback} + analysis_callback = cb.affect! 
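The recursion in `analyze_integrals` above is the "lispy tuple programming" its comment mentions: peeling elements off with `first` and `Base.tail` keeps every element's type concrete, so iterating a heterogeneous tuple stays type-stable. A stripped-down sketch of the pattern; the function name is illustrative only:

```julia
# Recursing on first/Base.tail compiles one specialized method per tuple
# shape, unlike a plain `for` loop over a heterogeneous tuple.
process_all(quantities::Tuple{}) = nothing  # terminate the recursion
function process_all(quantities::NTuple{N, Any}) where {N}
    quantity = first(quantities)
    println("processing ", quantity)
    process_all(Base.tail(quantities))  # recurse on the remaining entries
end

process_all((sin, "entropy", 42))
```
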
+ semi = sol.prob.p + @unpack analyzer = analysis_callback + cache_analysis = analysis_callback.cache + l2_error, linf_error = calc_error_norms(sol.u[end], sol.t[end], analyzer, semi, + cache_analysis) + (; l2 = l2_error, linf = linf_error) +end # some common analysis_integrals # to support another analysis integral, you can overload # Trixi.analyze, Trixi.pretty_form_utf, Trixi.pretty_form_ascii function analyze(quantity, du, u, t, semi::AbstractSemidiscretization) - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - analyze(quantity, du, u, t, mesh, equations, solver, cache) + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + analyze(quantity, du, u, t, mesh, equations, solver, cache) end function analyze(quantity, du, u, t, mesh, equations, solver, cache) - integrate(quantity, u, mesh, equations, solver, cache, normalize=true) + integrate(quantity, u, mesh, equations, solver, cache, normalize = true) end pretty_form_utf(quantity) = get_name(quantity) pretty_form_ascii(quantity) = get_name(quantity) - # Special analyze for `SemidiscretizationHyperbolicParabolic` such that # precomputed gradients are available. For now only implemented for the `enstrophy` #!!! warning "Experimental code" # This code is experimental and may be changed or removed in any future release. -function analyze(quantity::typeof(enstrophy), du, u, t, semi::SemidiscretizationHyperbolicParabolic) - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - equations_parabolic = semi.equations_parabolic - cache_parabolic = semi.cache_parabolic - analyze(quantity, du, u, t, mesh, equations, equations_parabolic, solver, cache, cache_parabolic) +function analyze(quantity::typeof(enstrophy), du, u, t, + semi::SemidiscretizationHyperbolicParabolic) + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + equations_parabolic = semi.equations_parabolic + cache_parabolic = semi.cache_parabolic + analyze(quantity, du, u, t, mesh, equations, equations_parabolic, solver, cache, + cache_parabolic) end -function analyze(quantity, du, u, t, mesh, equations, equations_parabolic, solver, cache, cache_parabolic) - integrate(quantity, u, mesh, equations, equations_parabolic, solver, cache, cache_parabolic, normalize=true) +function analyze(quantity, du, u, t, mesh, equations, equations_parabolic, solver, + cache, cache_parabolic) + integrate(quantity, u, mesh, equations, equations_parabolic, solver, cache, + cache_parabolic, normalize = true) end - function entropy_timederivative end pretty_form_utf(::typeof(entropy_timederivative)) = "∑∂S/∂U ⋅ Uₜ" pretty_form_ascii(::typeof(entropy_timederivative)) = "dsdu_ut" @@ -604,11 +622,8 @@ pretty_form_ascii(::Val{:linf_divb}) = "linf_divb" pretty_form_utf(::typeof(lake_at_rest_error)) = "∑|H₀-(h+b)|" pretty_form_ascii(::typeof(lake_at_rest_error)) = "|H0-(h+b)|" - - end # @muladd - # specialized implementations specific to some solvers include("analysis_dg1d.jl") include("analysis_dg2d.jl") diff --git a/src/callbacks_step/analysis_dg1d.jl b/src/callbacks_step/analysis_dg1d.jl index e92701dc1fb..d2613c325be 100644 --- a/src/callbacks_step/analysis_dg1d.jl +++ b/src/callbacks_step/analysis_dg1d.jl @@ -3,222 +3,226 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function create_cache_analysis(analyzer, mesh::TreeMesh{1}, equations, dg::DG, cache, RealT, uEltype) - # pre-allocate buffers - # We use `StrideArray`s here since these buffers are used in performance-critical - # places and the additional information passed to the compiler makes them faster - # than native `Array`s. - u_local = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer))) - x_local = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer))) + # pre-allocate buffers + # We use `StrideArray`s here since these buffers are used in performance-critical + # places and the additional information passed to the compiler makes them faster + # than native `Array`s. + u_local = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer))) + x_local = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer))) - return (; u_local, x_local) + return (; u_local, x_local) end - function create_cache_analysis(analyzer, mesh::StructuredMesh{1}, equations, dg::DG, cache, RealT, uEltype) - # pre-allocate buffers - # We use `StrideArray`s here since these buffers are used in performance-critical - # places and the additional information passed to the compiler makes them faster - # than native `Array`s. - u_local = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer))) - x_local = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer))) - jacobian_local = StrideArray(undef, RealT, - StaticInt(nnodes(analyzer))) - - return (; u_local, x_local, jacobian_local) + # pre-allocate buffers + # We use `StrideArray`s here since these buffers are used in performance-critical + # places and the additional information passed to the compiler makes them faster + # than native `Array`s. + u_local = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer))) + x_local = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer))) + jacobian_local = StrideArray(undef, RealT, + StaticInt(nnodes(analyzer))) + + return (; u_local, x_local, jacobian_local) end - function calc_error_norms(func, u, t, analyzer, mesh::StructuredMesh{1}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements - @unpack u_local, x_local, jacobian_local = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1), equations)) - linf_error = copy(l2_error) - total_volume = zero(real(mesh)) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, element)) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, element)) - multiply_scalar_dimensionwise!(jacobian_local, vandermonde, inv.(view(inverse_jacobian, :, element))) - - # Calculate errors at each analysis node - @. jacobian_local = abs(jacobian_local) - - for i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i), equations) - l2_error += diff.^2 * (weights[i] * jacobian_local[i]) - linf_error = @. 
max(linf_error, abs(diff)) - total_volume += weights[i] * jacobian_local[i] + @unpack vandermonde, weights = analyzer + @unpack node_coordinates, inverse_jacobian = cache.elements + @unpack u_local, x_local, jacobian_local = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1), equations)) + linf_error = copy(l2_error) + total_volume = zero(real(mesh)) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, element)) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, element)) + multiply_scalar_dimensionwise!(jacobian_local, vandermonde, + inv.(view(inverse_jacobian, :, element))) + + # Calculate errors at each analysis node + @. jacobian_local = abs(jacobian_local) + + for i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i), t, + equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i), equations) + l2_error += diff .^ 2 * (weights[i] * jacobian_local[i]) + linf_error = @. max(linf_error, abs(diff)) + total_volume += weights[i] * jacobian_local[i] + end end - end - # For L2 error, divide by total volume - l2_error = @. sqrt(l2_error / total_volume) + # For L2 error, divide by total volume + l2_error = @. sqrt(l2_error / total_volume) - return l2_error, linf_error + return l2_error, linf_error end - function calc_error_norms(func, u, t, analyzer, mesh::TreeMesh{1}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates = cache.elements - @unpack u_local, x_local = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1), equations)) - linf_error = copy(l2_error) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, element)) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, element)) - - # Calculate errors at each analysis node - volume_jacobian_ = volume_jacobian(element, mesh, cache) - - for i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i), equations) - l2_error += diff.^2 * (weights[i] * volume_jacobian_) - linf_error = @. 
max(linf_error, abs(diff)) + @unpack vandermonde, weights = analyzer + @unpack node_coordinates = cache.elements + @unpack u_local, x_local = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1), equations)) + linf_error = copy(l2_error) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, element)) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, element)) + + # Calculate errors at each analysis node + volume_jacobian_ = volume_jacobian(element, mesh, cache) + + for i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i), t, + equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i), equations) + l2_error += diff .^ 2 * (weights[i] * volume_jacobian_) + linf_error = @. max(linf_error, abs(diff)) + end end - end - # For L2 error, divide by total volume - total_volume_ = total_volume(mesh) - l2_error = @. sqrt(l2_error / total_volume_) + # For L2 error, divide by total volume + total_volume_ = total_volume(mesh) + l2_error = @. sqrt(l2_error / total_volume_) - return l2_error, linf_error + return l2_error, linf_error end - function integrate_via_indices(func::Func, u, mesh::StructuredMesh{1}, equations, dg::DGSEM, cache, - args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, equations, dg, args...)) - total_volume = zero(real(mesh)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - for i in eachnode(dg) - jacobian_volume = abs(inv(cache.elements.inverse_jacobian[i, element])) - integral += jacobian_volume * weights[i] * func(u, i, element, equations, dg, args...) - total_volume += jacobian_volume * weights[i] + args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, equations, dg, args...)) + total_volume = zero(real(mesh)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + for i in eachnode(dg) + jacobian_volume = abs(inv(cache.elements.inverse_jacobian[i, element])) + integral += jacobian_volume * weights[i] * + func(u, i, element, equations, dg, args...) + total_volume += jacobian_volume * weights[i] + end + end + # Normalize with total volume + if normalize + integral = integral / total_volume end - end - # Normalize with total volume - if normalize - integral = integral / total_volume - end - return integral + return integral end - function integrate_via_indices(func::Func, u, mesh::TreeMesh{1}, equations, dg::DGSEM, cache, - args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, equations, dg, args...)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - volume_jacobian_ = volume_jacobian(element, mesh, cache) - for i in eachnode(dg) - integral += volume_jacobian_ * weights[i] * func(u, i, element, equations, dg, args...) 
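Both `calc_error_norms` methods above compute the same discrete norms: the L2 error is the volume-weighted root mean square of the pointwise difference between numerical and exact solution, and the Linf error is its maximum. A toy sketch with made-up data for a single element:

```julia
# Pointwise errors at three quadrature nodes of one element
diffs = [0.1, -0.2, 0.05]
weights = [0.5, 1.0, 0.5]   # quadrature weights
jacobian = 2.0              # constant volume Jacobian of the element

total_volume = sum(weights) * jacobian
l2_error = sqrt(sum(w * d^2 * jacobian for (w, d) in zip(weights, diffs)) /
                total_volume)
linf_error = maximum(abs, diffs)
println((l2_error, linf_error))
```
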
+ args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, equations, dg, args...)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + volume_jacobian_ = volume_jacobian(element, mesh, cache) + for i in eachnode(dg) + integral += volume_jacobian_ * weights[i] * + func(u, i, element, equations, dg, args...) + end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume(mesh) - end + # Normalize with total volume + if normalize + integral = integral / total_volume(mesh) + end - return integral + return integral end - function integrate(func::Func, u, - mesh::Union{TreeMesh{1},StructuredMesh{1}}, - equations, dg::DG, cache; normalize=true) where {Func} - integrate_via_indices(u, mesh, equations, dg, cache; normalize=normalize) do u, i, element, equations, dg - u_local = get_node_vars(u, equations, dg, i, element) - return func(u_local, equations) - end + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + equations, dg::DG, cache; normalize = true) where {Func} + integrate_via_indices(u, mesh, equations, dg, cache; + normalize = normalize) do u, i, element, equations, dg + u_local = get_node_vars(u, equations, dg, i, element) + return func(u_local, equations) + end end - function analyze(::typeof(entropy_timederivative), du, u, t, - mesh::Union{TreeMesh{1},StructuredMesh{1}}, equations, dg::DG, cache) - # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ - integrate_via_indices(u, mesh, equations, dg, cache, du) do u, i, element, equations, dg, du - u_node = get_node_vars(u, equations, dg, i, element) - du_node = get_node_vars(du, equations, dg, i, element) - dot(cons2entropy(u_node, equations), du_node) - end + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, equations, dg::DG, cache) + # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ + integrate_via_indices(u, mesh, equations, dg, cache, + du) do u, i, element, equations, dg, du + u_node = get_node_vars(u, equations, dg, i, element) + du_node = get_node_vars(du, equations, dg, i, element) + dot(cons2entropy(u_node, equations), du_node) + end end function analyze(::Val{:l2_divb}, du, u, t, mesh::TreeMesh{1}, equations::IdealGlmMhdEquations1D, dg::DG, cache) - integrate_via_indices(u, mesh, equations, dg, cache, dg.basis.derivative_matrix) do u, i, element, equations, dg, derivative_matrix - divb = zero(eltype(u)) - for k in eachnode(dg) - divb += derivative_matrix[i, k] * u[6, k, element] - end - divb *= cache.elements.inverse_jacobian[element] - divb^2 - end |> sqrt + integrate_via_indices(u, mesh, equations, dg, cache, + dg.basis.derivative_matrix) do u, i, element, equations, dg, + derivative_matrix + divb = zero(eltype(u)) + for k in eachnode(dg) + divb += derivative_matrix[i, k] * u[6, k, element] + end + divb *= cache.elements.inverse_jacobian[element] + divb^2 + end |> sqrt end function analyze(::Val{:linf_divb}, du, u, t, mesh::TreeMesh{1}, equations::IdealGlmMhdEquations1D, dg::DG, cache) - @unpack derivative_matrix, weights = dg.basis - - # integrate over all elements to get the divergence-free condition errors - linf_divb = zero(eltype(u)) - for element in eachelement(dg, cache) - for i in eachnode(dg) - divb = zero(eltype(u)) - for k in eachnode(dg) - divb += derivative_matrix[i, k] * u[6, k, element] - end - divb *= cache.elements.inverse_jacobian[element] - linf_divb = max(linf_divb, abs(divb)) + @unpack derivative_matrix, weights = dg.basis + + # integrate over all elements to get the 
divergence-free condition errors + linf_divb = zero(eltype(u)) + for element in eachelement(dg, cache) + for i in eachnode(dg) + divb = zero(eltype(u)) + for k in eachnode(dg) + divb += derivative_matrix[i, k] * u[6, k, element] + end + divb *= cache.elements.inverse_jacobian[element] + linf_divb = max(linf_divb, abs(divb)) + end end - end - return linf_divb + return linf_divb end - - end # @muladd diff --git a/src/callbacks_step/analysis_dg2d.jl b/src/callbacks_step/analysis_dg2d.jl index 453474675f1..6c74e172e46 100644 --- a/src/callbacks_step/analysis_dg2d.jl +++ b/src/callbacks_step/analysis_dg2d.jl @@ -3,329 +3,356 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function create_cache_analysis(analyzer, mesh::TreeMesh{2}, equations, dg::DG, cache, RealT, uEltype) - # pre-allocate buffers - # We use `StrideArray`s here since these buffers are used in performance-critical - # places and the additional information passed to the compiler makes them faster - # than native `Array`s. - u_local = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - u_tmp1 = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - x_local = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - x_tmp1 = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - - return (; u_local, u_tmp1, x_local, x_tmp1) + # pre-allocate buffers + # We use `StrideArray`s here since these buffers are used in performance-critical + # places and the additional information passed to the compiler makes them faster + # than native `Array`s. + u_local = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer))) + u_tmp1 = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg))) + x_local = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer))) + x_tmp1 = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg))) + + return (; u_local, u_tmp1, x_local, x_tmp1) end - -function create_cache_analysis(analyzer, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, +function create_cache_analysis(analyzer, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, equations, dg::DG, cache, RealT, uEltype) - # pre-allocate buffers - # We use `StrideArray`s here since these buffers are used in performance-critical - # places and the additional information passed to the compiler makes them faster - # than native `Array`s. 
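The buffers below rely on StrideArrays.jl: passing sizes as `StaticInt` makes the dimensions compile-time constants, so the analysis kernels can be fully specialized. A minimal sketch, assuming StrideArrays.jl is installed and exports `StaticInt` as used in the code above:

```julia
using StrideArrays

# A 4×4 buffer whose dimensions are known to the compiler; otherwise it
# behaves like a regular mutable array.
buf = StrideArray(undef, Float64, StaticInt(4), StaticInt(4))
buf .= 0.0
buf[1, 1] = 1.0
println(size(buf), " ", sum(buf))
```
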
- u_local = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - u_tmp1 = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - x_local = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - x_tmp1 = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - jacobian_local = StrideArray(undef, RealT, - StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - jacobian_tmp1 = StrideArray(undef, RealT, - StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - - return (; u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1) + # pre-allocate buffers + # We use `StrideArray`s here since these buffers are used in performance-critical + # places and the additional information passed to the compiler makes them faster + # than native `Array`s. + u_local = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer))) + u_tmp1 = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg))) + x_local = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer))) + x_tmp1 = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg))) + jacobian_local = StrideArray(undef, RealT, + StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer))) + jacobian_tmp1 = StrideArray(undef, RealT, + StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) + + return (; u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1) end - function calc_error_norms(func, u, t, analyzer, mesh::TreeMesh{2}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates = cache.elements - @unpack u_local, u_tmp1, x_local, x_tmp1 = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) - linf_error = copy(l2_error) - - # Iterate over all elements for error calculations - # Accumulate L2 error on the element first so that the order of summation is the - # same as in the parallel case to ensure exact equality. This facilitates easier parallel - # development and debugging (see - # https://github.com/trixi-framework/Trixi.jl/pull/850#pullrequestreview-757463943 for details). - for element in eachelement(dg, cache) - # Set up data structures for local element L2 error - l2_error_local = zero(l2_error) - - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, :, element), x_tmp1) - - # Calculate errors at each analysis node - volume_jacobian_ = volume_jacobian(element, mesh, cache) - - for j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i, j), equations) - l2_error_local += diff.^2 * (weights[i] * weights[j] * volume_jacobian_) - linf_error = @. 
max(linf_error, abs(diff)) + @unpack vandermonde, weights = analyzer + @unpack node_coordinates = cache.elements + @unpack u_local, u_tmp1, x_local, x_tmp1 = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) + linf_error = copy(l2_error) + + # Iterate over all elements for error calculations + # Accumulate L2 error on the element first so that the order of summation is the + # same as in the parallel case to ensure exact equality. This facilitates easier parallel + # development and debugging (see + # https://github.com/trixi-framework/Trixi.jl/pull/850#pullrequestreview-757463943 for details). + for element in eachelement(dg, cache) + # Set up data structures for local element L2 error + l2_error_local = zero(l2_error) + + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, :, element), x_tmp1) + + # Calculate errors at each analysis node + volume_jacobian_ = volume_jacobian(element, mesh, cache) + + for j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), + t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i, j), equations) + l2_error_local += diff .^ 2 * (weights[i] * weights[j] * volume_jacobian_) + linf_error = @. max(linf_error, abs(diff)) + end + l2_error += l2_error_local end - l2_error += l2_error_local - end - # For L2 error, divide by total volume - total_volume_ = total_volume(mesh) - l2_error = @. sqrt(l2_error / total_volume_) + # For L2 error, divide by total volume + total_volume_ = total_volume(mesh) + l2_error = @. sqrt(l2_error / total_volume_) - return l2_error, linf_error + return l2_error, linf_error end - function calc_error_norms(func, u, t, analyzer, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, equations, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements - @unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) - linf_error = copy(l2_error) - total_volume = zero(real(mesh)) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, :, element), x_tmp1) - multiply_scalar_dimensionwise!(jacobian_local, vandermonde, inv.(view(inverse_jacobian, :, :, element)), jacobian_tmp1) - - # Calculate errors at each analysis node - @. jacobian_local = abs(jacobian_local) - - for j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i, j), equations) - l2_error += diff.^2 * (weights[i] * weights[j] * jacobian_local[i, j]) - linf_error = @. 
max(linf_error, abs(diff)) - total_volume += weights[i] * weights[j] * jacobian_local[i, j] + @unpack vandermonde, weights = analyzer + @unpack node_coordinates, inverse_jacobian = cache.elements + @unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) + linf_error = copy(l2_error) + total_volume = zero(real(mesh)) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, :, element), x_tmp1) + multiply_scalar_dimensionwise!(jacobian_local, vandermonde, + inv.(view(inverse_jacobian, :, :, element)), + jacobian_tmp1) + + # Calculate errors at each analysis node + @. jacobian_local = abs(jacobian_local) + + for j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), + t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i, j), equations) + l2_error += diff .^ 2 * (weights[i] * weights[j] * jacobian_local[i, j]) + linf_error = @. max(linf_error, abs(diff)) + total_volume += weights[i] * weights[j] * jacobian_local[i, j] + end end - end - # For L2 error, divide by total volume - l2_error = @. sqrt(l2_error / total_volume) + # For L2 error, divide by total volume + l2_error = @. sqrt(l2_error / total_volume) - return l2_error, linf_error + return l2_error, linf_error end - function integrate_via_indices(func::Func, u, mesh::TreeMesh{2}, equations, dg::DGSEM, cache, - args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, 1, equations, dg, args...)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - volume_jacobian_ = volume_jacobian(element, mesh, cache) - for j in eachnode(dg), i in eachnode(dg) - integral += volume_jacobian_ * weights[i] * weights[j] * func(u, i, j, element, equations, dg, args...) + args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, 1, equations, dg, args...)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + volume_jacobian_ = volume_jacobian(element, mesh, cache) + for j in eachnode(dg), i in eachnode(dg) + integral += volume_jacobian_ * weights[i] * weights[j] * + func(u, i, j, element, equations, dg, args...) 
+ end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume(mesh) - end + # Normalize with total volume + if normalize + integral = integral / total_volume(mesh) + end - return integral + return integral end - function integrate_via_indices(func::Func, u, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, equations, - dg::DGSEM, cache, args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, 1, equations, dg, args...)) - total_volume = zero(real(mesh)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, element])) - integral += volume_jacobian * weights[i] * weights[j] * func(u, i, j, element, equations, dg, args...) - total_volume += volume_jacobian * weights[i] * weights[j] + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, equations, + dg::DGSEM, cache, args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, 1, equations, dg, args...)) + total_volume = zero(real(mesh)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, element])) + integral += volume_jacobian * weights[i] * weights[j] * + func(u, i, j, element, equations, dg, args...) + total_volume += volume_jacobian * weights[i] * weights[j] + end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume - end + # Normalize with total volume + if normalize + integral = integral / total_volume + end - return integral + return integral end - function integrate(func::Func, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, - equations, dg::DG, cache; normalize=true) where {Func} - integrate_via_indices(u, mesh, equations, dg, cache; normalize=normalize) do u, i, j, element, equations, dg - u_local = get_node_vars(u, equations, dg, i, j, element) - return func(u_local, equations) - end + mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, + equations, dg::DG, cache; normalize = true) where {Func} + integrate_via_indices(u, mesh, equations, dg, cache; + normalize = normalize) do u, i, j, element, equations, dg + u_local = get_node_vars(u, equations, dg, i, j, element) + return func(u_local, equations) + end end - function analyze(::typeof(entropy_timederivative), du, u, t, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, equations, dg::DG, cache) - # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ - integrate_via_indices(u, mesh, equations, dg, cache, du) do u, i, j, element, equations, dg, du - u_node = get_node_vars(u, equations, dg, i, j, element) - du_node = get_node_vars(du, equations, dg, i, j, element) - dot(cons2entropy(u_node, equations), du_node) - end + # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ + integrate_via_indices(u, mesh, equations, dg, cache, + du) do u, i, j, element, equations, dg, du + u_node = get_node_vars(u, equations, dg, i, j, element) + du_node = get_node_vars(du, equations, dg, i, j, element) + dot(cons2entropy(u_node, equations), du_node) + end 
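`integrate_via_indices` is invoked with Julia's `do`-block syntax throughout: the block becomes an anonymous function passed as the first argument of the call. A self-contained toy version of the pattern, integrating a cubic with Simpson weights:

```julia
# Approximate an integral as a weighted sum of nodal values.
function quadrature(f, nodes, weights)
    return sum(w * f(x) for (x, w) in zip(nodes, weights))
end

# Equivalent to quadrature(x -> x^3 + 1, ...); the do block is the integrand.
result = quadrature([-1.0, 0.0, 1.0], [1 / 3, 4 / 3, 1 / 3]) do x
    x^3 + 1
end
println(result)  # 2.0, exact since Simpson's rule integrates cubics exactly
```
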
end - - function analyze(::Val{:l2_divb}, du, u, t, mesh::TreeMesh{2}, equations::IdealGlmMhdEquations2D, dg::DGSEM, cache) - integrate_via_indices(u, mesh, equations, dg, cache, cache, dg.basis.derivative_matrix) do u, i, j, element, equations, dg, cache, derivative_matrix - divb = zero(eltype(u)) - for k in eachnode(dg) - divb += ( derivative_matrix[i, k] * u[6, k, j, element] + - derivative_matrix[j, k] * u[7, i, k, element] ) - end - divb *= cache.elements.inverse_jacobian[element] - divb^2 - end |> sqrt + integrate_via_indices(u, mesh, equations, dg, cache, cache, + dg.basis.derivative_matrix) do u, i, j, element, equations, + dg, cache, derivative_matrix + divb = zero(eltype(u)) + for k in eachnode(dg) + divb += (derivative_matrix[i, k] * u[6, k, j, element] + + derivative_matrix[j, k] * u[7, i, k, element]) + end + divb *= cache.elements.inverse_jacobian[element] + divb^2 + end |> sqrt end function analyze(::Val{:l2_divb}, du, u, t, mesh::TreeMesh{2}, equations::IdealGlmMhdMulticomponentEquations2D, dg::DG, cache) - integrate_via_indices(u, mesh, equations, dg, cache, cache, dg.basis.derivative_matrix) do u, i, j, element, equations, dg, cache, derivative_matrix - divb = zero(eltype(u)) - for k in eachnode(dg) - divb += ( derivative_matrix[i, k] * u[5, k, j, element] + - derivative_matrix[j, k] * u[6, i, k, element] ) - end - divb *= cache.elements.inverse_jacobian[element] - divb^2 - end |> sqrt + integrate_via_indices(u, mesh, equations, dg, cache, cache, + dg.basis.derivative_matrix) do u, i, j, element, equations, + dg, cache, derivative_matrix + divb = zero(eltype(u)) + for k in eachnode(dg) + divb += (derivative_matrix[i, k] * u[5, k, j, element] + + derivative_matrix[j, k] * u[6, i, k, element]) + end + divb *= cache.elements.inverse_jacobian[element] + divb^2 + end |> sqrt end function analyze(::Val{:l2_divb}, du, u, t, - mesh::Union{StructuredMesh{2},UnstructuredMesh2D,P4estMesh{2}}, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, equations::IdealGlmMhdEquations2D, dg::DGSEM, cache) - @unpack contravariant_vectors = cache.elements - integrate_via_indices(u, mesh, equations, dg, cache, cache, dg.basis.derivative_matrix) do u, i, j, element, equations, dg, cache, derivative_matrix - divb = zero(eltype(u)) - # Get the contravariant vectors Ja^1 and Ja^2 - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) - # Compute the transformed divergence - for k in eachnode(dg) - divb += ( derivative_matrix[i, k] * (Ja11 * u[6, k, j, element] + Ja12 * u[7, k, j, element]) + - derivative_matrix[j, k] * (Ja21 * u[6, i, k, element] + Ja22 * u[7, i, k, element]) ) - end - divb *= cache.elements.inverse_jacobian[i, j, element] - divb^2 - end |> sqrt + @unpack contravariant_vectors = cache.elements + integrate_via_indices(u, mesh, equations, dg, cache, cache, + dg.basis.derivative_matrix) do u, i, j, element, equations, + dg, cache, derivative_matrix + divb = zero(eltype(u)) + # Get the contravariant vectors Ja^1 and Ja^2 + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) + # Compute the transformed divergence + for k in eachnode(dg) + divb += (derivative_matrix[i, k] * + (Ja11 * u[6, k, j, element] + Ja12 * u[7, k, j, element]) + + derivative_matrix[j, k] * + (Ja21 * u[6, i, k, element] + Ja22 * u[7, i, k, element])) + end + divb *= 
cache.elements.inverse_jacobian[i, j, element] + divb^2 + end |> sqrt end - function analyze(::Val{:linf_divb}, du, u, t, mesh::TreeMesh{2}, equations::IdealGlmMhdEquations2D, dg::DGSEM, cache) - @unpack derivative_matrix, weights = dg.basis - - # integrate over all elements to get the divergence-free condition errors - linf_divb = zero(eltype(u)) - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - divb = zero(eltype(u)) - for k in eachnode(dg) - divb += ( derivative_matrix[i, k] * u[6, k, j, element] + - derivative_matrix[j, k] * u[7, i, k, element] ) - end - divb *= cache.elements.inverse_jacobian[element] - linf_divb = max(linf_divb, abs(divb)) + @unpack derivative_matrix, weights = dg.basis + + # integrate over all elements to get the divergence-free condition errors + linf_divb = zero(eltype(u)) + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + divb = zero(eltype(u)) + for k in eachnode(dg) + divb += (derivative_matrix[i, k] * u[6, k, j, element] + + derivative_matrix[j, k] * u[7, i, k, element]) + end + divb *= cache.elements.inverse_jacobian[element] + linf_divb = max(linf_divb, abs(divb)) + end end - end - return linf_divb + return linf_divb end function analyze(::Val{:linf_divb}, du, u, t, mesh::TreeMesh{2}, equations::IdealGlmMhdMulticomponentEquations2D, dg::DG, cache) - @unpack derivative_matrix, weights = dg.basis - - # integrate over all elements to get the divergence-free condition errors - linf_divb = zero(eltype(u)) - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - divb = zero(eltype(u)) - for k in eachnode(dg) - divb += ( derivative_matrix[i, k] * u[5, k, j, element] + - derivative_matrix[j, k] * u[6, i, k, element] ) - end - divb *= cache.elements.inverse_jacobian[element] - linf_divb = max(linf_divb, abs(divb)) + @unpack derivative_matrix, weights = dg.basis + + # integrate over all elements to get the divergence-free condition errors + linf_divb = zero(eltype(u)) + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + divb = zero(eltype(u)) + for k in eachnode(dg) + divb += (derivative_matrix[i, k] * u[5, k, j, element] + + derivative_matrix[j, k] * u[6, i, k, element]) + end + divb *= cache.elements.inverse_jacobian[element] + linf_divb = max(linf_divb, abs(divb)) + end end - end - return linf_divb + return linf_divb end function analyze(::Val{:linf_divb}, du, u, t, - mesh::Union{StructuredMesh{2},UnstructuredMesh2D,P4estMesh{2}}, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, equations::IdealGlmMhdEquations2D, dg::DGSEM, cache) - @unpack derivative_matrix, weights = dg.basis - @unpack contravariant_vectors = cache.elements - - # integrate over all elements to get the divergence-free condition errors - linf_divb = zero(eltype(u)) - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - divb = zero(eltype(u)) - # Get the contravariant vectors Ja^1 and Ja^2 - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) - # Compute the transformed divergence - for k in eachnode(dg) - divb += ( derivative_matrix[i, k] * (Ja11 * u[6, k, j, element] + Ja12 * u[7, k, j, element]) + - derivative_matrix[j, k] * (Ja21 * u[6, i, k, element] + Ja22 * u[7, i, k, element]) ) - end - divb *= cache.elements.inverse_jacobian[i, j, element] - linf_divb = max(linf_divb, abs(divb)) + @unpack derivative_matrix, 
weights = dg.basis + @unpack contravariant_vectors = cache.elements + + # integrate over all elements to get the divergence-free condition errors + linf_divb = zero(eltype(u)) + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + divb = zero(eltype(u)) + # Get the contravariant vectors Ja^1 and Ja^2 + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, + element) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, + element) + # Compute the transformed divergence + for k in eachnode(dg) + divb += (derivative_matrix[i, k] * + (Ja11 * u[6, k, j, element] + Ja12 * u[7, k, j, element]) + + derivative_matrix[j, k] * + (Ja21 * u[6, i, k, element] + Ja22 * u[7, i, k, element])) + end + divb *= cache.elements.inverse_jacobian[i, j, element] + linf_divb = max(linf_divb, abs(divb)) + end end - end - return linf_divb + return linf_divb end - - end # @muladd diff --git a/src/callbacks_step/analysis_dg2d_parallel.jl b/src/callbacks_step/analysis_dg2d_parallel.jl index 2d382604030..a04bf732604 100644 --- a/src/callbacks_step/analysis_dg2d_parallel.jl +++ b/src/callbacks_step/analysis_dg2d_parallel.jl @@ -3,200 +3,209 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function calc_error_norms(func, u, t, analyzer, mesh::ParallelTreeMesh{2}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - l2_errors, linf_errors = calc_error_norms_per_element(func, u, t, analyzer, - mesh, equations, initial_condition, - dg, cache, cache_analysis) - - # Collect local error norms for each element on root process. That way, when aggregating the L2 - # errors, the order of summation is the same as in the serial case to ensure exact equality. - # This facilitates easier parallel development and debugging (see - # https://github.com/trixi-framework/Trixi.jl/pull/850#pullrequestreview-757463943 for details). - # Note that this approach does not scale. - if mpi_isroot() - global_l2_errors = zeros(eltype(l2_errors), cache.mpi_cache.n_elements_global) - global_linf_errors = similar(global_l2_errors) - - n_elements_by_rank = parent(cache.mpi_cache.n_elements_by_rank) # convert OffsetArray to Array - l2_buf = MPI.VBuffer(global_l2_errors, n_elements_by_rank) - linf_buf = MPI.VBuffer(global_linf_errors, n_elements_by_rank) - MPI.Gatherv!(l2_errors, l2_buf, mpi_root(), mpi_comm()) - MPI.Gatherv!(linf_errors, linf_buf, mpi_root(), mpi_comm()) - else - MPI.Gatherv!(l2_errors, nothing, mpi_root(), mpi_comm()) - MPI.Gatherv!(linf_errors, nothing, mpi_root(), mpi_comm()) - end - - # Aggregate element error norms on root process - if mpi_isroot() - # sum(global_l2_errors) does not produce the same result as in the serial case, thus a - # hand-written loop is used - l2_error = zero(eltype(global_l2_errors)) - for error in global_l2_errors - l2_error += error + l2_errors, linf_errors = calc_error_norms_per_element(func, u, t, analyzer, + mesh, equations, + initial_condition, + dg, cache, cache_analysis) + + # Collect local error norms for each element on root process. That way, when aggregating the L2 + # errors, the order of summation is the same as in the serial case to ensure exact equality. + # This facilitates easier parallel development and debugging (see + # https://github.com/trixi-framework/Trixi.jl/pull/850#pullrequestreview-757463943 for details). + # Note that this approach does not scale. 
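The root-only reduction strategy described in the comments above has a small core: every rank contributes a local value, and only the root rank receives the combined result. A minimal sketch assuming MPI.jl is installed, using the same positional `MPI.Reduce` signature as the code below; run it with, e.g., `mpiexec -n 4 julia reduce_demo.jl` (a hypothetical file name):

```julia
using MPI

MPI.Init()
comm = MPI.COMM_WORLD
rank = MPI.Comm_rank(comm)

local_value = Float64(rank + 1)              # rank-local contribution
total = MPI.Reduce(local_value, +, 0, comm)  # only rank 0 receives the sum

if rank == 0
    println("sum over all ranks: ", total)   # 1 + 2 + ... + nranks
end
MPI.Finalize()
```
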
+ if mpi_isroot() + global_l2_errors = zeros(eltype(l2_errors), cache.mpi_cache.n_elements_global) + global_linf_errors = similar(global_l2_errors) + + n_elements_by_rank = parent(cache.mpi_cache.n_elements_by_rank) # convert OffsetArray to Array + l2_buf = MPI.VBuffer(global_l2_errors, n_elements_by_rank) + linf_buf = MPI.VBuffer(global_linf_errors, n_elements_by_rank) + MPI.Gatherv!(l2_errors, l2_buf, mpi_root(), mpi_comm()) + MPI.Gatherv!(linf_errors, linf_buf, mpi_root(), mpi_comm()) + else + MPI.Gatherv!(l2_errors, nothing, mpi_root(), mpi_comm()) + MPI.Gatherv!(linf_errors, nothing, mpi_root(), mpi_comm()) end - linf_error = reduce((x, y) -> max.(x, y), global_linf_errors) - # For L2 error, divide by total volume - total_volume_ = total_volume(mesh) - l2_error = @. sqrt(l2_error / total_volume_) - else - l2_error = convert(eltype(l2_errors), NaN * zero(eltype(l2_errors))) - linf_error = convert(eltype(linf_errors), NaN * zero(eltype(linf_errors))) - end + # Aggregate element error norms on root process + if mpi_isroot() + # sum(global_l2_errors) does not produce the same result as in the serial case, thus a + # hand-written loop is used + l2_error = zero(eltype(global_l2_errors)) + for error in global_l2_errors + l2_error += error + end + linf_error = reduce((x, y) -> max.(x, y), global_linf_errors) + + # For L2 error, divide by total volume + total_volume_ = total_volume(mesh) + l2_error = @. sqrt(l2_error / total_volume_) + else + l2_error = convert(eltype(l2_errors), NaN * zero(eltype(l2_errors))) + linf_error = convert(eltype(linf_errors), NaN * zero(eltype(linf_errors))) + end - return l2_error, linf_error + return l2_error, linf_error end function calc_error_norms_per_element(func, u, t, analyzer, - mesh::ParallelTreeMesh{2}, equations, initial_condition, + mesh::ParallelTreeMesh{2}, equations, + initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates = cache.elements - @unpack u_local, u_tmp1, x_local, x_tmp1 = cache_analysis - - # Set up data structures - T = typeof(zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations))) - l2_errors = zeros(T, nelements(dg, cache)) - linf_errors = copy(l2_errors) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, :, element), x_tmp1) - - # Calculate errors at each analysis node - volume_jacobian_ = volume_jacobian(element, mesh, cache) - - for j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i, j), equations) - l2_errors[element] += diff.^2 * (weights[i] * weights[j] * volume_jacobian_) - linf_errors[element] = @. 
max(linf_errors[element], abs(diff)) + @unpack vandermonde, weights = analyzer + @unpack node_coordinates = cache.elements + @unpack u_local, u_tmp1, x_local, x_tmp1 = cache_analysis + + # Set up data structures + T = typeof(zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations))) + l2_errors = zeros(T, nelements(dg, cache)) + linf_errors = copy(l2_errors) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, :, element), x_tmp1) + + # Calculate errors at each analysis node + volume_jacobian_ = volume_jacobian(element, mesh, cache) + + for j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), + t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i, j), equations) + l2_errors[element] += diff .^ 2 * + (weights[i] * weights[j] * volume_jacobian_) + linf_errors[element] = @. max(linf_errors[element], abs(diff)) + end end - end - return l2_errors, linf_errors + return l2_errors, linf_errors end - function calc_error_norms(func, u, t, analyzer, mesh::ParallelP4estMesh{2}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements - @unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) - linf_error = copy(l2_error) - volume = zero(real(mesh)) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, :, element), x_tmp1) - multiply_scalar_dimensionwise!(jacobian_local, vandermonde, inv.(view(inverse_jacobian, :, :, element)), jacobian_tmp1) - - # Calculate errors at each analysis node - @. jacobian_local = abs(jacobian_local) - - for j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i, j), equations) - l2_error += diff.^2 * (weights[i] * weights[j] * jacobian_local[i, j]) - linf_error = @. 
max(linf_error, abs(diff)) - volume += weights[i] * weights[j] * jacobian_local[i, j] + @unpack vandermonde, weights = analyzer + @unpack node_coordinates, inverse_jacobian = cache.elements + @unpack u_local, u_tmp1, x_local, x_tmp1, jacobian_local, jacobian_tmp1 = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) + linf_error = copy(l2_error) + volume = zero(real(mesh)) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, element), u_tmp1) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, :, element), x_tmp1) + multiply_scalar_dimensionwise!(jacobian_local, vandermonde, + inv.(view(inverse_jacobian, :, :, element)), + jacobian_tmp1) + + # Calculate errors at each analysis node + @. jacobian_local = abs(jacobian_local) + + for j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j), + t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i, j), equations) + l2_error += diff .^ 2 * (weights[i] * weights[j] * jacobian_local[i, j]) + linf_error = @. max(linf_error, abs(diff)) + volume += weights[i] * weights[j] * jacobian_local[i, j] + end end - end - - # Accumulate local results on root process - global_l2_error = Vector(l2_error) - global_linf_error = Vector(linf_error) - MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm()) - MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm()) - total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) - if mpi_isroot() - l2_error = convert(typeof(l2_error), global_l2_error) - linf_error = convert(typeof(linf_error), global_linf_error) - # For L2 error, divide by total volume - l2_error = @. sqrt(l2_error / total_volume) - else - l2_error = convert(typeof(l2_error), NaN * global_l2_error) - linf_error = convert(typeof(linf_error), NaN * global_linf_error) - end - - return l2_error, linf_error -end + # Accumulate local results on root process + global_l2_error = Vector(l2_error) + global_linf_error = Vector(linf_error) + MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm()) + MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm()) + total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) + if mpi_isroot() + l2_error = convert(typeof(l2_error), global_l2_error) + linf_error = convert(typeof(linf_error), global_linf_error) + # For L2 error, divide by total volume + l2_error = @. sqrt(l2_error / total_volume) + else + l2_error = convert(typeof(l2_error), NaN * global_l2_error) + linf_error = convert(typeof(linf_error), NaN * global_linf_error) + end + + return l2_error, linf_error +end function integrate_via_indices(func::Func, u, mesh::ParallelTreeMesh{2}, equations, dg::DGSEM, cache, - args...; normalize=true) where {Func} - # call the method accepting a general `mesh::TreeMesh{2}` - # TODO: MPI, we should improve this; maybe we should dispatch on `u` - # and create some MPI array type, overloading broadcasting and mapreduce etc. - # Then, this specific array type should also work well with DiffEq etc. - local_integral = invoke(integrate_via_indices, - Tuple{typeof(func), typeof(u), TreeMesh{2}, typeof(equations), - typeof(dg), typeof(cache), map(typeof, args)...}, - func, u, mesh, equations, dg, cache, args..., normalize=normalize) - - # OBS! 
Global results are only calculated on MPI root, all other domains receive `nothing` - global_integral = MPI.Reduce!(Ref(local_integral), +, mpi_root(), mpi_comm()) - if mpi_isroot() - integral = convert(typeof(local_integral), global_integral[]) - else - integral = convert(typeof(local_integral), NaN * local_integral) - end - - return integral -end + args...; normalize = true) where {Func} + # call the method accepting a general `mesh::TreeMesh{2}` + # TODO: MPI, we should improve this; maybe we should dispatch on `u` + # and create some MPI array type, overloading broadcasting and mapreduce etc. + # Then, this specific array type should also work well with DiffEq etc. + local_integral = invoke(integrate_via_indices, + Tuple{typeof(func), typeof(u), TreeMesh{2}, + typeof(equations), + typeof(dg), typeof(cache), map(typeof, args)...}, + func, u, mesh, equations, dg, cache, args..., + normalize = normalize) + + # OBS! Global results are only calculated on MPI root, all other domains receive `nothing` + global_integral = MPI.Reduce!(Ref(local_integral), +, mpi_root(), mpi_comm()) + if mpi_isroot() + integral = convert(typeof(local_integral), global_integral[]) + else + integral = convert(typeof(local_integral), NaN * local_integral) + end + return integral +end function integrate_via_indices(func::Func, u, mesh::ParallelP4estMesh{2}, equations, - dg::DGSEM, cache, args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - # Pass `zero(SVector{nvariables(equations), eltype(u))}` to `func` since `u` might be empty, if the - # current rank has no elements, see also https://github.com/trixi-framework/Trixi.jl/issues/1096. - integral = zero(func(zero(SVector{nvariables(equations), eltype(u)}), 1, 1, 1, equations, dg, args...)) - volume = zero(real(mesh)) - - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, element])) - integral += volume_jacobian * weights[i] * weights[j] * func(u, i, j, element, equations, dg, args...) - volume += volume_jacobian * weights[i] * weights[j] + dg::DGSEM, cache, args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + # Pass `zero(SVector{nvariables(equations), eltype(u))}` to `func` since `u` might be empty, if the + # current rank has no elements, see also https://github.com/trixi-framework/Trixi.jl/issues/1096. + integral = zero(func(zero(SVector{nvariables(equations), eltype(u)}), 1, 1, 1, + equations, dg, args...)) + volume = zero(real(mesh)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, element])) + integral += volume_jacobian * weights[i] * weights[j] * + func(u, i, j, element, equations, dg, args...) 
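            # Also accumulate the local volume: it is MPI-reduced together with
            # the integral below so that the result can be normalized globally.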
+ volume += volume_jacobian * weights[i] * weights[j] + end end - end - - global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm()) - total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) - if mpi_isroot() - integral = convert(typeof(integral), global_integral[]) - else - integral = convert(typeof(integral), NaN * integral) - total_volume = volume # non-root processes receive nothing from reduce -> overwrite - end - - # Normalize with total volume - if normalize - integral = integral / total_volume - end - - return integral -end + global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm()) + total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) + if mpi_isroot() + integral = convert(typeof(integral), global_integral[]) + else + integral = convert(typeof(integral), NaN * integral) + total_volume = volume # non-root processes receive nothing from reduce -> overwrite + end + # Normalize with total volume + if normalize + integral = integral / total_volume + end + + return integral +end end # @muladd diff --git a/src/callbacks_step/analysis_dg3d.jl b/src/callbacks_step/analysis_dg3d.jl index 77cf1f819ea..76aba813fab 100644 --- a/src/callbacks_step/analysis_dg3d.jl +++ b/src/callbacks_step/analysis_dg3d.jl @@ -3,319 +3,368 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function create_cache_analysis(analyzer, mesh::TreeMesh{3}, equations, dg::DG, cache, RealT, uEltype) - # pre-allocate buffers - # We use `StrideArray`s here since these buffers are used in performance-critical - # places and the additional information passed to the compiler makes them faster - # than native `Array`s. - u_local = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - u_tmp1 = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) - u_tmp2 = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - x_local = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - x_tmp1 = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) - x_tmp2 = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - - return (; u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2) + # pre-allocate buffers + # We use `StrideArray`s here since these buffers are used in performance-critical + # places and the additional information passed to the compiler makes them faster + # than native `Array`s. 
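    # `u_tmp1` and `u_tmp2` hold the intermediate states of the
    # dimension-by-dimension interpolation in `multiply_dimensionwise!`
    # from the `nnodes(dg)` solution nodes to the `nnodes(analyzer)` nodes.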
+ u_local = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) + u_tmp1 = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) + u_tmp2 = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) + x_local = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) + x_tmp1 = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) + x_tmp2 = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) + + return (; u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2) end - function create_cache_analysis(analyzer, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, equations, dg::DG, cache, RealT, uEltype) - # pre-allocate buffers - # We use `StrideArray`s here since these buffers are used in performance-critical - # places and the additional information passed to the compiler makes them faster - # than native `Array`s. - u_local = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - u_tmp1 = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) - u_tmp2 = StrideArray(undef, uEltype, - StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - x_local = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - x_tmp1 = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) - x_tmp2 = StrideArray(undef, RealT, - StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - jacobian_local = StrideArray(undef, RealT, - StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) - jacobian_tmp1 = StrideArray(undef, RealT, - StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) - jacobian_tmp2 = StrideArray(undef, RealT, - StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) - - return (; u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, jacobian_tmp1, jacobian_tmp2) + # pre-allocate buffers + # We use `StrideArray`s here since these buffers are used in performance-critical + # places and the additional information passed to the compiler makes them faster + # than native `Array`s. 
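    # On curved meshes, the Jacobian varies from node to node. Hence, this
    # method additionally allocates `jacobian_local` and its temporaries,
    # which are interpolated to the analysis nodes as well.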
+ u_local = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) + u_tmp1 = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) + u_tmp2 = StrideArray(undef, uEltype, + StaticInt(nvariables(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) + x_local = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(analyzer))) + x_tmp1 = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(dg)), StaticInt(nnodes(dg))) + x_tmp2 = StrideArray(undef, RealT, + StaticInt(ndims(equations)), StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) + jacobian_local = StrideArray(undef, RealT, + StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer))) + jacobian_tmp1 = StrideArray(undef, RealT, + StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg)), + StaticInt(nnodes(dg))) + jacobian_tmp2 = StrideArray(undef, RealT, + StaticInt(nnodes(analyzer)), + StaticInt(nnodes(analyzer)), StaticInt(nnodes(dg))) + + return (; u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, + jacobian_tmp1, jacobian_tmp2) end - function calc_error_norms(func, u, t, analyzer, mesh::TreeMesh{3}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates = cache.elements - @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2 = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) - linf_error = copy(l2_error) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, :, element), u_tmp1, u_tmp2) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, :, :, element), x_tmp1, x_tmp2) - - # Calculate errors at each analysis node - volume_jacobian_ = volume_jacobian(element, mesh, cache) - - for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j, k), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i, j, k), equations) - l2_error += diff.^2 * (weights[i] * weights[j] * weights[k] * volume_jacobian_) - linf_error = @. 
max(linf_error, abs(diff)) + @unpack vandermonde, weights = analyzer + @unpack node_coordinates = cache.elements + @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2 = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) + linf_error = copy(l2_error) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, :, element), + u_tmp1, u_tmp2) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, :, :, element), x_tmp1, + x_tmp2) + + # Calculate errors at each analysis node + volume_jacobian_ = volume_jacobian(element, mesh, cache) + + for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j, + k), t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i, j, k), equations) + l2_error += diff .^ 2 * + (weights[i] * weights[j] * weights[k] * volume_jacobian_) + linf_error = @. max(linf_error, abs(diff)) + end end - end - # For L2 error, divide by total volume - total_volume_ = total_volume(mesh) - l2_error = @. sqrt(l2_error / total_volume_) + # For L2 error, divide by total volume + total_volume_ = total_volume(mesh) + l2_error = @. sqrt(l2_error / total_volume_) - return l2_error, linf_error + return l2_error, linf_error end - function calc_error_norms(func, u, t, analyzer, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements - @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, jacobian_tmp1, jacobian_tmp2 = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) - linf_error = copy(l2_error) - total_volume = zero(real(mesh)) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, :, element), u_tmp1, u_tmp2) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, :, :, element), x_tmp1, x_tmp2) - multiply_scalar_dimensionwise!(jacobian_local, vandermonde, inv.(view(inverse_jacobian, :, :, :, element)), jacobian_tmp1, jacobian_tmp2) - - # Calculate errors at each analysis node - @. jacobian_local = abs(jacobian_local) - - for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j, k), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i, j, k), equations) - l2_error += diff.^2 * (weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k]) - linf_error = @. 
max(linf_error, abs(diff)) - total_volume += weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k] + @unpack vandermonde, weights = analyzer + @unpack node_coordinates, inverse_jacobian = cache.elements + @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, jacobian_tmp1, jacobian_tmp2 = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) + linf_error = copy(l2_error) + total_volume = zero(real(mesh)) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, :, element), + u_tmp1, u_tmp2) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, :, :, element), x_tmp1, + x_tmp2) + multiply_scalar_dimensionwise!(jacobian_local, vandermonde, + inv.(view(inverse_jacobian, :, :, :, element)), + jacobian_tmp1, jacobian_tmp2) + + # Calculate errors at each analysis node + @. jacobian_local = abs(jacobian_local) + + for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j, + k), t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i, j, k), equations) + l2_error += diff .^ 2 * + (weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k]) + linf_error = @. max(linf_error, abs(diff)) + total_volume += weights[i] * weights[j] * weights[k] * + jacobian_local[i, j, k] + end end - end - # For L2 error, divide by total volume - l2_error = @. sqrt(l2_error / total_volume) + # For L2 error, divide by total volume + l2_error = @. sqrt(l2_error / total_volume) - return l2_error, linf_error + return l2_error, linf_error end - function integrate_via_indices(func::Func, u, mesh::TreeMesh{3}, equations, dg::DGSEM, cache, - args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - volume_jacobian_ = volume_jacobian(element, mesh, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - integral += volume_jacobian_ * weights[i] * weights[j] * weights[k] * func(u, i, j, k, element, equations, dg, args...) + args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + volume_jacobian_ = volume_jacobian(element, mesh, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + integral += volume_jacobian_ * weights[i] * weights[j] * weights[k] * + func(u, i, j, k, element, equations, dg, args...) 
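            # Each node thus contributes |J| * w_i * w_j * w_k * f, i.e., the
            # loop realizes the tensor-product quadrature rule on every element.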
+ end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume(mesh) - end + # Normalize with total volume + if normalize + integral = integral / total_volume(mesh) + end - return integral + return integral end - function integrate_via_indices(func::Func, u, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, equations, dg::DGSEM, cache, - args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) - total_volume = zero(real(mesh)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, k, element])) - integral += volume_jacobian * weights[i] * weights[j] * weights[k] * func(u, i, j, k, element, equations, dg, args...) - total_volume += volume_jacobian * weights[i] * weights[j] * weights[k] + args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) + total_volume = zero(real(mesh)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, k, element])) + integral += volume_jacobian * weights[i] * weights[j] * weights[k] * + func(u, i, j, k, element, equations, dg, args...) + total_volume += volume_jacobian * weights[i] * weights[j] * weights[k] + end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume - end + # Normalize with total volume + if normalize + integral = integral / total_volume + end - return integral + return integral end - function integrate(func::Func, u, mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, - equations, dg::DG, cache; normalize=true) where {Func} - integrate_via_indices(u, mesh, equations, dg, cache; normalize=normalize) do u, i, j, k, element, equations, dg - u_local = get_node_vars(u, equations, dg, i, j, k, element) - return func(u_local, equations) - end + equations, dg::DG, cache; normalize = true) where {Func} + integrate_via_indices(u, mesh, equations, dg, cache; + normalize = normalize) do u, i, j, k, element, equations, dg + u_local = get_node_vars(u, equations, dg, i, j, k, element) + return func(u_local, equations) + end end - function integrate(func::Func, u, mesh::TreeMesh{3}, equations, equations_parabolic, dg::DGSEM, - cache, cache_parabolic; normalize=true) where {Func} - gradients_x, gradients_y, gradients_z = cache_parabolic.gradients - integrate_via_indices(u, mesh, equations, dg, cache; normalize=normalize) do u, i, j, k, element, equations, dg - u_local = get_node_vars(u, equations, dg, i, j, k, element) - gradients_1_local = get_node_vars(gradients_x, equations_parabolic, dg, i, j, k, element) - gradients_2_local = get_node_vars(gradients_y, equations_parabolic, dg, i, j, k, element) - gradients_3_local = get_node_vars(gradients_z, equations_parabolic, dg, i, j, k, element) - return func(u_local, (gradients_1_local, gradients_2_local, gradients_3_local), equations_parabolic) - end + cache, cache_parabolic; normalize = true) where {Func} + gradients_x, gradients_y, gradients_z = cache_parabolic.gradients + integrate_via_indices(u, mesh, equations, dg, cache; + 
normalize = normalize) do u, i, j, k, element, equations, dg + u_local = get_node_vars(u, equations, dg, i, j, k, element) + gradients_1_local = get_node_vars(gradients_x, equations_parabolic, dg, i, j, k, + element) + gradients_2_local = get_node_vars(gradients_y, equations_parabolic, dg, i, j, k, + element) + gradients_3_local = get_node_vars(gradients_z, equations_parabolic, dg, i, j, k, + element) + return func(u_local, (gradients_1_local, gradients_2_local, gradients_3_local), + equations_parabolic) + end end - function analyze(::typeof(entropy_timederivative), du, u, t, mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, equations, dg::DG, cache) - # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ - integrate_via_indices(u, mesh, equations, dg, cache, du) do u, i, j, k, element, equations, dg, du - u_node = get_node_vars(u, equations, dg, i, j, k, element) - du_node = get_node_vars(du, equations, dg, i, j, k, element) - dot(cons2entropy(u_node, equations), du_node) - end + # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ + integrate_via_indices(u, mesh, equations, dg, cache, + du) do u, i, j, k, element, equations, dg, du + u_node = get_node_vars(u, equations, dg, i, j, k, element) + du_node = get_node_vars(du, equations, dg, i, j, k, element) + dot(cons2entropy(u_node, equations), du_node) + end end - - function analyze(::Val{:l2_divb}, du, u, t, mesh::TreeMesh{3}, equations::IdealGlmMhdEquations3D, dg::DGSEM, cache) - integrate_via_indices(u, mesh, equations, dg, cache, cache, dg.basis.derivative_matrix) do u, i, j, k, element, equations, dg, cache, derivative_matrix - divb = zero(eltype(u)) - for l in eachnode(dg) - divb += ( derivative_matrix[i, l] * u[6, l, j, k, element] + - derivative_matrix[j, l] * u[7, i, l, k, element] + - derivative_matrix[k, l] * u[8, i, j, l, element] ) - end - divb *= cache.elements.inverse_jacobian[element] - divb^2 - end |> sqrt + integrate_via_indices(u, mesh, equations, dg, cache, cache, + dg.basis.derivative_matrix) do u, i, j, k, element, equations, + dg, cache, derivative_matrix + divb = zero(eltype(u)) + for l in eachnode(dg) + divb += (derivative_matrix[i, l] * u[6, l, j, k, element] + + derivative_matrix[j, l] * u[7, i, l, k, element] + + derivative_matrix[k, l] * u[8, i, j, l, element]) + end + divb *= cache.elements.inverse_jacobian[element] + divb^2 + end |> sqrt end function analyze(::Val{:l2_divb}, du, u, t, - mesh::Union{StructuredMesh{3}, P4estMesh{3}}, equations::IdealGlmMhdEquations3D, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + equations::IdealGlmMhdEquations3D, dg::DGSEM, cache) - @unpack contravariant_vectors = cache.elements - integrate_via_indices(u, mesh, equations, dg, cache, cache, dg.basis.derivative_matrix) do u, i, j, k, element, equations, dg, cache, derivative_matrix - divb = zero(eltype(u)) - # Get the contravariant vectors Ja^1, Ja^2, and Ja^3 - Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) - Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) - Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) - # Compute the transformed divergence - for l in eachnode(dg) - divb += ( derivative_matrix[i, l] * (Ja11 * u[6, l, j, k, element] + Ja12 * u[7, l, j, k, element] + Ja13 * u[8, l, j, k, element]) + - derivative_matrix[j, l] * (Ja21 * u[6, i, l, k, element] + Ja22 * u[7, i, l, k, element] + Ja23 * u[8, i, l, k, element]) + - derivative_matrix[k, l] * (Ja31 * u[6, i, j, l, element] + Ja32 * u[7, i, j, l, element] + Ja33 * u[8, i, j, l, 
element]) ) - end - divb *= cache.elements.inverse_jacobian[i, j, k, element] - divb^2 - end |> sqrt + @unpack contravariant_vectors = cache.elements + integrate_via_indices(u, mesh, equations, dg, cache, cache, + dg.basis.derivative_matrix) do u, i, j, k, element, equations, + dg, cache, derivative_matrix + divb = zero(eltype(u)) + # Get the contravariant vectors Ja^1, Ja^2, and Ja^3 + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k, + element) + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k, + element) + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k, + element) + # Compute the transformed divergence + for l in eachnode(dg) + divb += (derivative_matrix[i, l] * + (Ja11 * u[6, l, j, k, element] + Ja12 * u[7, l, j, k, element] + + Ja13 * u[8, l, j, k, element]) + + derivative_matrix[j, l] * + (Ja21 * u[6, i, l, k, element] + Ja22 * u[7, i, l, k, element] + + Ja23 * u[8, i, l, k, element]) + + derivative_matrix[k, l] * + (Ja31 * u[6, i, j, l, element] + Ja32 * u[7, i, j, l, element] + + Ja33 * u[8, i, j, l, element])) + end + divb *= cache.elements.inverse_jacobian[i, j, k, element] + divb^2 + end |> sqrt end - function analyze(::Val{:linf_divb}, du, u, t, mesh::TreeMesh{3}, equations::IdealGlmMhdEquations3D, dg::DGSEM, cache) - @unpack derivative_matrix, weights = dg.basis - - # integrate over all elements to get the divergence-free condition errors - linf_divb = zero(eltype(u)) - for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - divb = zero(eltype(u)) - for l in eachnode(dg) - divb += ( derivative_matrix[i, l] * u[6, l, j, k, element] + - derivative_matrix[j, l] * u[7, i, l, k, element] + - derivative_matrix[k, l] * u[8, i, j, l, element] ) - end - divb *= cache.elements.inverse_jacobian[element] - linf_divb = max(linf_divb, abs(divb)) + @unpack derivative_matrix, weights = dg.basis + + # integrate over all elements to get the divergence-free condition errors + linf_divb = zero(eltype(u)) + for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + divb = zero(eltype(u)) + for l in eachnode(dg) + divb += (derivative_matrix[i, l] * u[6, l, j, k, element] + + derivative_matrix[j, l] * u[7, i, l, k, element] + + derivative_matrix[k, l] * u[8, i, j, l, element]) + end + divb *= cache.elements.inverse_jacobian[element] + linf_divb = max(linf_divb, abs(divb)) + end end - end - return linf_divb + return linf_divb end function analyze(::Val{:linf_divb}, du, u, t, - mesh::Union{StructuredMesh{3}, P4estMesh{3}}, equations::IdealGlmMhdEquations3D, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + equations::IdealGlmMhdEquations3D, dg::DGSEM, cache) - @unpack derivative_matrix, weights = dg.basis - @unpack contravariant_vectors = cache.elements - - # integrate over all elements to get the divergence-free condition errors - linf_divb = zero(eltype(u)) - for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - divb = zero(eltype(u)) - # Get the contravariant vectors Ja^1, Ja^2, and Ja^3 - Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) - Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) - Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) - # Compute the transformed divergence - for l in eachnode(dg) - divb += ( derivative_matrix[i, l] * (Ja11 * u[6, l, j, k, 
element] + Ja12 * u[7, l, j, k, element] + Ja13 * u[8, l, j, k, element]) + - derivative_matrix[j, l] * (Ja21 * u[6, i, l, k, element] + Ja22 * u[7, i, l, k, element] + Ja23 * u[8, i, l, k, element]) + - derivative_matrix[k, l] * (Ja31 * u[6, i, j, l, element] + Ja32 * u[7, i, j, l, element] + Ja33 * u[8, i, j, l, element]) ) - end - divb *= cache.elements.inverse_jacobian[i, j, k, element] - linf_divb = max(linf_divb, abs(divb)) + @unpack derivative_matrix, weights = dg.basis + @unpack contravariant_vectors = cache.elements + + # integrate over all elements to get the divergence-free condition errors + linf_divb = zero(eltype(u)) + for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + divb = zero(eltype(u)) + # Get the contravariant vectors Ja^1, Ja^2, and Ja^3 + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, + k, element) + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, + k, element) + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, + k, element) + # Compute the transformed divergence + for l in eachnode(dg) + divb += (derivative_matrix[i, l] * (Ja11 * u[6, l, j, k, element] + + Ja12 * u[7, l, j, k, element] + Ja13 * u[8, l, j, k, element]) + + derivative_matrix[j, l] * (Ja21 * u[6, i, l, k, element] + + Ja22 * u[7, i, l, k, element] + Ja23 * u[8, i, l, k, element]) + + derivative_matrix[k, l] * (Ja31 * u[6, i, j, l, element] + + Ja32 * u[7, i, j, l, element] + Ja33 * u[8, i, j, l, element])) + end + divb *= cache.elements.inverse_jacobian[i, j, k, element] + linf_divb = max(linf_divb, abs(divb)) + end end - end - return linf_divb + return linf_divb end - - end # @muladd diff --git a/src/callbacks_step/analysis_dg3d_parallel.jl b/src/callbacks_step/analysis_dg3d_parallel.jl index 058960dd63a..d8756d91c9d 100644 --- a/src/callbacks_step/analysis_dg3d_parallel.jl +++ b/src/callbacks_step/analysis_dg3d_parallel.jl @@ -3,95 +3,102 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function calc_error_norms(func, u, t, analyzer, mesh::ParallelP4estMesh{3}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) - @unpack vandermonde, weights = analyzer - @unpack node_coordinates, inverse_jacobian = cache.elements - @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, jacobian_tmp1, jacobian_tmp2 = cache_analysis - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) - linf_error = copy(l2_error) - volume = zero(real(mesh)) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Interpolate solution and node locations to analysis nodes - multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, :, element), u_tmp1, u_tmp2) - multiply_dimensionwise!(x_local, vandermonde, view(node_coordinates, :, :, :, :, element), x_tmp1, x_tmp2) - multiply_scalar_dimensionwise!(jacobian_local, vandermonde, inv.(view(inverse_jacobian, :, :, :, element)), jacobian_tmp1, jacobian_tmp2) - - # Calculate errors at each analysis node - @. 
jacobian_local = abs(jacobian_local) - - for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j, k), t, equations) - diff = func(u_exact, equations) - func(get_node_vars(u_local, equations, dg, i, j, k), equations) - l2_error += diff.^2 * (weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k]) - linf_error = @. max(linf_error, abs(diff)) - volume += weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k] + @unpack vandermonde, weights = analyzer + @unpack node_coordinates, inverse_jacobian = cache.elements + @unpack u_local, u_tmp1, u_tmp2, x_local, x_tmp1, x_tmp2, jacobian_local, jacobian_tmp1, jacobian_tmp2 = cache_analysis + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) + linf_error = copy(l2_error) + volume = zero(real(mesh)) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Interpolate solution and node locations to analysis nodes + multiply_dimensionwise!(u_local, vandermonde, view(u, :, :, :, :, element), + u_tmp1, u_tmp2) + multiply_dimensionwise!(x_local, vandermonde, + view(node_coordinates, :, :, :, :, element), x_tmp1, + x_tmp2) + multiply_scalar_dimensionwise!(jacobian_local, vandermonde, + inv.(view(inverse_jacobian, :, :, :, element)), + jacobian_tmp1, jacobian_tmp2) + + # Calculate errors at each analysis node + @. jacobian_local = abs(jacobian_local) + + for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j, + k), t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u_local, equations, dg, i, j, k), equations) + l2_error += diff .^ 2 * + (weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k]) + linf_error = @. max(linf_error, abs(diff)) + volume += weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k] + end end - end - # Accumulate local results on root process - global_l2_error = Vector(l2_error) - global_linf_error = Vector(linf_error) - MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm()) - MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm()) - total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) - if mpi_isroot() - l2_error = convert(typeof(l2_error), global_l2_error) - linf_error = convert(typeof(linf_error), global_linf_error) - # For L2 error, divide by total volume - l2_error = @. sqrt(l2_error / total_volume) - else - l2_error = convert(typeof(l2_error), NaN * global_l2_error) - linf_error = convert(typeof(linf_error), NaN * global_linf_error) - end + # Accumulate local results on root process + global_l2_error = Vector(l2_error) + global_linf_error = Vector(linf_error) + MPI.Reduce!(global_l2_error, +, mpi_root(), mpi_comm()) + MPI.Reduce!(global_linf_error, max, mpi_root(), mpi_comm()) + total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) + if mpi_isroot() + l2_error = convert(typeof(l2_error), global_l2_error) + linf_error = convert(typeof(linf_error), global_linf_error) + # For L2 error, divide by total volume + l2_error = @. 
sqrt(l2_error / total_volume) + else + l2_error = convert(typeof(l2_error), NaN * global_l2_error) + linf_error = convert(typeof(linf_error), NaN * global_linf_error) + end - return l2_error, linf_error + return l2_error, linf_error end - function integrate_via_indices(func::Func, u, mesh::ParallelP4estMesh{3}, equations, - dg::DGSEM, cache, args...; normalize=true) where {Func} - @unpack weights = dg.basis - - # Initialize integral with zeros of the right shape - # Pass `zero(SVector{nvariables(equations), eltype(u))}` to `func` since `u` might be empty, if the - # current rank has no elements, see also https://github.com/trixi-framework/Trixi.jl/issues/1096. - integral = zero(func(zero(SVector{nvariables(equations), eltype(u)}), 1, 1, 1, 1, equations, dg, args...)) - volume = zero(real(mesh)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, k, element])) - integral += volume_jacobian * weights[i] * weights[j] * weights[k] * func(u, i, j, k, element, equations, dg, args...) - volume += volume_jacobian * weights[i] * weights[j] * weights[k] + dg::DGSEM, cache, args...; normalize = true) where {Func} + @unpack weights = dg.basis + + # Initialize integral with zeros of the right shape + # Pass `zero(SVector{nvariables(equations), eltype(u))}` to `func` since `u` might be empty, if the + # current rank has no elements, see also https://github.com/trixi-framework/Trixi.jl/issues/1096. + integral = zero(func(zero(SVector{nvariables(equations), eltype(u)}), 1, 1, 1, 1, + equations, dg, args...)) + volume = zero(real(mesh)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + volume_jacobian = abs(inv(cache.elements.inverse_jacobian[i, j, k, element])) + integral += volume_jacobian * weights[i] * weights[j] * weights[k] * + func(u, i, j, k, element, equations, dg, args...) + volume += volume_jacobian * weights[i] * weights[j] * weights[k] + end end - end - global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm()) - total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) - if mpi_isroot() - integral = convert(typeof(integral), global_integral[]) - else - integral = convert(typeof(integral), NaN * integral) - total_volume = volume # non-root processes receive nothing from reduce -> overwrite - end + global_integral = MPI.Reduce!(Ref(integral), +, mpi_root(), mpi_comm()) + total_volume = MPI.Reduce(volume, +, mpi_root(), mpi_comm()) + if mpi_isroot() + integral = convert(typeof(integral), global_integral[]) + else + integral = convert(typeof(integral), NaN * integral) + total_volume = volume # non-root processes receive nothing from reduce -> overwrite + end - # Normalize with total volume - if normalize - integral = integral / total_volume - end + # Normalize with total volume + if normalize + integral = integral / total_volume + end - return integral + return integral end - - end # @muladd diff --git a/src/callbacks_step/analysis_dgmulti.jl b/src/callbacks_step/analysis_dgmulti.jl index 18640c9379f..dc294de9e7b 100644 --- a/src/callbacks_step/analysis_dgmulti.jl +++ b/src/callbacks_step/analysis_dgmulti.jl @@ -3,89 +3,89 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function calc_error_norms(func, u, t, analyzer, mesh::DGMultiMesh{NDIMS}, equations, initial_condition, dg::DGMulti{NDIMS}, cache, cache_analysis) where {NDIMS} - rd = dg.basis - md = mesh.md - @unpack u_values = cache - - # interpolate u to quadrature points - apply_to_each_field(mul_by!(rd.Vq), u_values, u) - - component_l2_errors = zero(eltype(u_values)) - component_linf_errors = zero(eltype(u_values)) - for i in each_quad_node_global(mesh, dg, cache) - u_exact = initial_condition(SVector(getindex.(md.xyzq, i)), t, equations) - error_at_node = func(u_values[i], equations) - func(u_exact, equations) - component_l2_errors += md.wJq[i] * error_at_node.^2 - component_linf_errors = max.(component_linf_errors, abs.(error_at_node)) - end - total_volume = sum(md.wJq) - return sqrt.(component_l2_errors ./ total_volume), component_linf_errors + rd = dg.basis + md = mesh.md + @unpack u_values = cache + + # interpolate u to quadrature points + apply_to_each_field(mul_by!(rd.Vq), u_values, u) + + component_l2_errors = zero(eltype(u_values)) + component_linf_errors = zero(eltype(u_values)) + for i in each_quad_node_global(mesh, dg, cache) + u_exact = initial_condition(SVector(getindex.(md.xyzq, i)), t, equations) + error_at_node = func(u_values[i], equations) - func(u_exact, equations) + component_l2_errors += md.wJq[i] * error_at_node .^ 2 + component_linf_errors = max.(component_linf_errors, abs.(error_at_node)) + end + total_volume = sum(md.wJq) + return sqrt.(component_l2_errors ./ total_volume), component_linf_errors end function integrate(func::Func, u, mesh::DGMultiMesh, - equations, dg::DGMulti, cache; normalize=true) where {Func} - rd = dg.basis - md = mesh.md - @unpack u_values = cache - - # interpolate u to quadrature points - apply_to_each_field(mul_by!(rd.Vq), u_values, u) - - integral = sum(md.wJq .* func.(u_values, equations)) - if normalize == true - integral /= sum(md.wJq) - end - return integral + equations, dg::DGMulti, cache; normalize = true) where {Func} + rd = dg.basis + md = mesh.md + @unpack u_values = cache + + # interpolate u to quadrature points + apply_to_each_field(mul_by!(rd.Vq), u_values, u) + + integral = sum(md.wJq .* func.(u_values, equations)) + if normalize == true + integral /= sum(md.wJq) + end + return integral end function analyze(::typeof(entropy_timederivative), du, u, t, mesh::DGMultiMesh, equations, dg::DGMulti, cache) - - rd = dg.basis - md = mesh.md - @unpack u_values = cache - - # interpolate u, du to quadrature points - du_values = similar(u_values) # Todo: DGMulti. Can we move this to the analysis cache somehow? - apply_to_each_field(mul_by!(rd.Vq), du_values, du) - apply_to_each_field(mul_by!(rd.Vq), u_values, u) - - # compute ∫v(u) * du/dt = ∫dS/dt. We can directly compute v(u) instead of computing the entropy - # projection here, since the RHS will be projected to polynomials of degree N and testing with - # the L2 projection of v(u) would be equivalent to testing with v(u) due to the moment-preserving - # property of the L2 projection. - dS_dt = zero(eltype(first(du))) - for i in Base.OneTo(length(md.wJq)) - dS_dt += dot(cons2entropy(u_values[i], equations), du_values[i]) * md.wJq[i] - end - return dS_dt + rd = dg.basis + md = mesh.md + @unpack u_values = cache + + # interpolate u, du to quadrature points + du_values = similar(u_values) # Todo: DGMulti. Can we move this to the analysis cache somehow? 
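    # `rd.Vq` interpolates nodal values to the volume quadrature points;
    # `apply_to_each_field` applies this matrix to each solution component.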
+ apply_to_each_field(mul_by!(rd.Vq), du_values, du) + apply_to_each_field(mul_by!(rd.Vq), u_values, u) + + # compute ∫v(u) * du/dt = ∫dS/dt. We can directly compute v(u) instead of computing the entropy + # projection here, since the RHS will be projected to polynomials of degree N and testing with + # the L2 projection of v(u) would be equivalent to testing with v(u) due to the moment-preserving + # property of the L2 projection. + dS_dt = zero(eltype(first(du))) + for i in Base.OneTo(length(md.wJq)) + dS_dt += dot(cons2entropy(u_values[i], equations), du_values[i]) * md.wJq[i] + end + return dS_dt end # This function is used in `analyze(::Val{:l2_divb},...)` and `analyze(::Val{:linf_divb},...)` function compute_local_divergence!(local_divergence, element, vector_field, mesh, dg::DGMulti, cache) - @unpack md = mesh - rd = dg.basis - uEltype = eltype(first(vector_field)) - - fill!(local_divergence, zero(uEltype)) - - # computes dU_i/dx_i = ∑_j dxhat_j/dx_i * dU_i / dxhat_j - # dU_i/dx_i is then accumulated into local_divergence. - # TODO: DGMulti. Extend to curved elements. - for i in eachdim(mesh) - for j in eachdim(mesh) - geometric_scaling = md.rstxyzJ[i, j][1, element] - jth_ref_derivative_matrix = rd.Drst[j] - mul!(local_divergence, jth_ref_derivative_matrix, vector_field[i], geometric_scaling, one(uEltype)) + @unpack md = mesh + rd = dg.basis + uEltype = eltype(first(vector_field)) + + fill!(local_divergence, zero(uEltype)) + + # computes dU_i/dx_i = ∑_j dxhat_j/dx_i * dU_i / dxhat_j + # dU_i/dx_i is then accumulated into local_divergence. + # TODO: DGMulti. Extend to curved elements. + for i in eachdim(mesh) + for j in eachdim(mesh) + geometric_scaling = md.rstxyzJ[i, j][1, element] + jth_ref_derivative_matrix = rd.Drst[j] + mul!(local_divergence, jth_ref_derivative_matrix, vector_field[i], + geometric_scaling, one(uEltype)) + end end - end end get_component(u::StructArray, i::Int) = StructArrays.component(u, i) @@ -94,101 +94,102 @@ get_component(u::AbstractArray{<:SVector}, i::Int) = getindex.(u, i) function analyze(::Val{:l2_divb}, du, u, t, mesh::DGMultiMesh, equations::IdealGlmMhdEquations2D, dg::DGMulti, cache) - @unpack md = mesh - rd = dg.basis - B1 = get_component(u, 6) - B2 = get_component(u, 7) - B = (B1, B2) - - uEltype = eltype(B1) - l2norm_divB = zero(uEltype) - local_divB = zeros(uEltype, size(B1, 1)) - for e in eachelement(mesh, dg, cache) - compute_local_divergence!(local_divB, e, view.(B, :, e), mesh, dg, cache) - - # TODO: DGMulti. Extend to curved elements. - # compute L2 norm squared via J[1, e] * u' * M * u - local_l2norm_divB = md.J[1, e] * dot(local_divB, rd.M, local_divB) - l2norm_divB += local_l2norm_divB - end + @unpack md = mesh + rd = dg.basis + B1 = get_component(u, 6) + B2 = get_component(u, 7) + B = (B1, B2) + + uEltype = eltype(B1) + l2norm_divB = zero(uEltype) + local_divB = zeros(uEltype, size(B1, 1)) + for e in eachelement(mesh, dg, cache) + compute_local_divergence!(local_divB, e, view.(B, :, e), mesh, dg, cache) + + # TODO: DGMulti. Extend to curved elements. 
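        # On affine elements, the Jacobian is constant per element, so the
        # value at the first node, `md.J[1, e]`, is valid everywhere and
        # scales the reference mass matrix form to the physical element.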
+ # compute L2 norm squared via J[1, e] * u' * M * u + local_l2norm_divB = md.J[1, e] * dot(local_divB, rd.M, local_divB) + l2norm_divB += local_l2norm_divB + end - return sqrt(l2norm_divB) + return sqrt(l2norm_divB) end function analyze(::Val{:linf_divb}, du, u, t, mesh::DGMultiMesh, equations::IdealGlmMhdEquations2D, dg::DGMulti, cache) - B1 = get_component(u, 6) - B2 = get_component(u, 7) - B = (B1, B2) - - uEltype = eltype(B1) - linf_divB = zero(uEltype) - local_divB = zeros(uEltype, size(B1, 1)) - for e in eachelement(mesh, dg, cache) - compute_local_divergence!(local_divB, e, view.(B, :, e), mesh, dg, cache) - - # compute maximum norm - linf_divB = max(linf_divB, maximum(abs, local_divB)) - end + B1 = get_component(u, 6) + B2 = get_component(u, 7) + B = (B1, B2) + + uEltype = eltype(B1) + linf_divB = zero(uEltype) + local_divB = zeros(uEltype, size(B1, 1)) + for e in eachelement(mesh, dg, cache) + compute_local_divergence!(local_divB, e, view.(B, :, e), mesh, dg, cache) + + # compute maximum norm + linf_divB = max(linf_divB, maximum(abs, local_divB)) + end - return linf_divB + return linf_divB end function integrate(func::typeof(enstrophy), u, mesh::DGMultiMesh, equations, equations_parabolic::CompressibleNavierStokesDiffusion3D, dg::DGMulti, - cache, cache_parabolic; normalize=true) - - gradients_x, gradients_y, gradients_z = cache_parabolic.gradients - - # allocate local storage for gradients. - # TODO: can we avoid allocating here? - local_gradient_quadrature_values = ntuple(_ -> similar(cache_parabolic.local_u_values_threaded), 3) - - integral = zero(eltype(u)) - for e in eachelement(mesh, dg) - u_quadrature_values = cache_parabolic.local_u_values_threaded[Threads.threadid()] - gradient_x_quadrature_values = local_gradient_quadrature_values[1][Threads.threadid()] - gradient_y_quadrature_values = local_gradient_quadrature_values[2][Threads.threadid()] - gradient_z_quadrature_values = local_gradient_quadrature_values[3][Threads.threadid()] - - # interpolate to quadrature on each element - apply_to_each_field(mul_by!(dg.basis.Vq), u_quadrature_values, view(u, :, e)) - apply_to_each_field(mul_by!(dg.basis.Vq), gradient_x_quadrature_values, view(gradients_x, :, e)) - apply_to_each_field(mul_by!(dg.basis.Vq), gradient_y_quadrature_values, view(gradients_y, :, e)) - apply_to_each_field(mul_by!(dg.basis.Vq), gradient_z_quadrature_values, view(gradients_z, :, e)) - - # integrate over the element - for i in eachindex(u_quadrature_values) - gradients_i = SVector(gradient_x_quadrature_values[i], - gradient_y_quadrature_values[i], - gradient_z_quadrature_values[i]) - integral += mesh.md.wJq[i, e] * func(u_quadrature_values[i], gradients_i, equations) + cache, cache_parabolic; normalize = true) + gradients_x, gradients_y, gradients_z = cache_parabolic.gradients + + # allocate local storage for gradients. + # TODO: can we avoid allocating here? 
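    # One scratch buffer per spatial direction, mirroring the thread-local
    # layout of `local_u_values_threaded` so each thread uses its own storage.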
+ local_gradient_quadrature_values = ntuple(_ -> similar(cache_parabolic.local_u_values_threaded), + 3) + + integral = zero(eltype(u)) + for e in eachelement(mesh, dg) + u_quadrature_values = cache_parabolic.local_u_values_threaded[Threads.threadid()] + gradient_x_quadrature_values = local_gradient_quadrature_values[1][Threads.threadid()] + gradient_y_quadrature_values = local_gradient_quadrature_values[2][Threads.threadid()] + gradient_z_quadrature_values = local_gradient_quadrature_values[3][Threads.threadid()] + + # interpolate to quadrature on each element + apply_to_each_field(mul_by!(dg.basis.Vq), u_quadrature_values, view(u, :, e)) + apply_to_each_field(mul_by!(dg.basis.Vq), gradient_x_quadrature_values, + view(gradients_x, :, e)) + apply_to_each_field(mul_by!(dg.basis.Vq), gradient_y_quadrature_values, + view(gradients_y, :, e)) + apply_to_each_field(mul_by!(dg.basis.Vq), gradient_z_quadrature_values, + view(gradients_z, :, e)) + + # integrate over the element + for i in eachindex(u_quadrature_values) + gradients_i = SVector(gradient_x_quadrature_values[i], + gradient_y_quadrature_values[i], + gradient_z_quadrature_values[i]) + integral += mesh.md.wJq[i, e] * + func(u_quadrature_values[i], gradients_i, equations) + end end - end - return integral + return integral end - function create_cache_analysis(analyzer, mesh::DGMultiMesh, equations, dg::DGMulti, cache, RealT, uEltype) - md = mesh.md - return (; ) + md = mesh.md + return (;) end SolutionAnalyzer(rd::RefElemData) = rd nelements(mesh::DGMultiMesh, ::DGMulti, other_args...) = mesh.md.num_elements function ndofsglobal(mesh::DGMultiMesh, solver::DGMulti, cache) - if mpi_isparallel() - error("`ndofsglobal` is not implemented for `DGMultiMesh` when used in parallel with MPI") - else - return ndofs(mesh, solver, cache) - end + if mpi_isparallel() + error("`ndofsglobal` is not implemented for `DGMultiMesh` when used in parallel with MPI") + else + return ndofs(mesh, solver, cache) + end end - - end # @muladd diff --git a/src/callbacks_step/averaging.jl b/src/callbacks_step/averaging.jl index 1052efe4bee..8d2dcfeaefe 100644 --- a/src/callbacks_step/averaging.jl +++ b/src/callbacks_step/averaging.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ AveragingCallback(semi::SemidiscretizationHyperbolic, tspan; output_directory="out", @@ -19,112 +19,114 @@ mean speed of sound, mean density, and mean vorticity for each node over the tim that this callback does not support adaptive mesh refinement ([`AMRCallback`](@ref)). """ struct AveragingCallback{TSpan, MeanValues, Cache} - tspan::TSpan - mean_values::MeanValues - cache::Cache - output_directory::String - filename::String + tspan::TSpan + mean_values::MeanValues + cache::Cache + output_directory::String + filename::String end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:AveragingCallback}) - @nospecialize cb # reduce precompilation time - averaging_callback = cb.affect! - @unpack tspan = averaging_callback - - print(io, "AveragingCallback(tspan=", tspan, ")") -end - -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:AveragingCallback}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else + @nospecialize cb # reduce precompilation time averaging_callback = cb.affect! 
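    # `cb.affect!` stores the `AveragingCallback` instance wrapped by the
    # `DiscreteCallback`.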
+ @unpack tspan = averaging_callback - setup = [ - "Start time" => first(averaging_callback.tspan), - "Final time" => last(averaging_callback.tspan) - ] - summary_box(io, "AveragingCallback", setup) - end + print(io, "AveragingCallback(tspan=", tspan, ")") end -function AveragingCallback(semi::SemidiscretizationHyperbolic{<:Any, <:CompressibleEulerEquations2D}, - tspan; output_directory="out", filename="averaging.h5") - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - mean_values = initialize_mean_values(mesh, equations, solver, cache) - cache = create_cache(AveragingCallback, mesh, equations, solver, cache) - - averaging_callback = AveragingCallback(tspan, mean_values, cache, output_directory, filename) - condition = (u, t, integrator) -> first(tspan) <= t <= last(tspan) - - return DiscreteCallback(condition, averaging_callback, save_positions=(false,false), - initialize=initialize!) +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:AveragingCallback}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + averaging_callback = cb.affect! + + setup = [ + "Start time" => first(averaging_callback.tspan), + "Final time" => last(averaging_callback.tspan), + ] + summary_box(io, "AveragingCallback", setup) + end end +function AveragingCallback(semi::SemidiscretizationHyperbolic{<:Any, + <:CompressibleEulerEquations2D + }, + tspan; output_directory = "out", filename = "averaging.h5") + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + mean_values = initialize_mean_values(mesh, equations, solver, cache) + cache = create_cache(AveragingCallback, mesh, equations, solver, cache) + + averaging_callback = AveragingCallback(tspan, mean_values, cache, output_directory, + filename) + condition = (u, t, integrator) -> first(tspan) <= t <= last(tspan) + + return DiscreteCallback(condition, averaging_callback, + save_positions = (false, false), + initialize = initialize!) +end -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u_ode, t, integrator) where {Condition, Affect!<:AveragingCallback} - averaging_callback = cb.affect! - semi = integrator.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array(u_ode, mesh, equations, solver, cache) +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u_ode, t, + integrator) where {Condition, Affect! <: AveragingCallback} + averaging_callback = cb.affect! 
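    # The semidiscretization is passed to the ODE problem as its parameter
    # object and is thus available as `integrator.p`.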
+ semi = integrator.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + u = wrap_array(u_ode, mesh, equations, solver, cache) - @trixi_timeit timer() "averaging" initialize_cache!(averaging_callback.cache, u, - mesh, equations, solver, cache) + @trixi_timeit timer() "averaging" initialize_cache!(averaging_callback.cache, u, + mesh, equations, solver, cache) - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) - return nothing + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) + return nothing end # This function is called during time integration and updates the mean values according to the # trapezoidal rule function (averaging_callback::AveragingCallback)(integrator) - @unpack mean_values = averaging_callback + @unpack mean_values = averaging_callback - u_ode = integrator.u - u_prev_ode = integrator.uprev - semi = integrator.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array(u_ode, mesh, equations, solver, cache) - u_prev = wrap_array(u_prev_ode, mesh, equations, solver, cache) + u_ode = integrator.u + u_prev_ode = integrator.uprev + semi = integrator.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + u = wrap_array(u_ode, mesh, equations, solver, cache) + u_prev = wrap_array(u_prev_ode, mesh, equations, solver, cache) - dt = integrator.t - integrator.tprev - tspan = averaging_callback.tspan + dt = integrator.t - integrator.tprev + tspan = averaging_callback.tspan - integration_constant = 0.5 * dt / (tspan[2] - tspan[1]) # .5 due to trapezoidal rule + integration_constant = 0.5 * dt / (tspan[2] - tspan[1]) # .5 due to trapezoidal rule - @trixi_timeit timer() "averaging" calc_mean_values!(mean_values, averaging_callback.cache, - u, u_prev, integration_constant, - mesh, equations, solver, cache) + @trixi_timeit timer() "averaging" calc_mean_values!(mean_values, + averaging_callback.cache, + u, u_prev, integration_constant, + mesh, equations, solver, cache) - # Store mean values in a file if this is the last time step - if isfinished(integrator) - save_averaging_file(averaging_callback, semi) - end + # Store mean values in a file if this is the last time step + if isfinished(integrator) + save_averaging_file(averaging_callback, semi) + end - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) - return nothing + return nothing end - function save_averaging_file(averaging_callback, semi::AbstractSemidiscretization) - # Create output directory if it doesn't exist - mkpath(averaging_callback.output_directory) + # Create output directory if it doesn't exist + mkpath(averaging_callback.output_directory) - save_averaging_file(averaging_callback, mesh_equations_solver_cache(semi)...) + save_averaging_file(averaging_callback, mesh_equations_solver_cache(semi)...) end function load_averaging_file(averaging_file, semi::AbstractSemidiscretization) - load_averaging_file(averaging_file, mesh_equations_solver_cache(semi)...) + load_averaging_file(averaging_file, mesh_equations_solver_cache(semi)...) end - include("averaging_dg.jl") include("averaging_dg2d.jl") - -end # @muladd \ No newline at end of file +end # @muladd diff --git a/src/callbacks_step/averaging_dg.jl b/src/callbacks_step/averaging_dg.jl index c73b982b093..ca6b839f457 100644 --- a/src/callbacks_step/averaging_dg.jl +++ b/src/callbacks_step/averaging_dg.jl @@ -3,49 +3,49 @@ # we need to opt-in explicitly. 
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - - -function save_averaging_file(averaging_callback, mesh::TreeMesh, equations, dg::DGSEM, cache) - @unpack output_directory, filename, mean_values = averaging_callback - h5open(joinpath(output_directory, filename), "w") do file - # Add context information - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_elements"] = nelements(dg, cache) - - # Store all mean variables as multi-dimensional arrays - for field in fieldnames(typeof(mean_values)) - name = string(field) - data = getfield(mean_values, field) - file[name] = data +#! format: noindent + +function save_averaging_file(averaging_callback, mesh::TreeMesh, equations, dg::DGSEM, + cache) + @unpack output_directory, filename, mean_values = averaging_callback + h5open(joinpath(output_directory, filename), "w") do file + # Add context information + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_elements"] = nelements(dg, cache) + + # Store all mean variables as multi-dimensional arrays + for field in fieldnames(typeof(mean_values)) + name = string(field) + data = getfield(mean_values, field) + file[name] = data + end end - end - return filename + return filename end +function load_averaging_file(averaging_file, mesh::TreeMesh, equations, dg::DGSEM, + cache) + # Read and check mesh and solver info + h5open(averaging_file, "r") do file + n_dims = read(attributes(file)["ndims"]) + n_nodes = read(attributes(file)["polydeg"]) + 1 + n_elements = read(attributes(file)["n_elements"]) + + @assert n_dims==ndims(mesh) "ndims differs from value in averaging file" + @assert n_nodes - 1==polydeg(dg) "polynomial degree in solver differs from value in averaging file" + @assert n_elements==nelements(dg, cache) "nelements in solver differs from value in averaging file" + end -function load_averaging_file(averaging_file, mesh::TreeMesh, equations, dg::DGSEM, cache) - # Read and check mesh and solver info - h5open(averaging_file, "r") do file - n_dims = read(attributes(file)["ndims"]) - n_nodes = read(attributes(file)["polydeg"]) + 1 - n_elements = read(attributes(file)["n_elements"]) - - @assert n_dims == ndims(mesh) "ndims differs from value in averaging file" - @assert n_nodes - 1 == polydeg(dg) "polynomial degree in solver differs from value in averaging file" - @assert n_elements == nelements(dg, cache) "nelements in solver differs from value in averaging file" - end - - # Read and return mean values - v_mean, c_mean, rho_mean, vorticity_mean = h5open(averaging_file, "r") do file - return read(file["v_mean"]), - read(file["c_mean"]), - read(file["rho_mean"]), - read(file["vorticity_mean"]) - end + # Read and return mean values + v_mean, c_mean, rho_mean, vorticity_mean = h5open(averaging_file, "r") do file + return read(file["v_mean"]), + read(file["c_mean"]), + read(file["rho_mean"]), + read(file["vorticity_mean"]) + end - return (; v_mean, c_mean, rho_mean, vorticity_mean) + return (; v_mean, c_mean, rho_mean, vorticity_mean) end - -end # @muladd \ No newline at end of file +end # @muladd diff --git a/src/callbacks_step/averaging_dg2d.jl b/src/callbacks_step/averaging_dg2d.jl index 70eafcb29e2..959a5655d96 100644 --- a/src/callbacks_step/averaging_dg2d.jl +++ b/src/callbacks_step/averaging_dg2d.jl @@ -3,75 +3,84 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Create arrays with DGSEM-specific structure to store the mean values and set them all to 0 -function initialize_mean_values(mesh::TreeMesh{2}, equations::AbstractCompressibleEulerEquations{2}, +function initialize_mean_values(mesh::TreeMesh{2}, + equations::AbstractCompressibleEulerEquations{2}, dg::DGSEM, cache) - uEltype = eltype(cache.elements) - v_mean = zeros(uEltype, (ndims(equations), nnodes(dg), nnodes(dg), nelements(cache.elements))) - c_mean = zeros(uEltype, (nnodes(dg), nnodes(dg), nelements(cache.elements))) - rho_mean = zeros(uEltype, size(c_mean)) - vorticity_mean = zeros(uEltype, size(c_mean)) - - return (; v_mean, c_mean, rho_mean, vorticity_mean) + uEltype = eltype(cache.elements) + v_mean = zeros(uEltype, + (ndims(equations), nnodes(dg), nnodes(dg), + nelements(cache.elements))) + c_mean = zeros(uEltype, (nnodes(dg), nnodes(dg), nelements(cache.elements))) + rho_mean = zeros(uEltype, size(c_mean)) + vorticity_mean = zeros(uEltype, size(c_mean)) + + return (; v_mean, c_mean, rho_mean, vorticity_mean) end # Create cache which holds the vorticity for the previous time step. This is needed due to the # trapezoidal rule function create_cache(::Type{AveragingCallback}, mesh::TreeMesh{2}, - equations::AbstractCompressibleEulerEquations{2}, dg::DGSEM, cache) - # Cache vorticity from previous time step - uEltype = eltype(cache.elements) - vorticity_prev = zeros(uEltype, (nnodes(dg), nnodes(dg), nelements(cache.elements))) - return (; vorticity_prev) + equations::AbstractCompressibleEulerEquations{2}, dg::DGSEM, + cache) + # Cache vorticity from previous time step + uEltype = eltype(cache.elements) + vorticity_prev = zeros(uEltype, (nnodes(dg), nnodes(dg), nelements(cache.elements))) + return (; vorticity_prev) end # Calculate vorticity for the initial solution and store it in the cache function initialize_cache!(averaging_callback_cache, u, - mesh::TreeMesh{2}, equations::AbstractCompressibleEulerEquations{2}, + mesh::TreeMesh{2}, + equations::AbstractCompressibleEulerEquations{2}, dg::DGSEM, cache) - @unpack vorticity_prev = averaging_callback_cache + @unpack vorticity_prev = averaging_callback_cache - # Calculate vorticity for initial solution - calc_vorticity!(vorticity_prev, u, mesh, equations, dg, cache) + # Calculate vorticity for initial solution + calc_vorticity!(vorticity_prev, u, mesh, equations, dg, cache) - return nothing + return nothing end - # Update mean values using the trapezoidal rule -function calc_mean_values!(mean_values, averaging_callback_cache, u, u_prev, integration_constant, - mesh::TreeMesh{2}, equations::AbstractCompressibleEulerEquations{2}, +function calc_mean_values!(mean_values, averaging_callback_cache, u, u_prev, + integration_constant, + mesh::TreeMesh{2}, + equations::AbstractCompressibleEulerEquations{2}, dg::DGSEM, cache) - @unpack v_mean, c_mean, rho_mean, vorticity_mean = mean_values - @unpack vorticity_prev = averaging_callback_cache - - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - vorticity = calc_vorticity_node(u, mesh, equations, dg, cache, i, j, element) - vorticity_prev_node = vorticity_prev[i, j, element] - vorticity_prev[i, j, element] = vorticity # Cache current vorticity for the next time step - - u_node_prim = cons2prim(get_node_vars(u, equations, dg, i, j, element), equations) - u_prev_node_prim = cons2prim(get_node_vars(u_prev, equations, dg, i, j, element), equations) - - rho, v1, v2, p = u_node_prim - rho_prev, v1_prev, v2_prev, p_prev = u_prev_node_prim - - c 
= sqrt(equations.gamma * p / rho) - c_prev = sqrt(equations.gamma * p_prev / rho_prev) - - # Calculate the contribution to the mean values using the trapezoidal rule - vorticity_mean[i, j, element] += integration_constant * (vorticity_prev_node + vorticity) - v_mean[1, i, j, element] += integration_constant * (v1_prev + v1) - v_mean[2, i, j, element] += integration_constant * (v2_prev + v2) - c_mean[i, j, element] += integration_constant * (c_prev + c) - rho_mean[i, j, element] += integration_constant * (rho_prev + rho) + @unpack v_mean, c_mean, rho_mean, vorticity_mean = mean_values + @unpack vorticity_prev = averaging_callback_cache + + @threaded for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + vorticity = calc_vorticity_node(u, mesh, equations, dg, cache, i, j, + element) + vorticity_prev_node = vorticity_prev[i, j, element] + vorticity_prev[i, j, element] = vorticity # Cache current vorticity for the next time step + + u_node_prim = cons2prim(get_node_vars(u, equations, dg, i, j, element), + equations) + u_prev_node_prim = cons2prim(get_node_vars(u_prev, equations, dg, i, j, + element), equations) + + rho, v1, v2, p = u_node_prim + rho_prev, v1_prev, v2_prev, p_prev = u_prev_node_prim + + c = sqrt(equations.gamma * p / rho) + c_prev = sqrt(equations.gamma * p_prev / rho_prev) + + # Calculate the contribution to the mean values using the trapezoidal rule + vorticity_mean[i, j, element] += integration_constant * + (vorticity_prev_node + vorticity) + v_mean[1, i, j, element] += integration_constant * (v1_prev + v1) + v_mean[2, i, j, element] += integration_constant * (v2_prev + v2) + c_mean[i, j, element] += integration_constant * (c_prev + c) + rho_mean[i, j, element] += integration_constant * (rho_prev + rho) + end end - end - return nothing + return nothing end - -end # @muladd \ No newline at end of file +end # @muladd diff --git a/src/callbacks_step/callbacks_step.jl b/src/callbacks_step/callbacks_step.jl index 0b2c4ef4d5f..09d197bf225 100644 --- a/src/callbacks_step/callbacks_step.jl +++ b/src/callbacks_step/callbacks_step.jl @@ -3,32 +3,33 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # overload this function for specific callbacks which use element element variables # that should be saved -get_element_variables!(element_variables, u, mesh, equations, solver, cache, - callback; kwargs...) = nothing +function get_element_variables!(element_variables, u, mesh, equations, solver, cache, + callback; kwargs...) + nothing +end @inline function get_element_variables!(element_variables, u_ode, - semi::AbstractSemidiscretization, cb::DiscreteCallback; + semi::AbstractSemidiscretization, + cb::DiscreteCallback; kwargs...) - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array(u_ode, mesh, equations, solver, cache) - get_element_variables!(element_variables, u, mesh, equations, solver, cache, - cb.affect!; kwargs...) + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + u = wrap_array(u_ode, mesh, equations, solver, cache) + get_element_variables!(element_variables, u, mesh, equations, solver, cache, + cb.affect!; kwargs...) 
end - @inline function isfinished(integrator) - # Checking for floating point equality is OK here as `DifferentialEquations.jl` - # sets the time exactly to the final time in the last iteration - return integrator.t == last(integrator.sol.prob.tspan) || - isempty(integrator.opts.tstops) || - integrator.iter == integrator.opts.maxiters + # Checking for floating point equality is OK here as `DifferentialEquations.jl` + # sets the time exactly to the final time in the last iteration + return integrator.t == last(integrator.sol.prob.tspan) || + isempty(integrator.opts.tstops) || + integrator.iter == integrator.opts.maxiters end - # `include` callback definitions in the order that we currently prefer # when combining them into a `CallbackSet` which is called *after* a complete step # The motivation is as follows: The first callbacks belong to the current time step iteration: @@ -64,12 +65,10 @@ include("glm_speed.jl") include("lbm_collision.jl") include("euler_acoustics_coupling.jl") - # The `TrivialCallback` purposely does nothing: It allows to quickly disable specific callbacks # when using `trixi_include` or `test_trixi_include` include("trivial.jl") # DGMulti callbacks include("analysis_dgmulti.jl") - end # @muladd diff --git a/src/callbacks_step/euler_acoustics_coupling.jl b/src/callbacks_step/euler_acoustics_coupling.jl index 8847fc62b23..ea33175d0c5 100644 --- a/src/callbacks_step/euler_acoustics_coupling.jl +++ b/src/callbacks_step/euler_acoustics_coupling.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" EulerAcousticsCouplingCallback @@ -34,34 +34,37 @@ the [`AveragingCallback`](@ref). A direct-hybrid method for aeroacoustic analysis [DOI: 10.18154/RWTH-2017-04082](https://doi.org/10.18154/RWTH-2017-04082) """ -mutable struct EulerAcousticsCouplingCallback{RealT<:Real, MeanValues, IntegratorEuler} - stepsize_callback_acoustics::StepsizeCallback{RealT} - stepsize_callback_euler::StepsizeCallback{RealT} - mean_values::MeanValues - integrator_euler::IntegratorEuler +mutable struct EulerAcousticsCouplingCallback{RealT <: Real, MeanValues, IntegratorEuler + } + stepsize_callback_acoustics::StepsizeCallback{RealT} + stepsize_callback_euler::StepsizeCallback{RealT} + mean_values::MeanValues + integrator_euler::IntegratorEuler end +function Base.show(io::IO, + cb::DiscreteCallback{<:Any, <:EulerAcousticsCouplingCallback}) + @nospecialize cb # reduce precompilation time + euler_acoustics_coupling = cb.affect! -function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:EulerAcousticsCouplingCallback}) - @nospecialize cb # reduce precompilation time - euler_acoustics_coupling = cb.affect! - - print(io, "EulerAcousticsCouplingCallback(") - print(io, euler_acoustics_coupling.stepsize_callback_acoustics) - print(io, ", ", euler_acoustics_coupling.stepsize_callback_euler, ")") + print(io, "EulerAcousticsCouplingCallback(") + print(io, euler_acoustics_coupling.stepsize_callback_acoustics) + print(io, ", ", euler_acoustics_coupling.stepsize_callback_euler, ")") end -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:EulerAcousticsCouplingCallback}) - @nospecialize cb # reduce precompilation time - euler_acoustics_coupling = cb.affect! 
- - summary_header(io, "EulerAcousticsCouplingCallback") - summary_line(io, "acoustics StepsizeCallback", euler_acoustics_coupling.stepsize_callback_acoustics) - summary_line(io, "Euler StepsizeCallback", euler_acoustics_coupling.stepsize_callback_euler) - summary_footer(io) +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:EulerAcousticsCouplingCallback}) + @nospecialize cb # reduce precompilation time + euler_acoustics_coupling = cb.affect! + + summary_header(io, "EulerAcousticsCouplingCallback") + summary_line(io, "acoustics StepsizeCallback", + euler_acoustics_coupling.stepsize_callback_acoustics) + summary_line(io, "Euler StepsizeCallback", + euler_acoustics_coupling.stepsize_callback_euler) + summary_footer(io) end - """ EulerAcousticsCouplingCallback(ode_euler, averaging_callback::DiscreteCallback{<:Any, <:AveragingCallback}, @@ -81,12 +84,16 @@ The mean values for the acoustic perturbation equations are read from `averaging (see [`AveragingCallback`](@ref)). """ function EulerAcousticsCouplingCallback(ode_euler, - averaging_callback::DiscreteCallback{<:Any, <:AveragingCallback}, - alg, cfl_acoustics::Real, cfl_euler::Real; kwargs...) - @unpack mean_values = averaging_callback.affect! - - return EulerAcousticsCouplingCallback(ode_euler, mean_values, alg, cfl_acoustics, cfl_euler; + averaging_callback::DiscreteCallback{<:Any, + <:AveragingCallback + }, + alg, cfl_acoustics::Real, cfl_euler::Real; kwargs...) + @unpack mean_values = averaging_callback.affect! + + return EulerAcousticsCouplingCallback(ode_euler, mean_values, alg, cfl_acoustics, + cfl_euler; + kwargs...) end """ @@ -108,99 +115,106 @@ The mean values for the acoustic perturbation equations are read from `averaging """ function EulerAcousticsCouplingCallback(ode_euler, averaging_file::AbstractString, alg, cfl_acoustics::Real, cfl_euler::Real; kwargs...) - semi_euler = ode_euler.p - mean_values = load_averaging_file(averaging_file, semi_euler) + semi_euler = ode_euler.p + mean_values = load_averaging_file(averaging_file, semi_euler) - return EulerAcousticsCouplingCallback(ode_euler, mean_values, alg, cfl_acoustics, cfl_euler; - kwargs...) + return EulerAcousticsCouplingCallback(ode_euler, mean_values, alg, cfl_acoustics, + cfl_euler; + kwargs...) end -function EulerAcousticsCouplingCallback(ode_euler, mean_values, alg, cfl_acoustics, cfl_euler; +function EulerAcousticsCouplingCallback(ode_euler, mean_values, alg, cfl_acoustics, + cfl_euler; kwargs...) - # Set up ODE Integrator for Euler equations - integrator_euler = init(ode_euler, alg, save_everystep=false, dt=1.0; kwargs...) # dt will be overwritten - - euler_acoustics_coupling = EulerAcousticsCouplingCallback{typeof(cfl_acoustics), - typeof(mean_values), - typeof(integrator_euler)}( - StepsizeCallback(cfl_acoustics), StepsizeCallback(cfl_euler), mean_values, integrator_euler) - condition = (u, t, integrator) -> true - - return DiscreteCallback(condition, euler_acoustics_coupling, save_positions=(false, false), - initialize=initialize!) + # Set up ODE Integrator for Euler equations + integrator_euler = init(ode_euler, alg, save_everystep = false, dt = 1.0; kwargs...) 
# dt will be overwritten + + euler_acoustics_coupling = EulerAcousticsCouplingCallback{typeof(cfl_acoustics), + typeof(mean_values), + typeof(integrator_euler)}(StepsizeCallback(cfl_acoustics), + StepsizeCallback(cfl_euler), + mean_values, + integrator_euler) + condition = (u, t, integrator) -> true + + return DiscreteCallback(condition, euler_acoustics_coupling, + save_positions = (false, false), + initialize = initialize!) end - # This is called before the main loop and initializes the mean values in u_ode -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u_ode, t, integrator_acoustics) where {Condition, Affect!<:EulerAcousticsCouplingCallback} - euler_acoustics_coupling = cb.affect! - semi = integrator_acoustics.p - @unpack semi_acoustics = semi - - # Initialize mean values in u_ode - u_acoustics = wrap_array(u_ode, semi_acoustics) - @unpack mean_values = euler_acoustics_coupling - @views @. u_acoustics[4:5, .., :] = mean_values.v_mean - @views @. u_acoustics[6, .., :] = mean_values.c_mean - @views @. u_acoustics[7, .., :] = mean_values.rho_mean - - # Adjust stepsize, advance the flow solver by one time step - cb.affect!(integrator_acoustics) - - return nothing +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u_ode, t, + integrator_acoustics) where {Condition, + Affect! <: + EulerAcousticsCouplingCallback} + euler_acoustics_coupling = cb.affect! + semi = integrator_acoustics.p + @unpack semi_acoustics = semi + + # Initialize mean values in u_ode + u_acoustics = wrap_array(u_ode, semi_acoustics) + @unpack mean_values = euler_acoustics_coupling + @views @. u_acoustics[4:5, .., :] = mean_values.v_mean + @views @. u_acoustics[6, .., :] = mean_values.c_mean + @views @. u_acoustics[7, .., :] = mean_values.rho_mean + + # Adjust stepsize, advance the flow solver by one time step + cb.affect!(integrator_acoustics) + + return nothing end - # This function is called at the end of every time step and advances the Euler solution by one # time step, manages the time stepsize for both the acoustics and Euler solvers and calculates the # acoustic sources for the next acoustics time step function (euler_acoustics_coupling::EulerAcousticsCouplingCallback)(integrator_acoustics) - @unpack stepsize_callback_acoustics, stepsize_callback_euler, integrator_euler = euler_acoustics_coupling - - @assert integrator_acoustics.t == integrator_euler.t - - # Use the minimum of the acoustics and Euler stepsizes for both solvers - stepsize_callback_acoustics(integrator_acoustics) - stepsize_callback_euler(integrator_euler) - dt = min(get_proposed_dt(integrator_acoustics), get_proposed_dt(integrator_euler)) - - set_proposed_dt!(integrator_acoustics, dt) - integrator_acoustics.opts.dtmax = dt - integrator_acoustics.dtcache = dt - - set_proposed_dt!(integrator_euler, dt) - integrator_euler.opts.dtmax = dt - integrator_euler.dtcache = dt - - # Advance the Euler solution by one step and check for errors - if !isfinished(integrator_euler) - @trixi_timeit timer() "Euler solver" step!(integrator_euler) - return_code = check_error(integrator_euler) - if !(SciMLBase.successful_retcode(return_code) || - return_code != SciMLBase.ReturnCode.Default) - error("Error during compressible Euler time integration. 
Received return code $(return_code)") + @unpack stepsize_callback_acoustics, stepsize_callback_euler, integrator_euler = euler_acoustics_coupling + + @assert integrator_acoustics.t == integrator_euler.t + + # Use the minimum of the acoustics and Euler stepsizes for both solvers + stepsize_callback_acoustics(integrator_acoustics) + stepsize_callback_euler(integrator_euler) + dt = min(get_proposed_dt(integrator_acoustics), get_proposed_dt(integrator_euler)) + + set_proposed_dt!(integrator_acoustics, dt) + integrator_acoustics.opts.dtmax = dt + integrator_acoustics.dtcache = dt + + set_proposed_dt!(integrator_euler, dt) + integrator_euler.opts.dtmax = dt + integrator_euler.dtcache = dt + + # Advance the Euler solution by one step and check for errors + if !isfinished(integrator_euler) + @trixi_timeit timer() "Euler solver" step!(integrator_euler) + return_code = check_error(integrator_euler) + if !(SciMLBase.successful_retcode(return_code) || + return_code != SciMLBase.ReturnCode.Default) + error("Error during compressible Euler time integration. Received return code $(return_code)") + end end - # Calculate acoustic sources based on linearized lamb vector - semi = integrator_acoustics.p - semi_euler = integrator_euler.p - u_acoustics = wrap_array(integrator_acoustics.u, semi) - u_euler = wrap_array(integrator_euler.u, semi_euler) - @unpack acoustic_source_terms, coupled_element_ids = semi.cache - @unpack vorticity_mean = euler_acoustics_coupling.mean_values - - @trixi_timeit timer() "calc acoustic source terms" calc_acoustic_sources!( - acoustic_source_terms, u_euler, u_acoustics, vorticity_mean, coupled_element_ids, - mesh_equations_solver_cache(semi_euler)...) + # Calculate acoustic sources based on linearized lamb vector + semi = integrator_acoustics.p + semi_euler = integrator_euler.p + u_acoustics = wrap_array(integrator_acoustics.u, semi) + u_euler = wrap_array(integrator_euler.u, semi_euler) + @unpack acoustic_source_terms, coupled_element_ids = semi.cache + @unpack vorticity_mean = euler_acoustics_coupling.mean_values + + @trixi_timeit timer() "calc acoustic source terms" begin + calc_acoustic_sources!(acoustic_source_terms, u_euler, u_acoustics, + vorticity_mean, coupled_element_ids, + mesh_equations_solver_cache(semi_euler)...) + end - # avoid re-evaluation possible FSAL stages - u_modified!(integrator_acoustics, false) - u_modified!(integrator_euler, false) + # avoid re-evaluating possible FSAL stages + u_modified!(integrator_acoustics, false) + u_modified!(integrator_euler, false) - return nothing + return nothing end include("euler_acoustics_coupling_dg2d.jl") - end # @muladd diff --git a/src/callbacks_step/euler_acoustics_coupling_dg2d.jl b/src/callbacks_step/euler_acoustics_coupling_dg2d.jl index 0891515038c..16fac4f2d8d 100644 --- a/src/callbacks_step/euler_acoustics_coupling_dg2d.jl +++ b/src/callbacks_step/euler_acoustics_coupling_dg2d.jl @@ -3,38 +3,42 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! 
format: noindent - -function calc_acoustic_sources!(acoustic_source_terms, u_euler, u_acoustics, vorticity_mean, +function calc_acoustic_sources!(acoustic_source_terms, u_euler, u_acoustics, + vorticity_mean, coupled_element_ids, mesh, - equations::AbstractCompressibleEulerEquations{2}, dg::DGSEM, cache) - - acoustic_source_terms .= zero(eltype(acoustic_source_terms)) - - @threaded for k in 1:length(coupled_element_ids) - element = coupled_element_ids[k] - - for j in eachnode(dg), i in eachnode(dg) - vorticity = calc_vorticity_node(u_euler, mesh, equations, dg, cache, i, j, element) - - prim_euler = cons2prim(get_node_vars(u_euler, equations, dg, i, j, element), equations) - v1 = prim_euler[2] - v2 = prim_euler[3] - v1_mean = u_acoustics[4, i, j, element] - v2_mean = u_acoustics[5, i, j, element] - - vorticity_prime = vorticity - vorticity_mean[i, j, element] - v1_prime = v1 - v1_mean - v2_prime = v2 - v2_mean - - acoustic_source_terms[1, i, j, k] -= -vorticity_prime * v2_mean - - vorticity_mean[i, j, element] * v2_prime - acoustic_source_terms[2, i, j, k] -= vorticity_prime * v1_mean + - vorticity_mean[i, j, element] * v1_prime + equations::AbstractCompressibleEulerEquations{2}, + dg::DGSEM, cache) + acoustic_source_terms .= zero(eltype(acoustic_source_terms)) + + @threaded for k in 1:length(coupled_element_ids) + element = coupled_element_ids[k] + + for j in eachnode(dg), i in eachnode(dg) + vorticity = calc_vorticity_node(u_euler, mesh, equations, dg, cache, i, j, + element) + + prim_euler = cons2prim(get_node_vars(u_euler, equations, dg, i, j, element), + equations) + v1 = prim_euler[2] + v2 = prim_euler[3] + v1_mean = u_acoustics[4, i, j, element] + v2_mean = u_acoustics[5, i, j, element] + + vorticity_prime = vorticity - vorticity_mean[i, j, element] + v1_prime = v1 - v1_mean + v2_prime = v2 - v2_mean + + acoustic_source_terms[1, i, j, k] -= -vorticity_prime * v2_mean - + vorticity_mean[i, j, element] * + v2_prime + acoustic_source_terms[2, i, j, k] -= vorticity_prime * v1_mean + + vorticity_mean[i, j, element] * + v1_prime + end end - end - return nothing + return nothing end - -end # @muladd \ No newline at end of file +end # @muladd diff --git a/src/callbacks_step/glm_speed.jl b/src/callbacks_step/glm_speed.jl index 03809c97e83..036f61a522b 100644 --- a/src/callbacks_step/glm_speed.jl +++ b/src/callbacks_step/glm_speed.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ GlmSpeedCallback(; glm_scale=0.5, cfl) @@ -15,82 +15,74 @@ The `cfl` number should be set to the same value as for the time step size calcu solution and should thus be set to a value within the interval [0,1]. Note that `glm_scale = 0` deactivates the divergence cleaning. """ -struct GlmSpeedCallback{RealT<:Real} - glm_scale::RealT - cfl::RealT +struct GlmSpeedCallback{RealT <: Real} + glm_scale::RealT + cfl::RealT end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:GlmSpeedCallback}) - @nospecialize cb # reduce precompilation time - - glm_speed_callback = cb.affect! - @unpack glm_scale, cfl = glm_speed_callback - print(io, "GlmSpeedCallback(glm_scale=", glm_scale, ", cfl=", cfl, ")") -end + @nospecialize cb # reduce precompilation time - -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:GlmSpeedCallback}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else glm_speed_callback = cb.affect! 
- - setup = [ - "GLM wave speed scaling" => glm_speed_callback.glm_scale, - "Expected CFL number" => glm_speed_callback.cfl, - ] - summary_box(io, "GlmSpeedCallback", setup) - end + @unpack glm_scale, cfl = glm_speed_callback + print(io, "GlmSpeedCallback(glm_scale=", glm_scale, ", cfl=", cfl, ")") end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:GlmSpeedCallback}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + glm_speed_callback = cb.affect! + + setup = [ + "GLM wave speed scaling" => glm_speed_callback.glm_scale, + "Expected CFL number" => glm_speed_callback.cfl, + ] + summary_box(io, "GlmSpeedCallback", setup) + end +end -function GlmSpeedCallback(; glm_scale=0.5, cfl) - - @assert 0 <= glm_scale <= 1 "glm_scale must be between 0 and 1" +function GlmSpeedCallback(; glm_scale = 0.5, cfl) + @assert 0<=glm_scale<=1 "glm_scale must be between 0 and 1" - glm_speed_callback = GlmSpeedCallback(glm_scale, cfl) + glm_speed_callback = GlmSpeedCallback(glm_scale, cfl) - DiscreteCallback(glm_speed_callback, glm_speed_callback, # the first one is the condition, the second the affect! - save_positions=(false,false), - initialize=initialize!) + DiscreteCallback(glm_speed_callback, glm_speed_callback, # the first one is the condition, the second the affect! + save_positions = (false, false), + initialize = initialize!) end - -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u, t, integrator) where {Condition, Affect!<:GlmSpeedCallback} - cb.affect!(integrator) +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, + integrator) where {Condition, Affect! <: GlmSpeedCallback} + cb.affect!(integrator) end - # this method is called to determine whether the callback should be activated function (glm_speed_callback::GlmSpeedCallback)(u, t, integrator) - return true + return true end - # This method is called as callback after the StepsizeCallback during the time integration. @inline function (glm_speed_callback::GlmSpeedCallback)(integrator) + dt = get_proposed_dt(integrator) + semi = integrator.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + @unpack glm_scale, cfl = glm_speed_callback - dt = get_proposed_dt(integrator) - semi = integrator.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - @unpack glm_scale, cfl = glm_speed_callback + # compute time step for GLM linear advection equation with c_h=1 (redone due to the possible AMR) + c_h_deltat = calc_dt_for_cleaning_speed(cfl, mesh, equations, solver, cache) - # compute time step for GLM linear advection equation with c_h=1 (redone due to the possible AMR) - c_h_deltat = calc_dt_for_cleaning_speed(cfl, mesh, equations, solver, cache) + # c_h is proportional to its own time step divided by the complete MHD time step + equations.c_h = glm_scale * c_h_deltat / dt - # c_h is proportional to its own time step divided by the complete MHD time step - equations.c_h = glm_scale * c_h_deltat / dt + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) - - return nothing + return nothing end include("glm_speed_dg.jl") - - end # @muladd diff --git a/src/callbacks_step/glm_speed_dg.jl b/src/callbacks_step/glm_speed_dg.jl index eef01ed0471..0686c547a34 100644 --- a/src/callbacks_step/glm_speed_dg.jl +++ b/src/callbacks_step/glm_speed_dg.jl @@ -3,35 +3,38 @@ # we need to opt-in explicitly. 
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function calc_dt_for_cleaning_speed(cfl::Real, mesh, - equations::Union{AbstractIdealGlmMhdEquations, AbstractIdealGlmMhdMulticomponentEquations}, dg::DG, cache) -# compute time step for GLM linear advection equation with c_h=1 for the DG discretization on -# Cartesian meshes - max_scaled_speed_for_c_h = maximum(cache.elements.inverse_jacobian) * ndims(equations) - # OBS! This depends on the implementation details of the StepsizeCallback and needs to be adapted - # as well if that callback changes. - return cfl * 2 / (nnodes(dg) * max_scaled_speed_for_c_h) + equations::Union{AbstractIdealGlmMhdEquations, + AbstractIdealGlmMhdMulticomponentEquations + }, dg::DG, cache) + # compute time step for GLM linear advection equation with c_h=1 for the DG discretization on + # Cartesian meshes + max_scaled_speed_for_c_h = maximum(cache.elements.inverse_jacobian) * + ndims(equations) + # OBS! This depends on the implementation details of the StepsizeCallback and needs to be adapted + # as well if that callback changes. + return cfl * 2 / (nnodes(dg) * max_scaled_speed_for_c_h) end function calc_dt_for_cleaning_speed(cfl::Real, mesh, - equations::Union{AbstractIdealGlmMhdEquations, AbstractIdealGlmMhdMulticomponentEquations}, + equations::Union{AbstractIdealGlmMhdEquations, + AbstractIdealGlmMhdMulticomponentEquations + }, dg::DGMulti, cache) - rd = dg.basis - md = mesh.md + rd = dg.basis + md = mesh.md - # Compute time step for GLM linear advection equation with c_h=1 for a DGMulti discretization. - # Copies implementation behavior of `calc_dt_for_cleaning_speed` for DGSEM discretizations. - max_scaled_speed_for_c_h = inv(minimum(md.J)) * ndims(equations) + # Compute time step for GLM linear advection equation with c_h=1 for a DGMulti discretization. + # Copies implementation behavior of `calc_dt_for_cleaning_speed` for DGSEM discretizations. + max_scaled_speed_for_c_h = inv(minimum(md.J)) * ndims(equations) - # This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by - # `polydeg+1`. This is because `nnodes(dg)` returns the total number of - # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns - # the number of 1D nodes for `DGSEM` solvers. - polydeg = rd.N - return cfl * 2 / ((polydeg + 1) * max_scaled_speed_for_c_h) + # This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by + # `polydeg+1`. This is because `nnodes(dg)` returns the total number of + # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns + # the number of 1D nodes for `DGSEM` solvers. + polydeg = rd.N + return cfl * 2 / ((polydeg + 1) * max_scaled_speed_for_c_h) end - - end # @muladd diff --git a/src/callbacks_step/lbm_collision.jl b/src/callbacks_step/lbm_collision.jl index 7bd11830c63..33c2806d6a6 100644 --- a/src/callbacks_step/lbm_collision.jl +++ b/src/callbacks_step/lbm_collision.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ LBMCollisionCallback() @@ -12,57 +12,55 @@ Apply the Lattice-Boltzmann method (LBM) collision operator before each time ste See [`LatticeBoltzmannEquations2D`](@ref) for further details. """ function LBMCollisionCallback() - DiscreteCallback(lbm_collision_callback, lbm_collision_callback, - save_positions=(false,false), - initialize=initialize!) 
+ DiscreteCallback(lbm_collision_callback, lbm_collision_callback, + save_positions = (false, false), + initialize = initialize!) end # Always execute collision step after a time step, but not after the last step lbm_collision_callback(u, t, integrator) = !isfinished(integrator) +function Base.show(io::IO, + cb::DiscreteCallback{<:Any, <:typeof(lbm_collision_callback)}) + @nospecialize cb # reduce precompilation time -function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:typeof(lbm_collision_callback)}) - @nospecialize cb # reduce precompilation time - - print(io, "LBMCollisionCallback()") + print(io, "LBMCollisionCallback()") end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:typeof(lbm_collision_callback)}) + @nospecialize cb # reduce precompilation time -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:typeof(lbm_collision_callback)}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else - summary_box(io, "LBMCollisionCallback") - end + if get(io, :compact, false) + show(io, cb) + else + summary_box(io, "LBMCollisionCallback") + end end - # Execute collision step once in the very beginning -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u, t, integrator) where {Condition, Affect!<:typeof(lbm_collision_callback)} - cb.affect!(integrator) +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, + integrator) where {Condition, + Affect! <: typeof(lbm_collision_callback)} + cb.affect!(integrator) end - # This method is called as callback after the StepsizeCallback during the time integration. @inline function lbm_collision_callback(integrator) + dt = get_proposed_dt(integrator) + semi = integrator.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + @unpack collision_op = equations - dt = get_proposed_dt(integrator) - semi = integrator.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - @unpack collision_op = equations - - u_ode = integrator.u - u = wrap_array(u_ode, mesh, equations, solver, cache) + u_ode = integrator.u + u = wrap_array(u_ode, mesh, equations, solver, cache) - @trixi_timeit timer() "LBM collision" apply_collision!(u, dt, collision_op, mesh, equations, solver, cache) + @trixi_timeit timer() "LBM collision" apply_collision!(u, dt, collision_op, mesh, + equations, solver, cache) - return nothing + return nothing end include("lbm_collision_dg2d.jl") include("lbm_collision_dg3d.jl") - - end # @muladd diff --git a/src/callbacks_step/lbm_collision_dg2d.jl b/src/callbacks_step/lbm_collision_dg2d.jl index 3a6cdaddac1..932edfd61f6 100644 --- a/src/callbacks_step/lbm_collision_dg2d.jl +++ b/src/callbacks_step/lbm_collision_dg2d.jl @@ -3,21 +3,18 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function apply_collision!(u, dt, collision_op, mesh::AbstractMesh{2}, equations, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - update = collision_op(u_node, dt, equations) - add_to_node_vars!(u, update, equations, dg, i, j, element) + @threaded for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + update = collision_op(u_node, dt, equations) + add_to_node_vars!(u, update, equations, dg, i, j, element) + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/callbacks_step/lbm_collision_dg3d.jl b/src/callbacks_step/lbm_collision_dg3d.jl index 4c1326b3608..0620f77159d 100644 --- a/src/callbacks_step/lbm_collision_dg3d.jl +++ b/src/callbacks_step/lbm_collision_dg3d.jl @@ -3,21 +3,18 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function apply_collision!(u, dt, collision_op, mesh::AbstractMesh{3}, equations, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - update = collision_op(u_node, dt, equations) - add_to_node_vars!(u, update, equations, dg, i, j, k, element) + @threaded for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + update = collision_op(u_node, dt, equations) + add_to_node_vars!(u, update, equations, dg, i, j, k, element) + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/callbacks_step/save_restart.jl b/src/callbacks_step/save_restart.jl index 4597c3ce920..e23f58f26ea 100644 --- a/src/callbacks_step/save_restart.jl +++ b/src/callbacks_step/save_restart.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ SaveRestartCallback(; interval=0, @@ -13,132 +13,126 @@ Save the current numerical solution in a restart file every `interval` time steps. """ mutable struct SaveRestartCallback - interval::Int - save_final_restart::Bool - output_directory::String + interval::Int + save_final_restart::Bool + output_directory::String end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:SaveRestartCallback}) - @nospecialize cb # reduce precompilation time + @nospecialize cb # reduce precompilation time - restart_callback = cb.affect! - print(io, "SaveRestartCallback(interval=", restart_callback.interval, ")") + restart_callback = cb.affect! + print(io, "SaveRestartCallback(interval=", restart_callback.interval, ")") end -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:SaveRestartCallback}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else - save_restart_callback = cb.affect! - - setup = [ - "interval" => save_restart_callback.interval, - "save final solution" => save_restart_callback.save_final_restart ? 
"yes" : "no", - "output directory" => abspath(normpath(save_restart_callback.output_directory)), - ] - summary_box(io, "SaveRestartCallback", setup) - end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:SaveRestartCallback}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + save_restart_callback = cb.affect! + + setup = [ + "interval" => save_restart_callback.interval, + "save final solution" => save_restart_callback.save_final_restart ? "yes" : + "no", + "output directory" => abspath(normpath(save_restart_callback.output_directory)), + ] + summary_box(io, "SaveRestartCallback", setup) + end end +function SaveRestartCallback(; interval = 0, + save_final_restart = true, + output_directory = "out") + restart_callback = SaveRestartCallback(interval, save_final_restart, + output_directory) -function SaveRestartCallback(; interval=0, - save_final_restart=true, - output_directory="out") - - restart_callback = SaveRestartCallback(interval, save_final_restart, - output_directory) - - DiscreteCallback(restart_callback, restart_callback, # the first one is the condition, the second the affect! - save_positions=(false,false), - initialize=initialize!) + DiscreteCallback(restart_callback, restart_callback, # the first one is the condition, the second the affect! + save_positions = (false, false), + initialize = initialize!) end +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, + integrator) where {Condition, Affect! <: SaveRestartCallback} + restart_callback = cb.affect! -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u, t, integrator) where {Condition, Affect!<:SaveRestartCallback} - restart_callback = cb.affect! - - mpi_isroot() && mkpath(restart_callback.output_directory) + mpi_isroot() && mkpath(restart_callback.output_directory) - semi = integrator.p - mesh, _, _, _ = mesh_equations_solver_cache(semi) - @trixi_timeit timer() "I/O" begin - if mesh.unsaved_changes - mesh.current_filename = save_mesh_file(mesh, restart_callback.output_directory) - mesh.unsaved_changes = false + semi = integrator.p + mesh, _, _, _ = mesh_equations_solver_cache(semi) + @trixi_timeit timer() "I/O" begin + if mesh.unsaved_changes + mesh.current_filename = save_mesh_file(mesh, + restart_callback.output_directory) + mesh.unsaved_changes = false + end end - end - return nothing + return nothing end - # this method is called to determine whether the callback should be activated function (restart_callback::SaveRestartCallback)(u, t, integrator) - @unpack interval, save_final_restart = restart_callback - - # With error-based step size control, some steps can be rejected. Thus, - # `integrator.iter >= integrator.stats.naccept` - # (total #steps) (#accepted steps) - # We need to check the number of accepted steps since callbacks are not - # activated after a rejected step. - return interval > 0 && ( - ((integrator.stats.naccept % interval == 0) && !(integrator.stats.naccept == 0 && integrator.iter > 0)) || - (save_final_restart && isfinished(integrator))) + @unpack interval, save_final_restart = restart_callback + + # With error-based step size control, some steps can be rejected. Thus, + # `integrator.iter >= integrator.stats.naccept` + # (total #steps) (#accepted steps) + # We need to check the number of accepted steps since callbacks are not + # activated after a rejected step. 
+ return interval > 0 && (((integrator.stats.naccept % interval == 0) && + !(integrator.stats.naccept == 0 && integrator.iter > 0)) || + (save_final_restart && isfinished(integrator))) end - # this method is called when the callback is activated function (restart_callback::SaveRestartCallback)(integrator) - u_ode = integrator.u - @unpack t, dt = integrator - iter = integrator.stats.naccept - semi = integrator.p - mesh, _, _, _ = mesh_equations_solver_cache(semi) - - @trixi_timeit timer() "I/O" begin - if mesh.unsaved_changes - mesh.current_filename = save_mesh_file(mesh, restart_callback.output_directory, iter) - mesh.unsaved_changes = false + u_ode = integrator.u + @unpack t, dt = integrator + iter = integrator.stats.naccept + semi = integrator.p + mesh, _, _, _ = mesh_equations_solver_cache(semi) + + @trixi_timeit timer() "I/O" begin + if mesh.unsaved_changes + mesh.current_filename = save_mesh_file(mesh, + restart_callback.output_directory, + iter) + mesh.unsaved_changes = false + end + + save_restart_file(u_ode, t, dt, iter, semi, restart_callback) end - save_restart_file(u_ode, t, dt, iter, semi, restart_callback) - end - - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) - return nothing + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) + return nothing end - @inline function save_restart_file(u_ode, t, dt, iter, semi::AbstractSemidiscretization, restart_callback) - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array_native(u_ode, mesh, equations, solver, cache) - save_restart_file(u, t, dt, iter, mesh, equations, solver, cache, restart_callback) + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + u = wrap_array_native(u_ode, mesh, equations, solver, cache) + save_restart_file(u, t, dt, iter, mesh, equations, solver, cache, restart_callback) end - """ load_time(restart_file::AbstractString) Load the time saved in a `restart_file`. """ function load_time(restart_file::AbstractString) - h5open(restart_file, "r") do file - read(attributes(file)["time"]) - end + h5open(restart_file, "r") do file + read(attributes(file)["time"]) + end end - function load_restart_file(semi::AbstractSemidiscretization, restart_file) - load_restart_file(mesh_equations_solver_cache(semi)..., restart_file) + load_restart_file(mesh_equations_solver_cache(semi)..., restart_file) end - include("save_restart_dg.jl") - - end # @muladd diff --git a/src/callbacks_step/save_restart_dg.jl b/src/callbacks_step/save_restart_dg.jl index a46a8bc856b..5695eb8bede 100644 --- a/src/callbacks_step/save_restart_dg.jl +++ b/src/callbacks_step/save_restart_dg.jl @@ -3,324 +3,327 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function save_restart_file(u, time, dt, timestep, - mesh::Union{SerialTreeMesh, StructuredMesh, UnstructuredMesh2D, SerialP4estMesh}, + mesh::Union{SerialTreeMesh, StructuredMesh, + UnstructuredMesh2D, SerialP4estMesh}, equations, dg::DG, cache, restart_callback) - - @unpack output_directory = restart_callback - - # Filename based on current time step - filename = joinpath(output_directory, @sprintf("restart_%06d.h5", timestep)) - - # Restart files always store conservative variables - data = u - - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["equations"] = get_name(equations) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_vars"] = nvariables(equations) - attributes(file)["n_elements"] = nelements(dg, cache) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] - attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar - attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar - attributes(file)["timestep"] = timestep - - # Store each variable of the solution - for v in eachvariable(equations) - # Convert to 1D array - file["variables_$v"] = vec(data[v, .., :]) - - # Add variable name as attribute - var = file["variables_$v"] - attributes(var)["name"] = varnames(cons2cons, equations)[v] + @unpack output_directory = restart_callback + + # Filename based on current time step + filename = joinpath(output_directory, @sprintf("restart_%06d.h5", timestep)) + + # Restart files always store conservative variables + data = u + + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = nvariables(equations) + attributes(file)["n_elements"] = nelements(dg, cache) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar + attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar + attributes(file)["timestep"] = timestep + + # Store each variable of the solution + for v in eachvariable(equations) + # Convert to 1D array + file["variables_$v"] = vec(data[v, .., :]) + + # Add variable name as attribute + var = file["variables_$v"] + attributes(var)["name"] = varnames(cons2cons, equations)[v] + end end - end - return filename + return filename end - -function load_restart_file(mesh::Union{SerialTreeMesh, StructuredMesh, UnstructuredMesh2D, SerialP4estMesh}, +function load_restart_file(mesh::Union{SerialTreeMesh, StructuredMesh, + UnstructuredMesh2D, SerialP4estMesh}, equations, dg::DG, cache, restart_file) - # allocate memory - u_ode = allocate_coefficients(mesh, equations, dg, cache) - u = wrap_array_native(u_ode, mesh, equations, dg, cache) - - h5open(restart_file, "r") do file - # Read attributes to perform some sanity checks - if read(attributes(file)["ndims"]) != ndims(mesh) - error("restart mismatch: ndims differs from value in restart file") - end - if read(attributes(file)["equations"]) != get_name(equations) - error("restart 
mismatch: equations differ from value in restart file") - end - if read(attributes(file)["polydeg"]) != polydeg(dg) - error("restart mismatch: polynomial degree in solver differs from value in restart file") - end - if read(attributes(file)["n_elements"]) != nelements(dg, cache) - error("restart mismatch: number of elements in solver differs from value in restart file") - end - - # Read data - for v in eachvariable(equations) - # Check if variable name matches - var = file["variables_$v"] - if (name = read(attributes(var)["name"])) != varnames(cons2cons, equations)[v] - error("mismatch: variables_$v should be '$(varnames(cons2cons, equations)[v])', but found '$name'") - end - - # Read variable - u[v, .., :] = read(file["variables_$v"]) + # allocate memory + u_ode = allocate_coefficients(mesh, equations, dg, cache) + u = wrap_array_native(u_ode, mesh, equations, dg, cache) + + h5open(restart_file, "r") do file + # Read attributes to perform some sanity checks + if read(attributes(file)["ndims"]) != ndims(mesh) + error("restart mismatch: ndims differs from value in restart file") + end + if read(attributes(file)["equations"]) != get_name(equations) + error("restart mismatch: equations differ from value in restart file") + end + if read(attributes(file)["polydeg"]) != polydeg(dg) + error("restart mismatch: polynomial degree in solver differs from value in restart file") + end + if read(attributes(file)["n_elements"]) != nelements(dg, cache) + error("restart mismatch: number of elements in solver differs from value in restart file") + end + + # Read data + for v in eachvariable(equations) + # Check if variable name matches + var = file["variables_$v"] + if (name = read(attributes(var)["name"])) != + varnames(cons2cons, equations)[v] + error("mismatch: variables_$v should be '$(varnames(cons2cons, equations)[v])', but found '$name'") + end + + # Read variable + u[v, .., :] = read(file["variables_$v"]) + end end - end - return u_ode + return u_ode end - function save_restart_file(u, time, dt, timestep, - mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, + dg::DG, cache, restart_callback) + @unpack output_directory = restart_callback + # Filename based on current time step + filename = joinpath(output_directory, @sprintf("restart_%06d.h5", timestep)) - @unpack output_directory = restart_callback - # Filename based on current time step - filename = joinpath(output_directory, @sprintf("restart_%06d.h5", timestep)) - - if HDF5.has_parallel() - save_restart_file_parallel(u, time, dt, timestep, mesh, equations, dg, cache, filename) - else - save_restart_file_on_root(u, time, dt, timestep, mesh, equations, dg, cache, filename) - end + if HDF5.has_parallel() + save_restart_file_parallel(u, time, dt, timestep, mesh, equations, dg, cache, + filename) + else + save_restart_file_on_root(u, time, dt, timestep, mesh, equations, dg, cache, + filename) + end end - function save_restart_file_parallel(u, time, dt, timestep, - mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, + equations, dg::DG, cache, filename) - # Restart files always store conservative variables - data = u - - # Calculate element and node counts by MPI rank - element_size = nnodes(dg)^ndims(mesh) - element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) - node_counts = element_counts * Cint(element_size) - # Cumulative sum of nodes per rank starting 
with an additional 0 - cum_node_counts = append!(zeros(eltype(node_counts), 1), cumsum(node_counts)) - - # Open file (clobber existing content) - h5open(filename, "w", mpi_comm()) do file - # Add context information as attributes - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["equations"] = get_name(equations) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_vars"] = nvariables(equations) - attributes(file)["n_elements"] = nelementsglobal(dg, cache) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] - attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar - attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar - attributes(file)["timestep"] = timestep - - # Store each variable of the solution - for v in eachvariable(equations) - # Need to create dataset explicitly in parallel case - var = create_dataset(file, "/variables_$v", datatype(eltype(data)), dataspace((ndofsglobal(mesh, dg, cache),))) - # Write data of each process in slices (ranks start with 0) - slice = (cum_node_counts[mpi_rank() + 1] + 1):cum_node_counts[mpi_rank() + 2] - # Convert to 1D array - var[slice] = vec(data[v, .., :]) - # Add variable name as attribute - attributes(var)["name"] = varnames(cons2cons, equations)[v] + # Restart files always store conservative variables + data = u + + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(mesh) + element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) + node_counts = element_counts * Cint(element_size) + # Cumulative sum of nodes per rank starting with an additional 0 + cum_node_counts = append!(zeros(eltype(node_counts), 1), cumsum(node_counts)) + + # Open file (clobber existing content) + h5open(filename, "w", mpi_comm()) do file + # Add context information as attributes + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = nvariables(equations) + attributes(file)["n_elements"] = nelementsglobal(dg, cache) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar + attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar + attributes(file)["timestep"] = timestep + + # Store each variable of the solution + for v in eachvariable(equations) + # Need to create dataset explicitly in parallel case + var = create_dataset(file, "/variables_$v", datatype(eltype(data)), + dataspace((ndofsglobal(mesh, dg, cache),))) + # Write data of each process in slices (ranks start with 0) + slice = (cum_node_counts[mpi_rank() + 1] + 1):cum_node_counts[mpi_rank() + 2] + # Convert to 1D array + var[slice] = vec(data[v, .., :]) + # Add variable name as attribute + attributes(var)["name"] = varnames(cons2cons, equations)[v] + end end - end - return filename + return filename end - function save_restart_file_on_root(u, time, dt, timestep, - mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, + equations, dg::DG, cache, filename) - # Restart files always store conservative variables - data = u + # Restart files always store conservative 
variables + data = u - # Calculate element and node counts by MPI rank - element_size = nnodes(dg)^ndims(mesh) - element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) - node_counts = element_counts * Cint(element_size) + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(mesh) + element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) + node_counts = element_counts * Cint(element_size) - # non-root ranks only send data - if !mpi_isroot() - # Send nodal data to root - for v in eachvariable(equations) - MPI.Gatherv!(vec(data[v, .., :]), nothing, mpi_root(), mpi_comm()) - end + # non-root ranks only send data + if !mpi_isroot() + # Send nodal data to root + for v in eachvariable(equations) + MPI.Gatherv!(vec(data[v, .., :]), nothing, mpi_root(), mpi_comm()) + end - return filename - end - - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["equations"] = get_name(equations) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_vars"] = nvariables(equations) - attributes(file)["n_elements"] = nelements(dg, cache) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] - attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar - attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar - attributes(file)["timestep"] = timestep - - # Store each variable of the solution - for v in eachvariable(equations) - # Convert to 1D array - recv = Vector{eltype(data)}(undef, sum(node_counts)) - MPI.Gatherv!(vec(data[v, .., :]), MPI.VBuffer(recv, node_counts), mpi_root(), mpi_comm()) - file["variables_$v"] = recv - - # Add variable name as attribute - var = file["variables_$v"] - attributes(var)["name"] = varnames(cons2cons, equations)[v] + return filename end - end - - return filename -end - -function load_restart_file(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, restart_file) + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = nvariables(equations) + attributes(file)["n_elements"] = nelements(dg, cache) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar + attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar + attributes(file)["timestep"] = timestep + + # Store each variable of the solution + for v in eachvariable(equations) + # Convert to 1D array + recv = Vector{eltype(data)}(undef, sum(node_counts)) + MPI.Gatherv!(vec(data[v, .., :]), MPI.VBuffer(recv, node_counts), + mpi_root(), mpi_comm()) + file["variables_$v"] = recv + + # Add variable name as attribute + var = file["variables_$v"] + attributes(var)["name"] = varnames(cons2cons, equations)[v] + end + end - if HDF5.has_parallel() - load_restart_file_parallel(mesh, equations, dg, cache, restart_file) - else - load_restart_file_on_root(mesh, equations, dg, cache, restart_file) - end + 
return filename end - -function load_restart_file_parallel(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, restart_file) - - # Calculate element and node counts by MPI rank - element_size = nnodes(dg)^ndims(mesh) - element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) - node_counts = element_counts * Cint(element_size) - # Cumulative sum of nodes per rank starting with an additional 0 - cum_node_counts = append!(zeros(eltype(node_counts), 1), cumsum(node_counts)) - - # allocate memory - u_ode = allocate_coefficients(mesh, equations, dg, cache) - u = wrap_array_native(u_ode, mesh, equations, dg, cache) - - # read in parallel - h5open(restart_file, "r", mpi_comm()) do file - # Read attributes to perform some sanity checks - if read(attributes(file)["ndims"]) != ndims(mesh) - error("restart mismatch: ndims differs from value in restart file") - end - if read(attributes(file)["equations"]) != get_name(equations) - error("restart mismatch: equations differ from value in restart file") - end - if read(attributes(file)["polydeg"]) != polydeg(dg) - error("restart mismatch: polynomial degree in solver differs from value in restart file") - end - if read(attributes(file)["n_elements"]) != nelementsglobal(dg, cache) - error("restart mismatch: number of elements in solver differs from value in restart file") - end - - # Read data - for v in eachvariable(equations) - # Check if variable name matches - var = file["variables_$v"] - if (name = read(attributes(var)["name"])) != varnames(cons2cons, equations)[v] - error("mismatch: variables_$v should be '$(varnames(cons2cons, equations)[v])', but found '$name'") - end - - # Read variable - mpi_println("Reading variables_$v ($name)...") - # Read data of each process in slices (ranks start with 0) - slice = (cum_node_counts[mpi_rank() + 1] + 1):cum_node_counts[mpi_rank() + 2] - # Convert 1D array back to actual size of `u` - u[v, .., :] = reshape(read(var)[slice], size(@view u[v, .., :])) +function load_restart_file(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, + dg::DG, cache, restart_file) + if HDF5.has_parallel() + load_restart_file_parallel(mesh, equations, dg, cache, restart_file) + else + load_restart_file_on_root(mesh, equations, dg, cache, restart_file) end - end - - return u_ode end - -function load_restart_file_on_root(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, restart_file) - - # Calculate element and node counts by MPI rank - element_size = nnodes(dg)^ndims(mesh) - element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) - node_counts = element_counts * Cint(element_size) - - # allocate memory - u_ode = allocate_coefficients(mesh, equations, dg, cache) - u = wrap_array_native(u_ode, mesh, equations, dg, cache) - - # non-root ranks only receive data - if !mpi_isroot() - # Receive nodal data from root - for v in eachvariable(equations) - # put Scatterv in both blocks of the if condition to avoid type instability - if isempty(u) - data = eltype(u)[] - MPI.Scatterv!(nothing, data, mpi_root(), mpi_comm()) - else - data = @view u[v, .., :] - MPI.Scatterv!(nothing, data, mpi_root(), mpi_comm()) - end +function load_restart_file_parallel(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, + equations, dg::DG, cache, restart_file) + + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(mesh) + element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) + node_counts = 
element_counts * Cint(element_size) + # Cumulative sum of nodes per rank starting with an additional 0 + cum_node_counts = append!(zeros(eltype(node_counts), 1), cumsum(node_counts)) + + # allocate memory + u_ode = allocate_coefficients(mesh, equations, dg, cache) + u = wrap_array_native(u_ode, mesh, equations, dg, cache) + + # read in parallel + h5open(restart_file, "r", mpi_comm()) do file + # Read attributes to perform some sanity checks + if read(attributes(file)["ndims"]) != ndims(mesh) + error("restart mismatch: ndims differs from value in restart file") + end + if read(attributes(file)["equations"]) != get_name(equations) + error("restart mismatch: equations differ from value in restart file") + end + if read(attributes(file)["polydeg"]) != polydeg(dg) + error("restart mismatch: polynomial degree in solver differs from value in restart file") + end + if read(attributes(file)["n_elements"]) != nelementsglobal(dg, cache) + error("restart mismatch: number of elements in solver differs from value in restart file") + end + + # Read data + for v in eachvariable(equations) + # Check if variable name matches + var = file["variables_$v"] + if (name = read(attributes(var)["name"])) != + varnames(cons2cons, equations)[v] + error("mismatch: variables_$v should be '$(varnames(cons2cons, equations)[v])', but found '$name'") + end + + # Read variable + mpi_println("Reading variables_$v ($name)...") + # Read data of each process in slices (ranks start with 0) + slice = (cum_node_counts[mpi_rank() + 1] + 1):cum_node_counts[mpi_rank() + 2] + # Convert 1D array back to actual size of `u` + u[v, .., :] = reshape(read(var)[slice], size(@view u[v, .., :])) + end end return u_ode - end +end - # read only on MPI root - h5open(restart_file, "r") do file - # Read attributes to perform some sanity checks - if read(attributes(file)["ndims"]) != ndims(mesh) - error("restart mismatch: ndims differs from value in restart file") - end - if read(attributes(file)["equations"]) != get_name(equations) - error("restart mismatch: equations differ from value in restart file") - end - if read(attributes(file)["polydeg"]) != polydeg(dg) - error("restart mismatch: polynomial degree in solver differs from value in restart file") - end - if read(attributes(file)["n_elements"]) != nelements(dg, cache) - error("restart mismatch: number of elements in solver differs from value in restart file") +function load_restart_file_on_root(mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, + equations, dg::DG, cache, restart_file) + + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(mesh) + element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) + node_counts = element_counts * Cint(element_size) + + # allocate memory + u_ode = allocate_coefficients(mesh, equations, dg, cache) + u = wrap_array_native(u_ode, mesh, equations, dg, cache) + + # non-root ranks only receive data + if !mpi_isroot() + # Receive nodal data from root + for v in eachvariable(equations) + # put Scatterv in both blocks of the if condition to avoid type instability + if isempty(u) + data = eltype(u)[] + MPI.Scatterv!(nothing, data, mpi_root(), mpi_comm()) + else + data = @view u[v, .., :] + MPI.Scatterv!(nothing, data, mpi_root(), mpi_comm()) + end + end + + return u_ode end - # Read data - for v in eachvariable(equations) - # Check if variable name matches - var = file["variables_$v"] - if (name = read(attributes(var)["name"])) != varnames(cons2cons, equations)[v] - error("mismatch: variables_$v should be 
'$(varnames(cons2cons, equations)[v])', but found '$name'") - end - - # Read variable - println("Reading variables_$v ($name)...") - sendbuf = MPI.VBuffer(read(file["variables_$v"]), node_counts) - MPI.Scatterv!(sendbuf, @view(u[v, .., :]), mpi_root(), mpi_comm()) + # read only on MPI root + h5open(restart_file, "r") do file + # Read attributes to perform some sanity checks + if read(attributes(file)["ndims"]) != ndims(mesh) + error("restart mismatch: ndims differs from value in restart file") + end + if read(attributes(file)["equations"]) != get_name(equations) + error("restart mismatch: equations differ from value in restart file") + end + if read(attributes(file)["polydeg"]) != polydeg(dg) + error("restart mismatch: polynomial degree in solver differs from value in restart file") + end + if read(attributes(file)["n_elements"]) != nelements(dg, cache) + error("restart mismatch: number of elements in solver differs from value in restart file") + end + + # Read data + for v in eachvariable(equations) + # Check if variable name matches + var = file["variables_$v"] + if (name = read(attributes(var)["name"])) != + varnames(cons2cons, equations)[v] + error("mismatch: variables_$v should be '$(varnames(cons2cons, equations)[v])', but found '$name'") + end + + # Read variable + println("Reading variables_$v ($name)...") + sendbuf = MPI.VBuffer(read(file["variables_$v"]), node_counts) + MPI.Scatterv!(sendbuf, @view(u[v, .., :]), mpi_root(), mpi_comm()) + end end - end - return u_ode + return u_ode end - - end # @muladd diff --git a/src/callbacks_step/save_solution.jl b/src/callbacks_step/save_solution.jl index 6cccbc9d3f9..55f17bbc1c7 100644 --- a/src/callbacks_step/save_solution.jl +++ b/src/callbacks_step/save_solution.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ SaveSolutionCallback(; interval::Integer=0, @@ -22,200 +22,211 @@ to `solution_variables` will be the set of conservative variables and the second parameter is the equation struct. """ mutable struct SaveSolutionCallback{IntervalType, SolutionVariablesType} - interval_or_dt::IntervalType - save_initial_solution::Bool - save_final_solution::Bool - output_directory::String - solution_variables::SolutionVariablesType + interval_or_dt::IntervalType + save_initial_solution::Bool + save_final_solution::Bool + output_directory::String + solution_variables::SolutionVariablesType end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:SaveSolutionCallback}) - @nospecialize cb # reduce precompilation time + @nospecialize cb # reduce precompilation time - save_solution_callback = cb.affect! - print(io, "SaveSolutionCallback(interval=", save_solution_callback.interval_or_dt, ")") + save_solution_callback = cb.affect! + print(io, "SaveSolutionCallback(interval=", save_solution_callback.interval_or_dt, + ")") end function Base.show(io::IO, - cb::DiscreteCallback{<:Any, <:PeriodicCallbackAffect{<:SaveSolutionCallback}}) - @nospecialize cb # reduce precompilation time + cb::DiscreteCallback{<:Any, + <:PeriodicCallbackAffect{<:SaveSolutionCallback + }}) + @nospecialize cb # reduce precompilation time - save_solution_callback = cb.affect!.affect! - print(io, "SaveSolutionCallback(dt=", save_solution_callback.interval_or_dt, ")") + save_solution_callback = cb.affect!.affect! 
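+    # Note: with a `PeriodicCallback`, the `SaveSolutionCallback` is wrapped in a
+    # `PeriodicCallbackAffect`, hence the double `affect!` unwrapping above.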
+ print(io, "SaveSolutionCallback(dt=", save_solution_callback.interval_or_dt, ")") end -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:SaveSolutionCallback}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else - save_solution_callback = cb.affect! - - setup = [ - "interval" => save_solution_callback.interval_or_dt, - "solution variables" => save_solution_callback.solution_variables, - "save initial solution" => save_solution_callback.save_initial_solution ? "yes" : "no", - "save final solution" => save_solution_callback.save_final_solution ? "yes" : "no", - "output directory" => abspath(normpath(save_solution_callback.output_directory)), - ] - summary_box(io, "SaveSolutionCallback", setup) - end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:SaveSolutionCallback}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + save_solution_callback = cb.affect! + + setup = [ + "interval" => save_solution_callback.interval_or_dt, + "solution variables" => save_solution_callback.solution_variables, + "save initial solution" => save_solution_callback.save_initial_solution ? + "yes" : "no", + "save final solution" => save_solution_callback.save_final_solution ? + "yes" : "no", + "output directory" => abspath(normpath(save_solution_callback.output_directory)), + ] + summary_box(io, "SaveSolutionCallback", setup) + end end function Base.show(io::IO, ::MIME"text/plain", - cb::DiscreteCallback{<:Any, <:PeriodicCallbackAffect{<:SaveSolutionCallback}}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else - save_solution_callback = cb.affect!.affect! - - setup = [ - "dt" => save_solution_callback.interval_or_dt, - "solution variables" => save_solution_callback.solution_variables, - "save initial solution" => save_solution_callback.save_initial_solution ? "yes" : "no", - "save final solution" => save_solution_callback.save_final_solution ? "yes" : "no", - "output directory" => abspath(normpath(save_solution_callback.output_directory)), - ] - summary_box(io, "SaveSolutionCallback", setup) - end + cb::DiscreteCallback{<:Any, + <:PeriodicCallbackAffect{<:SaveSolutionCallback + }}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + save_solution_callback = cb.affect!.affect! + + setup = [ + "dt" => save_solution_callback.interval_or_dt, + "solution variables" => save_solution_callback.solution_variables, + "save initial solution" => save_solution_callback.save_initial_solution ? + "yes" : "no", + "save final solution" => save_solution_callback.save_final_solution ? 
+ "yes" : "no", + "output directory" => abspath(normpath(save_solution_callback.output_directory)), + ] + summary_box(io, "SaveSolutionCallback", setup) + end end +function SaveSolutionCallback(; interval::Integer = 0, + dt = nothing, + save_initial_solution = true, + save_final_solution = true, + output_directory = "out", + solution_variables = cons2prim) + if !isnothing(dt) && interval > 0 + throw(ArgumentError("You can either set the number of steps between output (using `interval`) or the time between outputs (using `dt`) but not both simultaneously")) + end -function SaveSolutionCallback(; interval::Integer=0, - dt=nothing, - save_initial_solution=true, - save_final_solution=true, - output_directory="out", - solution_variables=cons2prim) - - if !isnothing(dt) && interval > 0 - throw(ArgumentError("You can either set the number of steps between output (using `interval`) or the time between outputs (using `dt`) but not both simultaneously")) - end - - # Expected most frequent behavior comes first - if isnothing(dt) - interval_or_dt = interval - else # !isnothing(dt) - interval_or_dt = dt - end - - solution_callback = SaveSolutionCallback(interval_or_dt, - save_initial_solution, save_final_solution, - output_directory, solution_variables) - - # Expected most frequent behavior comes first - if isnothing(dt) - # Save every `interval` (accepted) time steps - # The first one is the condition, the second the affect! - return DiscreteCallback(solution_callback, solution_callback, - save_positions=(false,false), - initialize=initialize_save_cb!) - else - # Add a `tstop` every `dt`, and save the final solution. - return PeriodicCallback(solution_callback, dt, - save_positions=(false, false), - initialize=initialize_save_cb!, - final_affect=save_final_solution) - end -end + # Expected most frequent behavior comes first + if isnothing(dt) + interval_or_dt = interval + else # !isnothing(dt) + interval_or_dt = dt + end + solution_callback = SaveSolutionCallback(interval_or_dt, + save_initial_solution, save_final_solution, + output_directory, solution_variables) + + # Expected most frequent behavior comes first + if isnothing(dt) + # Save every `interval` (accepted) time steps + # The first one is the condition, the second the affect! + return DiscreteCallback(solution_callback, solution_callback, + save_positions = (false, false), + initialize = initialize_save_cb!) + else + # Add a `tstop` every `dt`, and save the final solution. + return PeriodicCallback(solution_callback, dt, + save_positions = (false, false), + initialize = initialize_save_cb!, + final_affect = save_final_solution) + end +end function initialize_save_cb!(cb, u, t, integrator) - # The SaveSolutionCallback is either cb.affect! (with DiscreteCallback) - # or cb.affect!.affect! (with PeriodicCallback). - # Let recursive dispatch handle this. - initialize_save_cb!(cb.affect!, u, t, integrator) + # The SaveSolutionCallback is either cb.affect! (with DiscreteCallback) + # or cb.affect!.affect! (with PeriodicCallback). + # Let recursive dispatch handle this. 
+ initialize_save_cb!(cb.affect!, u, t, integrator) end function initialize_save_cb!(solution_callback::SaveSolutionCallback, u, t, integrator) - mpi_isroot() && mkpath(solution_callback.output_directory) - - semi = integrator.p - mesh, _, _, _ = mesh_equations_solver_cache(semi) - @trixi_timeit timer() "I/O" begin - if mesh.unsaved_changes - mesh.current_filename = save_mesh_file(mesh, solution_callback.output_directory) - mesh.unsaved_changes = false + mpi_isroot() && mkpath(solution_callback.output_directory) + + semi = integrator.p + mesh, _, _, _ = mesh_equations_solver_cache(semi) + @trixi_timeit timer() "I/O" begin + if mesh.unsaved_changes + mesh.current_filename = save_mesh_file(mesh, + solution_callback.output_directory) + mesh.unsaved_changes = false + end end - end - if solution_callback.save_initial_solution - solution_callback(integrator) - end + if solution_callback.save_initial_solution + solution_callback(integrator) + end - return nothing + return nothing end - # this method is called to determine whether the callback should be activated function (solution_callback::SaveSolutionCallback)(u, t, integrator) - @unpack interval_or_dt, save_final_solution = solution_callback - - # With error-based step size control, some steps can be rejected. Thus, - # `integrator.iter >= integrator.stats.naccept` - # (total #steps) (#accepted steps) - # We need to check the number of accepted steps since callbacks are not - # activated after a rejected step. - return interval_or_dt > 0 && ( - ((integrator.stats.naccept % interval_or_dt == 0) && !(integrator.stats.naccept == 0 && integrator.iter > 0)) || - (save_final_solution && isfinished(integrator))) + @unpack interval_or_dt, save_final_solution = solution_callback + + # With error-based step size control, some steps can be rejected. Thus, + # `integrator.iter >= integrator.stats.naccept` + # (total #steps) (#accepted steps) + # We need to check the number of accepted steps since callbacks are not + # activated after a rejected step. 
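+    # Worked example (hypothetical numbers): with `interval_or_dt = 10`, the
+    # solution is saved every 10 accepted time steps and, if
+    # `save_final_solution` is set, once more at the end of the integration.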
+ return interval_or_dt > 0 && (((integrator.stats.naccept % interval_or_dt == 0) && + !(integrator.stats.naccept == 0 && integrator.iter > 0)) || + (save_final_solution && isfinished(integrator))) end - # this method is called when the callback is activated function (solution_callback::SaveSolutionCallback)(integrator) - u_ode = integrator.u - @unpack t, dt = integrator - iter = integrator.stats.naccept - semi = integrator.p - mesh, _, _, _ = mesh_equations_solver_cache(semi) - - @trixi_timeit timer() "I/O" begin - @trixi_timeit timer() "save mesh" if mesh.unsaved_changes - mesh.current_filename = save_mesh_file(mesh, solution_callback.output_directory, iter) - mesh.unsaved_changes = false - end - - element_variables = Dict{Symbol, Any}() - @trixi_timeit timer() "get element variables" begin - get_element_variables!(element_variables, u_ode, semi) - callbacks = integrator.opts.callback - if callbacks isa CallbackSet - for cb in callbacks.continuous_callbacks - get_element_variables!(element_variables, u_ode, semi, cb; t=integrator.t, iter=integrator.stats.naccept) + u_ode = integrator.u + @unpack t, dt = integrator + iter = integrator.stats.naccept + semi = integrator.p + mesh, _, _, _ = mesh_equations_solver_cache(semi) + + @trixi_timeit timer() "I/O" begin + @trixi_timeit timer() "save mesh" if mesh.unsaved_changes + mesh.current_filename = save_mesh_file(mesh, + solution_callback.output_directory, + iter) + mesh.unsaved_changes = false end - for cb in callbacks.discrete_callbacks - get_element_variables!(element_variables, u_ode, semi, cb; t=integrator.t, iter=integrator.stats.naccept) + + element_variables = Dict{Symbol, Any}() + @trixi_timeit timer() "get element variables" begin + get_element_variables!(element_variables, u_ode, semi) + callbacks = integrator.opts.callback + if callbacks isa CallbackSet + for cb in callbacks.continuous_callbacks + get_element_variables!(element_variables, u_ode, semi, cb; + t = integrator.t, + iter = integrator.stats.naccept) + end + for cb in callbacks.discrete_callbacks + get_element_variables!(element_variables, u_ode, semi, cb; + t = integrator.t, + iter = integrator.stats.naccept) + end + end end - end - end - @trixi_timeit timer() "save solution" save_solution_file(u_ode, t, dt, iter, semi, solution_callback, element_variables) - end + @trixi_timeit timer() "save solution" save_solution_file(u_ode, t, dt, iter, + semi, + solution_callback, + element_variables) + end - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) - return nothing + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) + return nothing end - @inline function save_solution_file(u_ode, t, dt, iter, semi::AbstractSemidiscretization, solution_callback, - element_variables=Dict{Symbol,Any}()) - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array_native(u_ode, mesh, equations, solver, cache) - save_solution_file(u, t, dt, iter, mesh, equations, solver, cache, solution_callback, element_variables) + element_variables = Dict{Symbol, Any}()) + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + u = wrap_array_native(u_ode, mesh, equations, solver, cache) + save_solution_file(u, t, dt, iter, mesh, equations, solver, cache, + solution_callback, element_variables) end - # TODO: Taal refactor, move save_mesh_file? 
# function save_mesh_file(mesh::TreeMesh, output_directory, timestep=-1) in io/io.jl include("save_solution_dg.jl") - - end # @muladd diff --git a/src/callbacks_step/save_solution_dg.jl b/src/callbacks_step/save_solution_dg.jl index 6d1cdf0151b..6cd4a0ec9c1 100644 --- a/src/callbacks_step/save_solution_dg.jl +++ b/src/callbacks_step/save_solution_dg.jl @@ -3,238 +3,253 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function save_solution_file(u, time, dt, timestep, - mesh::Union{SerialTreeMesh, StructuredMesh, UnstructuredMesh2D, SerialP4estMesh}, + mesh::Union{SerialTreeMesh, StructuredMesh, + UnstructuredMesh2D, SerialP4estMesh}, equations, dg::DG, cache, - solution_callback, element_variables=Dict{Symbol,Any}(); - system="") - - @unpack output_directory, solution_variables = solution_callback - - # Filename based on current time step - if isempty(system) - filename = joinpath(output_directory, @sprintf("solution_%06d.h5", timestep)) - else - filename = joinpath(output_directory, @sprintf("solution_%s_%06d.h5", system, timestep)) - end - - # Convert to different set of variables if requested - if solution_variables === cons2cons - data = u - n_vars = nvariables(equations) - else - # Reinterpret the solution array as an array of conservative variables, - # compute the solution variables via broadcasting, and reinterpret the - # result as a plain array of floating point numbers - data = Array(reinterpret(eltype(u), - solution_variables.(reinterpret(SVector{nvariables(equations),eltype(u)}, u), - Ref(equations)))) - - # Find out variable count by looking at output from `solution_variables` function - n_vars = size(data, 1) - end - - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["equations"] = get_name(equations) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_vars"] = n_vars - attributes(file)["n_elements"] = nelements(dg, cache) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] - attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar - attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar - attributes(file)["timestep"] = timestep - - # Store each variable of the solution data - for v in 1:n_vars - # Convert to 1D array - file["variables_$v"] = vec(data[v, .., :]) - - # Add variable name as attribute - var = file["variables_$v"] - attributes(var)["name"] = varnames(solution_variables, equations)[v] + solution_callback, element_variables = Dict{Symbol, Any}(); + system = "") + @unpack output_directory, solution_variables = solution_callback + + # Filename based on current time step + if isempty(system) + filename = joinpath(output_directory, @sprintf("solution_%06d.h5", timestep)) + else + filename = joinpath(output_directory, + @sprintf("solution_%s_%06d.h5", system, timestep)) end - # Store element variables - for (v, (key, element_variable)) in enumerate(element_variables) - # Add to file - file["element_variables_$v"] = element_variable + # Convert to different set of variables if requested + if solution_variables === cons2cons + data = u + n_vars = nvariables(equations) + else + # Reinterpret the solution array as an array of conservative variables, + # compute the solution 
variables via broadcasting, and reinterpret the + # result as a plain array of floating point numbers + data = Array(reinterpret(eltype(u), + solution_variables.(reinterpret(SVector{ + nvariables(equations), + eltype(u)}, u), + Ref(equations)))) + + # Find out variable count by looking at output from `solution_variables` function + n_vars = size(data, 1) + end - # Add variable name as attribute - var = file["element_variables_$v"] - attributes(var)["name"] = string(key) + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = n_vars + attributes(file)["n_elements"] = nelements(dg, cache) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar + attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar + attributes(file)["timestep"] = timestep + + # Store each variable of the solution data + for v in 1:n_vars + # Convert to 1D array + file["variables_$v"] = vec(data[v, .., :]) + + # Add variable name as attribute + var = file["variables_$v"] + attributes(var)["name"] = varnames(solution_variables, equations)[v] + end + + # Store element variables + for (v, (key, element_variable)) in enumerate(element_variables) + # Add to file + file["element_variables_$v"] = element_variable + + # Add variable name as attribute + var = file["element_variables_$v"] + attributes(var)["name"] = string(key) + end end - end - return filename + return filename end - function save_solution_file(u, time, dt, timestep, - mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, - solution_callback, element_variables=Dict{Symbol,Any}(); - system="") - - @unpack output_directory, solution_variables = solution_callback - - # Filename based on current time step - if isempty(system) - filename = joinpath(output_directory, @sprintf("solution_%06d.h5", timestep)) - else - filename = joinpath(output_directory, @sprintf("solution_%s_%06d.h5", system, timestep)) - end - - # Convert to different set of variables if requested - if solution_variables === cons2cons - data = u - n_vars = nvariables(equations) - else - # Reinterpret the solution array as an array of conservative variables, - # compute the solution variables via broadcasting, and reinterpret the - # result as a plain array of floating point numbers - data = Array(reinterpret(eltype(u), - solution_variables.(reinterpret(SVector{nvariables(equations),eltype(u)}, u), - Ref(equations)))) - - # Find out variable count by looking at output from `solution_variables` function - n_vars = size(data, 1) - end - - if HDF5.has_parallel() - save_solution_file_parallel(data, time, dt, timestep, n_vars, mesh, equations, dg, cache, solution_variables, filename, element_variables) - else - save_solution_file_on_root(data, time, dt, timestep, n_vars, mesh, equations, dg, cache, solution_variables, filename, element_variables) - end -end + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, + dg::DG, cache, + solution_callback, element_variables = Dict{Symbol, Any}(); + system = "") + @unpack output_directory, solution_variables = solution_callback + + # Filename based on current time step + if isempty(system) + 
filename = joinpath(output_directory, @sprintf("solution_%06d.h5", timestep)) + else + filename = joinpath(output_directory, + @sprintf("solution_%s_%06d.h5", system, timestep)) + end + # Convert to different set of variables if requested + if solution_variables === cons2cons + data = u + n_vars = nvariables(equations) + else + # Reinterpret the solution array as an array of conservative variables, + # compute the solution variables via broadcasting, and reinterpret the + # result as a plain array of floating point numbers + data = Array(reinterpret(eltype(u), + solution_variables.(reinterpret(SVector{ + nvariables(equations), + eltype(u)}, u), + Ref(equations)))) + + # Find out variable count by looking at output from `solution_variables` function + n_vars = size(data, 1) + end -function save_solution_file_parallel(data, time, dt, timestep, n_vars, - mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, - solution_variables, filename, element_variables=Dict{Symbol,Any}()) - - # Calculate element and node counts by MPI rank - element_size = nnodes(dg)^ndims(mesh) - element_counts = cache.mpi_cache.n_elements_by_rank - node_counts = element_counts * element_size - # Cumulative sum of elements per rank starting with an additional 0 - cum_element_counts = append!(zeros(eltype(element_counts), 1), cumsum(element_counts)) - # Cumulative sum of nodes per rank starting with an additional 0 - cum_node_counts = append!(zeros(eltype(node_counts), 1), cumsum(node_counts)) - - # Open file using parallel HDF5 (clobber existing content) - h5open(filename, "w", mpi_comm()) do file - # Add context information as attributes - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["equations"] = get_name(equations) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_vars"] = n_vars - attributes(file)["n_elements"] = nelementsglobal(dg, cache) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] - attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar - attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar - attributes(file)["timestep"] = timestep - - # Store each variable of the solution data - for v in 1:n_vars - # Need to create dataset explicitly in parallel case - var = create_dataset(file, "/variables_$v", datatype(eltype(data)), dataspace((ndofsglobal(mesh, dg, cache),))) - # Write data of each process in slices (ranks start with 0) - slice = (cum_node_counts[mpi_rank() + 1] + 1):cum_node_counts[mpi_rank() + 2] - # Convert to 1D array - var[slice] = vec(data[v, .., :]) - # Add variable name as attribute - attributes(var)["name"] = varnames(solution_variables, equations)[v] + if HDF5.has_parallel() + save_solution_file_parallel(data, time, dt, timestep, n_vars, mesh, equations, + dg, cache, solution_variables, filename, + element_variables) + else + save_solution_file_on_root(data, time, dt, timestep, n_vars, mesh, equations, + dg, cache, solution_variables, filename, + element_variables) end +end - # Store element variables - for (v, (key, element_variable)) in enumerate(element_variables) - # Need to create dataset explicitly in parallel case - var = create_dataset(file, "/element_variables_$v", datatype(eltype(element_variable)), dataspace((nelementsglobal(dg, cache),))) - - # Write data of each process in slices (ranks start with 0) - slice = (cum_element_counts[mpi_rank() + 1] + 
1):cum_element_counts[mpi_rank() + 2] - # Add to file - var[slice] = element_variable - # Add variable name as attribute - attributes(var)["name"] = string(key) +function save_solution_file_parallel(data, time, dt, timestep, n_vars, + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, + equations, dg::DG, cache, + solution_variables, filename, + element_variables = Dict{Symbol, Any}()) + + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(mesh) + element_counts = cache.mpi_cache.n_elements_by_rank + node_counts = element_counts * element_size + # Cumulative sum of elements per rank starting with an additional 0 + cum_element_counts = append!(zeros(eltype(element_counts), 1), + cumsum(element_counts)) + # Cumulative sum of nodes per rank starting with an additional 0 + cum_node_counts = append!(zeros(eltype(node_counts), 1), cumsum(node_counts)) + + # Open file using parallel HDF5 (clobber existing content) + h5open(filename, "w", mpi_comm()) do file + # Add context information as attributes + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = n_vars + attributes(file)["n_elements"] = nelementsglobal(dg, cache) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar + attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar + attributes(file)["timestep"] = timestep + + # Store each variable of the solution data + for v in 1:n_vars + # Need to create dataset explicitly in parallel case + var = create_dataset(file, "/variables_$v", datatype(eltype(data)), + dataspace((ndofsglobal(mesh, dg, cache),))) + # Write data of each process in slices (ranks start with 0) + slice = (cum_node_counts[mpi_rank() + 1] + 1):cum_node_counts[mpi_rank() + 2] + # Convert to 1D array + var[slice] = vec(data[v, .., :]) + # Add variable name as attribute + attributes(var)["name"] = varnames(solution_variables, equations)[v] + end + + # Store element variables + for (v, (key, element_variable)) in enumerate(element_variables) + # Need to create dataset explicitly in parallel case + var = create_dataset(file, "/element_variables_$v", + datatype(eltype(element_variable)), + dataspace((nelementsglobal(dg, cache),))) + + # Write data of each process in slices (ranks start with 0) + slice = (cum_element_counts[mpi_rank() + 1] + 1):cum_element_counts[mpi_rank() + 2] + # Add to file + var[slice] = element_variable + # Add variable name as attribute + attributes(var)["name"] = string(key) + end end - end - return filename + return filename end - function save_solution_file_on_root(data, time, dt, timestep, n_vars, - mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, equations, dg::DG, cache, - solution_variables, filename, element_variables=Dict{Symbol,Any}()) - - # Calculate element and node counts by MPI rank - element_size = nnodes(dg)^ndims(mesh) - element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) - node_counts = element_counts * Cint(element_size) - - # non-root ranks only send data - if !mpi_isroot() - # Send nodal data to root - for v in 1:n_vars - MPI.Gatherv!(vec(data[v, .., :]), nothing, mpi_root(), mpi_comm()) + mesh::Union{ParallelTreeMesh, ParallelP4estMesh}, + equations, dg::DG, cache, + solution_variables, 
filename, + element_variables = Dict{Symbol, Any}()) + + # Calculate element and node counts by MPI rank + element_size = nnodes(dg)^ndims(mesh) + element_counts = convert(Vector{Cint}, collect(cache.mpi_cache.n_elements_by_rank)) + node_counts = element_counts * Cint(element_size) + + # non-root ranks only send data + if !mpi_isroot() + # Send nodal data to root + for v in 1:n_vars + MPI.Gatherv!(vec(data[v, .., :]), nothing, mpi_root(), mpi_comm()) + end + + # Send element data to root + for (key, element_variable) in element_variables + MPI.Gatherv!(element_variable, nothing, mpi_root(), mpi_comm()) + end + + return filename end - # Send element data to root - for (key, element_variable) in element_variables - MPI.Gatherv!(element_variable, nothing, mpi_root(), mpi_comm()) + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = n_vars + attributes(file)["n_elements"] = nelementsglobal(dg, cache) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] + attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar + attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar + attributes(file)["timestep"] = timestep + + # Store each variable of the solution data + for v in 1:n_vars + # Convert to 1D array + recv = Vector{eltype(data)}(undef, sum(node_counts)) + MPI.Gatherv!(vec(data[v, .., :]), MPI.VBuffer(recv, node_counts), + mpi_root(), mpi_comm()) + file["variables_$v"] = recv + + # Add variable name as attribute + var = file["variables_$v"] + attributes(var)["name"] = varnames(solution_variables, equations)[v] + end + + # Store element variables + for (v, (key, element_variable)) in enumerate(element_variables) + # Add to file + recv = Vector{eltype(data)}(undef, sum(element_counts)) + MPI.Gatherv!(element_variable, MPI.VBuffer(recv, element_counts), + mpi_root(), mpi_comm()) + file["element_variables_$v"] = recv + + # Add variable name as attribute + var = file["element_variables_$v"] + attributes(var)["name"] = string(key) + end end return filename - end - - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["equations"] = get_name(equations) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_vars"] = n_vars - attributes(file)["n_elements"] = nelementsglobal(dg, cache) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["mesh_file"] = splitdir(mesh.current_filename)[2] - attributes(file)["time"] = convert(Float64, time) # Ensure that `time` is written as a double precision scalar - attributes(file)["dt"] = convert(Float64, dt) # Ensure that `dt` is written as a double precision scalar - attributes(file)["timestep"] = timestep - - # Store each variable of the solution data - for v in 1:n_vars - # Convert to 1D array - recv = Vector{eltype(data)}(undef, sum(node_counts)) - MPI.Gatherv!(vec(data[v, .., :]), MPI.VBuffer(recv, node_counts), mpi_root(), mpi_comm()) - file["variables_$v"] = recv - - # Add variable name as attribute - var = file["variables_$v"] - attributes(var)["name"] = varnames(solution_variables, equations)[v] - end - - # Store element 
variables - for (v, (key, element_variable)) in enumerate(element_variables) - # Add to file - recv = Vector{eltype(data)}(undef, sum(element_counts)) - MPI.Gatherv!(element_variable, MPI.VBuffer(recv, element_counts), mpi_root(), mpi_comm()) - file["element_variables_$v"] = recv - - # Add variable name as attribute - var = file["element_variables_$v"] - attributes(var)["name"] = string(key) - end - end - - return filename end - - end # @muladd diff --git a/src/callbacks_step/steady_state.jl b/src/callbacks_step/steady_state.jl index 66d04fea704..15c2e834285 100644 --- a/src/callbacks_step/steady_state.jl +++ b/src/callbacks_step/steady_state.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ SteadyStateCallback(; abstol=1.0e-8, reltol=1.0e-6) @@ -11,74 +11,71 @@ Terminates the integration when the [`residual_steady_state(du, equations)`](@ref) falls below the threshold specified by `abstol, reltol`. """ -mutable struct SteadyStateCallback{RealT<:Real} - abstol::RealT - reltol::RealT +mutable struct SteadyStateCallback{RealT <: Real} + abstol::RealT + reltol::RealT end -function SteadyStateCallback(; abstol=1.0e-8, reltol=1.0e-6) - abstol, reltol = promote(abstol, reltol) - steady_state_callback = SteadyStateCallback(abstol, reltol) +function SteadyStateCallback(; abstol = 1.0e-8, reltol = 1.0e-6) + abstol, reltol = promote(abstol, reltol) + steady_state_callback = SteadyStateCallback(abstol, reltol) - DiscreteCallback(steady_state_callback, steady_state_callback, - save_positions=(false,false)) + DiscreteCallback(steady_state_callback, steady_state_callback, + save_positions = (false, false)) end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:SteadyStateCallback}) - @nospecialize cb # reduce precompilation time - - steady_state_callback = cb.affect! - print(io, "SteadyStateCallback(abstol=", steady_state_callback.abstol, ", ", - "reltol=", steady_state_callback.reltol, ")") -end - -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:SteadyStateCallback}) - @nospecialize cb # reduce precompilation time + @nospecialize cb # reduce precompilation time - if get(io, :compact, false) - show(io, cb) - else steady_state_callback = cb.affect! - - setup = [ - "absolute tolerance" => steady_state_callback.abstol, - "relative tolerance" => steady_state_callback.reltol, - ] - summary_box(io, "SteadyStateCallback", setup) - end + print(io, "SteadyStateCallback(abstol=", steady_state_callback.abstol, ", ", + "reltol=", steady_state_callback.reltol, ")") end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:SteadyStateCallback}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + steady_state_callback = cb.affect! + + setup = [ + "absolute tolerance" => steady_state_callback.abstol, + "relative tolerance" => steady_state_callback.reltol, + ] + summary_box(io, "SteadyStateCallback", setup) + end +end # affect! 
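# (it simply terminates the time integration once the residual criterion is met)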
(::SteadyStateCallback)(integrator) = terminate!(integrator) - # the condition function (steady_state_callback::SteadyStateCallback)(u_ode, t, integrator) - semi = integrator.p - - u = wrap_array(u_ode, semi) - du = wrap_array(get_du(integrator), semi) - terminate = steady_state_callback(du, u, semi) - if mpi_isparallel() - # MPI.jl doesn't seem to have MPI_C_BOOL - terminate_integer = Int(terminate) - terminate = !iszero(MPI.Allreduce!(Ref(terminate_integer), +, mpi_comm())[]) - end - if mpi_isroot() && terminate - @info " Steady state tolerance reached" steady_state_callback t - end - return terminate + semi = integrator.p + + u = wrap_array(u_ode, semi) + du = wrap_array(get_du(integrator), semi) + terminate = steady_state_callback(du, u, semi) + if mpi_isparallel() + # MPI.jl doesn't seem to have MPI_C_BOOL + terminate_integer = Int(terminate) + terminate = !iszero(MPI.Allreduce!(Ref(terminate_integer), +, mpi_comm())[]) + end + if mpi_isroot() && terminate + @info " Steady state tolerance reached" steady_state_callback t + end + return terminate end -function (steady_state_callback::SteadyStateCallback)(du, u, semi::AbstractSemidiscretization) - steady_state_callback(du, u, mesh_equations_solver_cache(semi)...) +function (steady_state_callback::SteadyStateCallback)(du, u, + semi::AbstractSemidiscretization) + steady_state_callback(du, u, mesh_equations_solver_cache(semi)...) end include("steady_state_dg1d.jl") include("steady_state_dg2d.jl") include("steady_state_dg3d.jl") - - end # @muladd diff --git a/src/callbacks_step/steady_state_dg1d.jl b/src/callbacks_step/steady_state_dg1d.jl index 65951f95d82..9b895de06d5 100644 --- a/src/callbacks_step/steady_state_dg1d.jl +++ b/src/callbacks_step/steady_state_dg1d.jl @@ -3,24 +3,23 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function (steady_state_callback::SteadyStateCallback)(du, u, mesh::AbstractMesh{1}, equations, dg::DG, cache) - @unpack abstol, reltol = steady_state_callback + @unpack abstol, reltol = steady_state_callback - terminate = true - for element in eachelement(dg, cache) - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, element) - du_local = get_node_vars(du, equations, dg, i, element) - threshold = abstol + reltol * residual_steady_state(u_local, equations) - terminate = terminate && residual_steady_state(du_local, equations) <= threshold + terminate = true + for element in eachelement(dg, cache) + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, element) + du_local = get_node_vars(du, equations, dg, i, element) + threshold = abstol + reltol * residual_steady_state(u_local, equations) + terminate = terminate && + residual_steady_state(du_local, equations) <= threshold + end end - end - return terminate + return terminate end - - end # @muladd diff --git a/src/callbacks_step/steady_state_dg2d.jl b/src/callbacks_step/steady_state_dg2d.jl index 4837e77899d..ebb48ce4581 100644 --- a/src/callbacks_step/steady_state_dg2d.jl +++ b/src/callbacks_step/steady_state_dg2d.jl @@ -3,24 +3,23 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function (steady_state_callback::SteadyStateCallback)(du, u, mesh::AbstractMesh{2}, equations, dg::DG, cache) - @unpack abstol, reltol = steady_state_callback + @unpack abstol, reltol = steady_state_callback - terminate = true - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - du_local = get_node_vars(du, equations, dg, i, j, element) - threshold = abstol + reltol * residual_steady_state(u_local, equations) - terminate = terminate && residual_steady_state(du_local, equations) <= threshold + terminate = true + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + du_local = get_node_vars(du, equations, dg, i, j, element) + threshold = abstol + reltol * residual_steady_state(u_local, equations) + terminate = terminate && + residual_steady_state(du_local, equations) <= threshold + end end - end - return terminate + return terminate end - - end # @muladd diff --git a/src/callbacks_step/steady_state_dg3d.jl b/src/callbacks_step/steady_state_dg3d.jl index d154d5e956d..69c172f9636 100644 --- a/src/callbacks_step/steady_state_dg3d.jl +++ b/src/callbacks_step/steady_state_dg3d.jl @@ -3,24 +3,23 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function (steady_state_callback::SteadyStateCallback)(du, u, mesh::AbstractMesh{3}, equations, dg::DG, cache) - @unpack abstol, reltol = steady_state_callback + @unpack abstol, reltol = steady_state_callback - terminate = true - for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, k, element) - du_local = get_node_vars(du, equations, dg, i, j, k, element) - threshold = abstol + reltol * residual_steady_state(u_local, equations) - terminate = terminate && residual_steady_state(du_local, equations) <= threshold + terminate = true + for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, k, element) + du_local = get_node_vars(du, equations, dg, i, j, k, element) + threshold = abstol + reltol * residual_steady_state(u_local, equations) + terminate = terminate && + residual_steady_state(du_local, equations) <= threshold + end end - end - return terminate + return terminate end - - end # @muladd diff --git a/src/callbacks_step/stepsize.jl b/src/callbacks_step/stepsize.jl index 13e4f9dfa54..9e9f2d4885b 100644 --- a/src/callbacks_step/stepsize.jl +++ b/src/callbacks_step/stepsize.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ StepsizeCallback(; cfl=1.0) @@ -12,100 +12,98 @@ Set the time step size according to a CFL condition with CFL number `cfl` if the time integration method isn't adaptive itself. """ mutable struct StepsizeCallback{RealT} - cfl_number::RealT + cfl_number::RealT end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:StepsizeCallback}) - @nospecialize cb # reduce precompilation time - - stepsize_callback = cb.affect! 
- @unpack cfl_number = stepsize_callback - print(io, "StepsizeCallback(cfl_number=", cfl_number, ")") -end - -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:StepsizeCallback}) - @nospecialize cb # reduce precompilation time + @nospecialize cb # reduce precompilation time - if get(io, :compact, false) - show(io, cb) - else stepsize_callback = cb.affect! - - setup = [ - "CFL number" => stepsize_callback.cfl_number, - ] - summary_box(io, "StepsizeCallback", setup) - end + @unpack cfl_number = stepsize_callback + print(io, "StepsizeCallback(cfl_number=", cfl_number, ")") end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:StepsizeCallback}) + @nospecialize cb # reduce precompilation time -function StepsizeCallback(; cfl::Real=1.0) - - stepsize_callback = StepsizeCallback(cfl) + if get(io, :compact, false) + show(io, cb) + else + stepsize_callback = cb.affect! - DiscreteCallback(stepsize_callback, stepsize_callback, # the first one is the condition, the second the affect! - save_positions=(false,false), - initialize=initialize!) + setup = [ + "CFL number" => stepsize_callback.cfl_number, + ] + summary_box(io, "StepsizeCallback", setup) + end end +function StepsizeCallback(; cfl::Real = 1.0) + stepsize_callback = StepsizeCallback(cfl) -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u, t, integrator) where {Condition, Affect!<:StepsizeCallback} - cb.affect!(integrator) + DiscreteCallback(stepsize_callback, stepsize_callback, # the first one is the condition, the second the affect! + save_positions = (false, false), + initialize = initialize!) end +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, + integrator) where {Condition, Affect! <: StepsizeCallback} + cb.affect!(integrator) +end # this method is called to determine whether the callback should be activated function (stepsize_callback::StepsizeCallback)(u, t, integrator) - return true + return true end - # This method is called as callback during the time integration. @inline function (stepsize_callback::StepsizeCallback)(integrator) - # TODO: Taal decide, shall we set the time step even if the integrator is adaptive? - if !integrator.opts.adaptive - t = integrator.t - u_ode = integrator.u - semi = integrator.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - @unpack cfl_number = stepsize_callback - u = wrap_array(u_ode, mesh, equations, solver, cache) - - dt = @trixi_timeit timer() "calculate dt" cfl_number * max_dt(u, t, mesh, - have_constant_speed(equations), equations, - solver, cache) - set_proposed_dt!(integrator, dt) - integrator.opts.dtmax = dt - integrator.dtcache = dt - end - - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) - return nothing + # TODO: Taal decide, shall we set the time step even if the integrator is adaptive? 
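+    # Sketch with hypothetical numbers: for `cfl_number = 0.5` and
+    # `max_dt(...) = 1.0e-2`, the computation below yields `dt = 5.0e-3`, which
+    # is then set as the proposed step size as well as `dtmax` and `dtcache`.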
+ if !integrator.opts.adaptive + t = integrator.t + u_ode = integrator.u + semi = integrator.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + @unpack cfl_number = stepsize_callback + u = wrap_array(u_ode, mesh, equations, solver, cache) + + dt = @trixi_timeit timer() "calculate dt" begin + cfl_number * max_dt(u, t, mesh, have_constant_speed(equations), equations, + solver, cache) + end + + set_proposed_dt!(integrator, dt) + integrator.opts.dtmax = dt + integrator.dtcache = dt + end + + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) + return nothing end - # Time integration methods from the DiffEq ecosystem without adaptive time stepping on their own # such as `CarpenterKennedy2N54` require passing `dt=...` in `solve(ode, ...)`. Since we don't have # an integrator at this stage but only the ODE, this method will be used there. It's called in # many examples in `solve(ode, ..., dt=stepsize_callback(ode), ...)`. -function (cb::DiscreteCallback{Condition,Affect!})(ode::ODEProblem) where {Condition, Affect!<:StepsizeCallback} - stepsize_callback = cb.affect! - @unpack cfl_number = stepsize_callback - u_ode = ode.u0 - t = first(ode.tspan) - semi = ode.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array(u_ode, mesh, equations, solver, cache) - - return cfl_number * max_dt(u, t, mesh, have_constant_speed(equations), equations, solver, cache) -end +function (cb::DiscreteCallback{Condition, Affect!})(ode::ODEProblem) where {Condition, + Affect! <: + StepsizeCallback + } + stepsize_callback = cb.affect! + @unpack cfl_number = stepsize_callback + u_ode = ode.u0 + t = first(ode.tspan) + semi = ode.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + u = wrap_array(u_ode, mesh, equations, solver, cache) + return cfl_number * + max_dt(u, t, mesh, have_constant_speed(equations), equations, solver, cache) +end include("stepsize_dg1d.jl") include("stepsize_dg2d.jl") include("stepsize_dg3d.jl") - - end # @muladd diff --git a/src/callbacks_step/stepsize_dg1d.jl b/src/callbacks_step/stepsize_dg1d.jl index 0cb9932335d..edc25ec78f6 100644 --- a/src/callbacks_step/stepsize_dg1d.jl +++ b/src/callbacks_step/stepsize_dg1d.jl @@ -3,87 +3,82 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function max_dt(u, t, mesh::TreeMesh{1}, constant_speed::False, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - for element in eachelement(dg, cache) - max_λ1 = zero(max_scaled_speed) - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) - λ1, = max_abs_speeds(u_node, equations) - max_λ1 = max(max_λ1, λ1) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. 
for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + for element in eachelement(dg, cache) + max_lambda1 = zero(max_scaled_speed) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + lambda1, = max_abs_speeds(u_node, equations) + max_lambda1 = max(max_lambda1, lambda1) + end + inv_jacobian = cache.elements.inverse_jacobian[element] + max_scaled_speed = max(max_scaled_speed, inv_jacobian * max_lambda1) end - inv_jacobian = cache.elements.inverse_jacobian[element] - max_scaled_speed = max(max_scaled_speed, inv_jacobian * max_λ1) - end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::TreeMesh{1}, constant_speed::True, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - for element in eachelement(dg, cache) - max_λ1, = max_abs_speeds(equations) - inv_jacobian = cache.elements.inverse_jacobian[element] - max_scaled_speed = max(max_scaled_speed, inv_jacobian * max_λ1) - end + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + for element in eachelement(dg, cache) + max_lambda1, = max_abs_speeds(equations) + inv_jacobian = cache.elements.inverse_jacobian[element] + max_scaled_speed = max(max_scaled_speed, inv_jacobian * max_lambda1) + end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::StructuredMesh{1}, constant_speed::False, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) - for element in eachelement(dg, cache) - max_λ1 = zero(max_scaled_speed) + for element in eachelement(dg, cache) + max_lambda1 = zero(max_scaled_speed) - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) - λ1, = max_abs_speeds(u_node, equations) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + lambda1, = max_abs_speeds(u_node, equations) - inv_jacobian = cache.elements.inverse_jacobian[i, element] + inv_jacobian = cache.elements.inverse_jacobian[i, element] - max_λ1 = max(max_λ1, inv_jacobian * λ1) - end + max_lambda1 = max(max_lambda1, inv_jacobian * lambda1) + end - max_scaled_speed = max(max_scaled_speed, max_λ1) - end + max_scaled_speed = max(max_scaled_speed, max_lambda1) + end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::StructuredMesh{1}, constant_speed::True, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. 
for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) - for element in eachelement(dg, cache) - max_λ1, = max_abs_speeds(equations) + for element in eachelement(dg, cache) + max_lambda1, = max_abs_speeds(equations) - for i in eachnode(dg) - inv_jacobian = cache.elements.inverse_jacobian[i, element] - max_scaled_speed = max(max_scaled_speed, inv_jacobian * max_λ1) + for i in eachnode(dg) + inv_jacobian = cache.elements.inverse_jacobian[i, element] + max_scaled_speed = max(max_scaled_speed, inv_jacobian * max_lambda1) + end end - end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - - end # @muladd diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index 3c7d288d8d2..89a2b2b8350 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -3,168 +3,171 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function max_dt(u, t, mesh::TreeMesh{2}, constant_speed::False, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - for element in eachelement(dg, cache) - max_λ1 = max_λ2 = zero(max_scaled_speed) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - λ1, λ2 = max_abs_speeds(u_node, equations) - max_λ1 = max(max_λ1, λ1) - max_λ2 = max(max_λ2, λ2) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + for element in eachelement(dg, cache) + max_lambda1 = max_lambda2 = zero(max_scaled_speed) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + lambda1, lambda2 = max_abs_speeds(u_node, equations) + max_lambda1 = max(max_lambda1, lambda1) + max_lambda2 = max(max_lambda2, lambda2) + end + inv_jacobian = cache.elements.inverse_jacobian[element] + max_scaled_speed = max(max_scaled_speed, + inv_jacobian * (max_lambda1 + max_lambda2)) end - inv_jacobian = cache.elements.inverse_jacobian[element] - max_scaled_speed = max(max_scaled_speed, inv_jacobian * (max_λ1 + max_λ2)) - end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::TreeMesh{2}, constant_speed::True, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - for element in eachelement(dg, cache) - max_λ1, max_λ2 = max_abs_speeds(equations) - inv_jacobian = cache.elements.inverse_jacobian[element] - max_scaled_speed = max(max_scaled_speed, inv_jacobian * (max_λ1 + max_λ2)) - end + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. 
for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + for element in eachelement(dg, cache) + max_lambda1, max_lambda2 = max_abs_speeds(equations) + inv_jacobian = cache.elements.inverse_jacobian[element] + max_scaled_speed = max(max_scaled_speed, + inv_jacobian * (max_lambda1 + max_lambda2)) + end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::ParallelTreeMesh{2}, constant_speed::False, equations, dg::DG, cache) - # call the method accepting a general `mesh::TreeMesh{2}` - # TODO: MPI, we should improve this; maybe we should dispatch on `u` - # and create some MPI array type, overloading broadcasting and mapreduce etc. - # Then, this specific array type should also work well with DiffEq etc. - dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), TreeMesh{2}, - typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) - dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] - - return dt + # call the method accepting a general `mesh::TreeMesh{2}` + # TODO: MPI, we should improve this; maybe we should dispatch on `u` + # and create some MPI array type, overloading broadcasting and mapreduce etc. + # Then, this specific array type should also work well with DiffEq etc. + dt = invoke(max_dt, + Tuple{typeof(u), typeof(t), TreeMesh{2}, + typeof(constant_speed), typeof(equations), typeof(dg), + typeof(cache)}, + u, t, mesh, constant_speed, equations, dg, cache) + dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] + + return dt end - function max_dt(u, t, mesh::ParallelTreeMesh{2}, constant_speed::True, equations, dg::DG, cache) - # call the method accepting a general `mesh::TreeMesh{2}` - # TODO: MPI, we should improve this; maybe we should dispatch on `u` - # and create some MPI array type, overloading broadcasting and mapreduce etc. - # Then, this specific array type should also work well with DiffEq etc. - dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), TreeMesh{2}, - typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) - dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] - - return dt + # call the method accepting a general `mesh::TreeMesh{2}` + # TODO: MPI, we should improve this; maybe we should dispatch on `u` + # and create some MPI array type, overloading broadcasting and mapreduce etc. + # Then, this specific array type should also work well with DiffEq etc. + dt = invoke(max_dt, + Tuple{typeof(u), typeof(t), TreeMesh{2}, + typeof(constant_speed), typeof(equations), typeof(dg), + typeof(cache)}, + u, t, mesh, constant_speed, equations, dg, cache) + dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] + + return dt end - function max_dt(u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, constant_speed::False, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. 
for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) - @unpack contravariant_vectors, inverse_jacobian = cache.elements + @unpack contravariant_vectors, inverse_jacobian = cache.elements - for element in eachelement(dg, cache) - max_λ1 = max_λ2 = zero(max_scaled_speed) - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - λ1, λ2 = max_abs_speeds(u_node, equations) + for element in eachelement(dg, cache) + max_lambda1 = max_lambda2 = zero(max_scaled_speed) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + lambda1, lambda2 = max_abs_speeds(u_node, equations) - # Local speeds transformed to the reference element - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) - λ1_transformed = abs(Ja11 * λ1 + Ja12 * λ2) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) - λ2_transformed = abs(Ja21 * λ1 + Ja22 * λ2) + # Local speeds transformed to the reference element + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, + element) + lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, + element) + lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2) - inv_jacobian = abs(inverse_jacobian[i, j, element]) + inv_jacobian = abs(inverse_jacobian[i, j, element]) - max_λ1 = max(max_λ1, λ1_transformed * inv_jacobian) - max_λ2 = max(max_λ2, λ2_transformed * inv_jacobian) - end + max_lambda1 = max(max_lambda1, lambda1_transformed * inv_jacobian) + max_lambda2 = max(max_lambda2, lambda2_transformed * inv_jacobian) + end - max_scaled_speed = max(max_scaled_speed, max_λ1 + max_λ2) - end + max_scaled_speed = max(max_scaled_speed, max_lambda1 + max_lambda2) + end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, constant_speed::True, equations, dg::DG, cache) - @unpack contravariant_vectors, inverse_jacobian = cache.elements - - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - max_λ1, max_λ2 = max_abs_speeds(equations) - - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - # Local speeds transformed to the reference element - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) - λ1_transformed = abs(Ja11 * max_λ1 + Ja12 * max_λ2) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) - λ2_transformed = abs(Ja21 * max_λ1 + Ja22 * max_λ2) - - inv_jacobian = abs(inverse_jacobian[i, j, element]) - max_scaled_speed = max(max_scaled_speed, inv_jacobian * (λ1_transformed + λ2_transformed)) + @unpack contravariant_vectors, inverse_jacobian = cache.elements + + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. 
for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + max_lambda1, max_lambda2 = max_abs_speeds(equations) + + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + # Local speeds transformed to the reference element + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, + element) + lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, + element) + lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2) + + inv_jacobian = abs(inverse_jacobian[i, j, element]) + max_scaled_speed = max(max_scaled_speed, + inv_jacobian * + (lambda1_transformed + lambda2_transformed)) + end end - end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::ParallelP4estMesh{2}, constant_speed::False, equations, dg::DG, cache) - # call the method accepting a general `mesh::P4estMesh{2}` - # TODO: MPI, we should improve this; maybe we should dispatch on `u` - # and create some MPI array type, overloading broadcasting and mapreduce etc. - # Then, this specific array type should also work well with DiffEq etc. - dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{2}, - typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) - dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] - - return dt + # call the method accepting a general `mesh::P4estMesh{2}` + # TODO: MPI, we should improve this; maybe we should dispatch on `u` + # and create some MPI array type, overloading broadcasting and mapreduce etc. + # Then, this specific array type should also work well with DiffEq etc. + dt = invoke(max_dt, + Tuple{typeof(u), typeof(t), P4estMesh{2}, + typeof(constant_speed), typeof(equations), typeof(dg), + typeof(cache)}, + u, t, mesh, constant_speed, equations, dg, cache) + dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] + + return dt end - function max_dt(u, t, mesh::ParallelP4estMesh{2}, constant_speed::True, equations, dg::DG, cache) - # call the method accepting a general `mesh::P4estMesh{2}` - # TODO: MPI, we should improve this; maybe we should dispatch on `u` - # and create some MPI array type, overloading broadcasting and mapreduce etc. - # Then, this specific array type should also work well with DiffEq etc. - dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{2}, - typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) - dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] - - return dt + # call the method accepting a general `mesh::P4estMesh{2}` + # TODO: MPI, we should improve this; maybe we should dispatch on `u` + # and create some MPI array type, overloading broadcasting and mapreduce etc. + # Then, this specific array type should also work well with DiffEq etc. + dt = invoke(max_dt, + Tuple{typeof(u), typeof(t), P4estMesh{2}, + typeof(constant_speed), typeof(equations), typeof(dg), + typeof(cache)}, + u, t, mesh, constant_speed, equations, dg, cache) + dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] + + return dt end - - end # @muladd diff --git a/src/callbacks_step/stepsize_dg3d.jl b/src/callbacks_step/stepsize_dg3d.jl index 492ee3d9a08..c9ab7c478a8 100644 --- a/src/callbacks_step/stepsize_dg3d.jl +++ b/src/callbacks_step/stepsize_dg3d.jl @@ -3,142 +3,151 @@ # we need to opt-in explicitly. 
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function max_dt(u, t, mesh::TreeMesh{3}, constant_speed::False, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - for element in eachelement(dg, cache) - max_λ1 = max_λ2 = max_λ3 = zero(max_scaled_speed) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - λ1, λ2, λ3 = max_abs_speeds(u_node, equations) - max_λ1 = max(max_λ1, λ1) - max_λ2 = max(max_λ2, λ2) - max_λ3 = max(max_λ3, λ3) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + for element in eachelement(dg, cache) + max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations) + max_lambda1 = max(max_lambda1, lambda1) + max_lambda2 = max(max_lambda2, lambda2) + max_lambda3 = max(max_lambda3, lambda3) + end + inv_jacobian = cache.elements.inverse_jacobian[element] + max_scaled_speed = max(max_scaled_speed, + inv_jacobian * (max_lambda1 + max_lambda2 + max_lambda3)) end - inv_jacobian = cache.elements.inverse_jacobian[element] - max_scaled_speed = max(max_scaled_speed, inv_jacobian * (max_λ1 + max_λ2 + max_λ3)) - end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::TreeMesh{3}, constant_speed::True, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - for element in eachelement(dg, cache) - max_λ1, max_λ2, max_λ3 = max_abs_speeds(equations) - inv_jacobian = cache.elements.inverse_jacobian[element] - max_scaled_speed = max(max_scaled_speed, inv_jacobian * (max_λ1 + max_λ2 + max_λ3)) - end + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + for element in eachelement(dg, cache) + max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations) + inv_jacobian = cache.elements.inverse_jacobian[element] + max_scaled_speed = max(max_scaled_speed, + inv_jacobian * (max_lambda1 + max_lambda2 + max_lambda3)) + end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, constant_speed::False, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. 
for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - @unpack contravariant_vectors = cache.elements - - for element in eachelement(dg, cache) - max_λ1 = max_λ2 = max_λ3 = zero(max_scaled_speed) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - λ1, λ2, λ3 = max_abs_speeds(u_node, equations) - - Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) - λ1_transformed = abs(Ja11 * λ1 + Ja12 * λ2 + Ja13 * λ3) - Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) - λ2_transformed = abs(Ja21 * λ1 + Ja22 * λ2 + Ja23 * λ3) - Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) - λ3_transformed = abs(Ja31 * λ1 + Ja32 * λ2 + Ja33 * λ3) - - inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element]) - - max_λ1 = max(max_λ1, inv_jacobian * λ1_transformed) - max_λ2 = max(max_λ2, inv_jacobian * λ2_transformed) - max_λ3 = max(max_λ3, inv_jacobian * λ3_transformed) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + @unpack contravariant_vectors = cache.elements + + for element in eachelement(dg, cache) + max_lambda1 = max_lambda2 = max_lambda3 = zero(max_scaled_speed) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + lambda1, lambda2, lambda3 = max_abs_speeds(u_node, equations) + + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, + k, element) + lambda1_transformed = abs(Ja11 * lambda1 + Ja12 * lambda2 + Ja13 * lambda3) + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, + k, element) + lambda2_transformed = abs(Ja21 * lambda1 + Ja22 * lambda2 + Ja23 * lambda3) + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, + k, element) + lambda3_transformed = abs(Ja31 * lambda1 + Ja32 * lambda2 + Ja33 * lambda3) + + inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element]) + + max_lambda1 = max(max_lambda1, inv_jacobian * lambda1_transformed) + max_lambda2 = max(max_lambda2, inv_jacobian * lambda2_transformed) + max_lambda3 = max(max_lambda3, inv_jacobian * lambda3_transformed) + end + + max_scaled_speed = max(max_scaled_speed, + max_lambda1 + max_lambda2 + max_lambda3) end - max_scaled_speed = max(max_scaled_speed, max_λ1 + max_λ2 + max_λ3) - end - - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, constant_speed::True, equations, dg::DG, cache) - # to avoid a division by zero if the speed vanishes everywhere, - # e.g. 
for steady-state linear advection - max_scaled_speed = nextfloat(zero(t)) - - @unpack contravariant_vectors = cache.elements - - max_λ1, max_λ2, max_λ3 = max_abs_speeds(equations) - - for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) - λ1_transformed = abs(Ja11 * max_λ1 + Ja12 * max_λ2 + Ja13 * max_λ3) - Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) - λ2_transformed = abs(Ja21 * max_λ1 + Ja22 * max_λ2 + Ja23 * max_λ3) - Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) - λ3_transformed = abs(Ja31 * max_λ1 + Ja32 * max_λ2 + Ja33 * max_λ3) - - inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element]) - - max_scaled_speed = max(max_scaled_speed, - inv_jacobian * (λ1_transformed + λ2_transformed + λ3_transformed)) + # to avoid a division by zero if the speed vanishes everywhere, + # e.g. for steady-state linear advection + max_scaled_speed = nextfloat(zero(t)) + + @unpack contravariant_vectors = cache.elements + + max_lambda1, max_lambda2, max_lambda3 = max_abs_speeds(equations) + + for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, + k, element) + lambda1_transformed = abs(Ja11 * max_lambda1 + Ja12 * max_lambda2 + + Ja13 * max_lambda3) + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, + k, element) + lambda2_transformed = abs(Ja21 * max_lambda1 + Ja22 * max_lambda2 + + Ja23 * max_lambda3) + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, + k, element) + lambda3_transformed = abs(Ja31 * max_lambda1 + Ja32 * max_lambda2 + + Ja33 * max_lambda3) + + inv_jacobian = abs(cache.elements.inverse_jacobian[i, j, k, element]) + + max_scaled_speed = max(max_scaled_speed, + inv_jacobian * + (lambda1_transformed + lambda2_transformed + + lambda3_transformed)) + end end - end - return 2 / (nnodes(dg) * max_scaled_speed) + return 2 / (nnodes(dg) * max_scaled_speed) end - function max_dt(u, t, mesh::ParallelP4estMesh{3}, constant_speed::False, equations, dg::DG, cache) - # call the method accepting a general `mesh::P4estMesh{3}` - # TODO: MPI, we should improve this; maybe we should dispatch on `u` - # and create some MPI array type, overloading broadcasting and mapreduce etc. - # Then, this specific array type should also work well with DiffEq etc. - dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{3}, - typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) - dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] - - return dt + # call the method accepting a general `mesh::P4estMesh{3}` + # TODO: MPI, we should improve this; maybe we should dispatch on `u` + # and create some MPI array type, overloading broadcasting and mapreduce etc. + # Then, this specific array type should also work well with DiffEq etc. 
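As an aside for readers of this hunk: the `invoke`-plus-`Allreduce` pattern that the TODO comments above describe reduces a rank-local time step to a global one. A minimal, self-contained sketch of that pattern (illustrative names like `global_min_dt` and `dt_local`, not code from this patch):

```julia
using MPI

# Hedged sketch: each rank computes a locally admissible time step and the
# global step is the minimum over all ranks, so every rank agrees on dt.
function global_min_dt(dt_local::Real, comm::MPI.Comm)
    return MPI.Allreduce(dt_local, min, comm)
end

MPI.Init()
rank = MPI.Comm_rank(MPI.COMM_WORLD)
dt_local = 0.1 / (rank + 1)                   # pretend per-rank value
dt = global_min_dt(dt_local, MPI.COMM_WORLD)  # identical on all ranks
```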
+ dt = invoke(max_dt, + Tuple{typeof(u), typeof(t), P4estMesh{3}, + typeof(constant_speed), typeof(equations), typeof(dg), + typeof(cache)}, + u, t, mesh, constant_speed, equations, dg, cache) + dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] + + return dt end - function max_dt(u, t, mesh::ParallelP4estMesh{3}, constant_speed::True, equations, dg::DG, cache) - # call the method accepting a general `mesh::P4estMesh{3}` - # TODO: MPI, we should improve this; maybe we should dispatch on `u` - # and create some MPI array type, overloading broadcasting and mapreduce etc. - # Then, this specific array type should also work well with DiffEq etc. - dt = invoke(max_dt, - Tuple{typeof(u), typeof(t), P4estMesh{3}, - typeof(constant_speed), typeof(equations), typeof(dg), typeof(cache)}, - u, t, mesh, constant_speed, equations, dg, cache) - dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] - - return dt + # call the method accepting a general `mesh::P4estMesh{3}` + # TODO: MPI, we should improve this; maybe we should dispatch on `u` + # and create some MPI array type, overloading broadcasting and mapreduce etc. + # Then, this specific array type should also work well with DiffEq etc. + dt = invoke(max_dt, + Tuple{typeof(u), typeof(t), P4estMesh{3}, + typeof(constant_speed), typeof(equations), typeof(dg), + typeof(cache)}, + u, t, mesh, constant_speed, equations, dg, cache) + dt = MPI.Allreduce!(Ref(dt), min, mpi_comm())[] + + return dt end - - end # @muladd diff --git a/src/callbacks_step/summary.jl b/src/callbacks_step/summary.jl index 37428f49651..a73b2a1913b 100644 --- a/src/callbacks_step/summary.jl +++ b/src/callbacks_step/summary.jl @@ -3,12 +3,11 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent summary_callback(u, t, integrator) = false # when used as condition; never call the summary callback during the simulation summary_callback(integrator) = u_modified!(integrator, false) # the summary callback does nothing when called accidentally - """ SummaryCallback() @@ -17,211 +16,208 @@ beginning of a simulation and then resets the timer. When the returned callback directly, the current timer values are shown. 
""" function SummaryCallback() - DiscreteCallback(summary_callback, summary_callback, - save_positions=(false,false), - initialize=initialize_summary_callback) + DiscreteCallback(summary_callback, summary_callback, + save_positions = (false, false), + initialize = initialize_summary_callback) end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:typeof(summary_callback)}) - @nospecialize cb # reduce precompilation time + @nospecialize cb # reduce precompilation time - print(io, "SummaryCallback") + print(io, "SummaryCallback") end - # Format a key/value pair for output from the SummaryCallback -function format_key_value_line(key::AbstractString, value::AbstractString, key_width, total_width; - indentation_level=0, guide='…', filler='…', prefix="│ ", suffix=" │") - @assert key_width < total_width - line = prefix - # Indent the key as requested (or not at all if `indentation_level == 0`) - indentation = prefix^indentation_level - reduced_key_width = key_width - length(indentation) - squeezed_key = indentation * squeeze(key, reduced_key_width, filler=filler) - line *= squeezed_key - line *= ": " - short = key_width - length(squeezed_key) - if short <= 1 - line *= " " - else - line *= guide^(short-1) * " " - end - value_width = total_width - length(prefix) - length(suffix) - key_width - 2 - squeezed_value = squeeze(value, value_width, filler=filler) - line *= squeezed_value - short = value_width - length(squeezed_value) - line *= " "^short - line *= suffix - - @assert length(line) == total_width "should not happen: algorithm error!" - - return line +function format_key_value_line(key::AbstractString, value::AbstractString, key_width, + total_width; + indentation_level = 0, guide = '…', filler = '…', + prefix = "│ ", suffix = " │") + @assert key_width < total_width + line = prefix + # Indent the key as requested (or not at all if `indentation_level == 0`) + indentation = prefix^indentation_level + reduced_key_width = key_width - length(indentation) + squeezed_key = indentation * squeeze(key, reduced_key_width, filler = filler) + line *= squeezed_key + line *= ": " + short = key_width - length(squeezed_key) + if short <= 1 + line *= " " + else + line *= guide^(short - 1) * " " + end + value_width = total_width - length(prefix) - length(suffix) - key_width - 2 + squeezed_value = squeeze(value, value_width, filler = filler) + line *= squeezed_value + short = value_width - length(squeezed_value) + line *= " "^short + line *= suffix + + @assert length(line)==total_width "should not happen: algorithm error!" + + return line +end +function format_key_value_line(key, value, args...; kwargs...) + format_key_value_line(string(key), string(value), args...; kwargs...) end -format_key_value_line(key, value, args...; kwargs...) = format_key_value_line(string(key), string(value), args...; kwargs...) # Squeeze a string to fit into a maximum width by deleting characters from the center -function squeeze(message, max_width; filler::Char='…') - @assert max_width >= 3 "squeezing works only for a minimum `max_width` of 3" +function squeeze(message, max_width; filler::Char = '…') + @assert max_width>=3 "squeezing works only for a minimum `max_width` of 3" - length(message) <= max_width && return message + length(message) <= max_width && return message - keep_front = div(max_width, 2) - keep_back = div(max_width, 2) - (isodd(max_width) ? 
0 : 1) - remove_back = length(message) - keep_front - remove_front = length(message) - keep_back - squeezed = (chop(message, head=0, tail=remove_back) - * filler * - chop(message, head=remove_front, tail=0)) + keep_front = div(max_width, 2) + keep_back = div(max_width, 2) - (isodd(max_width) ? 0 : 1) + remove_back = length(message) - keep_front + remove_front = length(message) - keep_back + squeezed = (chop(message, head = 0, tail = remove_back) + * filler * + chop(message, head = remove_front, tail = 0)) - @assert length(squeezed) == max_width "`$(length(squeezed)) != $max_width` should not happen: algorithm error!" + @assert length(squeezed)==max_width "`$(length(squeezed)) != $max_width` should not happen: algorithm error!" - return squeezed + return squeezed end # Print a summary with a box around it with a given heading and a setup of key=>value pairs -function summary_box(io::IO, heading, setup=[]) - summary_header(io, heading) - for (key, value) in setup - summary_line(io, key, value) - end - summary_footer(io) +function summary_box(io::IO, heading, setup = []) + summary_header(io, heading) + for (key, value) in setup + summary_line(io, key, value) + end + summary_footer(io) end -function summary_header(io, heading; total_width=100, indentation_level=0) - total_width = get(io, :total_width, total_width) - indentation_level = get(io, :indentation_level, indentation_level) +function summary_header(io, heading; total_width = 100, indentation_level = 0) + total_width = get(io, :total_width, total_width) + indentation_level = get(io, :indentation_level, indentation_level) - @assert indentation_level >= 0 "indentation level may not be negative" + @assert indentation_level>=0 "indentation level may not be negative" - # If indentation level is greater than zero, we assume the header has already been printed - indentation_level > 0 && return + # If indentation level is greater than zero, we assume the header has already been printed + indentation_level > 0 && return - # Print header - println(io, "┌" * "─"^(total_width-2) * "┐") - println(io, "│ " * heading * " "^(total_width - length(heading) - 4) * " │") - println(io, "│ " * "═"^length(heading) * " "^(total_width - length(heading) - 4) * " │") + # Print header + println(io, "┌" * "─"^(total_width - 2) * "┐") + println(io, "│ " * heading * " "^(total_width - length(heading) - 4) * " │") + println(io, + "│ " * "═"^length(heading) * " "^(total_width - length(heading) - 4) * " │") end -function summary_line(io, key, value; key_width=30, total_width=100, indentation_level=0) - # Printing is not performance-critical, so we can use `@nospecialize` to reduce latency - @nospecialize value # reduce precompilation time +function summary_line(io, key, value; key_width = 30, total_width = 100, + indentation_level = 0) + # Printing is not performance-critical, so we can use `@nospecialize` to reduce latency + @nospecialize value # reduce precompilation time - key_width = get(io, :key_width, key_width) - total_width = get(io, :total_width, total_width) - indentation_level = get(io, :indentation_level, indentation_level) + key_width = get(io, :key_width, key_width) + total_width = get(io, :total_width, total_width) + indentation_level = get(io, :indentation_level, indentation_level) - s = format_key_value_line(key, value, key_width, total_width, - indentation_level=indentation_level) + s = format_key_value_line(key, value, key_width, total_width, + indentation_level = indentation_level) - println(io, s) + println(io, s) end -function summary_footer(io; 
total_width=100, indentation_level=0) - total_width = get(io, :total_width, 100) - indentation_level = get(io, :indentation_level, 0) +function summary_footer(io; total_width = 100, indentation_level = 0) + total_width = get(io, :total_width, 100) + indentation_level = get(io, :indentation_level, 0) - if indentation_level == 0 - s = "└" * "─"^(total_width-2) * "┘" - else - s = "" - end + if indentation_level == 0 + s = "└" * "─"^(total_width - 2) * "┘" + else + s = "" + end - print(io, s) + print(io, s) end -@inline increment_indent(io) = IOContext(io, :indentation_level => get(io, :indentation_level, 0) + 1) - +@inline function increment_indent(io) + IOContext(io, :indentation_level => get(io, :indentation_level, 0) + 1) +end # Print information about the current simulation setup # Note: This is called *after* all initialization is done, but *before* the first time step function initialize_summary_callback(cb::DiscreteCallback, u, t, integrator) + mpi_isroot() || return nothing - mpi_isroot() || return nothing - - print_startup_message() - - io = stdout - io_context = IOContext(io, - :compact => false, - :key_width => 30, - :total_width => 100, - :indentation_level => 0) - - semi = integrator.p - show(io_context, MIME"text/plain"(), semi) - println(io, "\n") - mesh, equations, solver, _ = mesh_equations_solver_cache(semi) - show(io_context, MIME"text/plain"(), mesh) - println(io, "\n") - show(io_context, MIME"text/plain"(), equations) - println(io, "\n") - show(io_context, MIME"text/plain"(), solver) - println(io, "\n") - - callbacks = integrator.opts.callback - if callbacks isa CallbackSet - for cb in callbacks.continuous_callbacks - show(io_context, MIME"text/plain"(), cb) - println(io, "\n") - end - for cb in callbacks.discrete_callbacks - # Do not show ourselves - cb.affect! === summary_callback && continue + print_startup_message() - show(io_context, MIME"text/plain"(), cb) - println(io, "\n") - end - else - show(io_context, MIME"text/plain"(), callbacks) + io = stdout + io_context = IOContext(io, + :compact => false, + :key_width => 30, + :total_width => 100, + :indentation_level => 0) + + semi = integrator.p + show(io_context, MIME"text/plain"(), semi) + println(io, "\n") + mesh, equations, solver, _ = mesh_equations_solver_cache(semi) + show(io_context, MIME"text/plain"(), mesh) + println(io, "\n") + show(io_context, MIME"text/plain"(), equations) + println(io, "\n") + show(io_context, MIME"text/plain"(), solver) println(io, "\n") - end - - # time integration - setup = Pair{String,Any}[ - "Start time" => first(integrator.sol.prob.tspan), - "Final time" => last(integrator.sol.prob.tspan), - "time integrator" => integrator.alg |> typeof |> nameof, - "adaptive" => integrator.opts.adaptive, - ] - if integrator.opts.adaptive - push!(setup, - "abstol" => integrator.opts.abstol, - "reltol" => integrator.opts.reltol, - "controller" => integrator.opts.controller, - ) - end - summary_box(io, "Time integration", setup) - println() - - # technical details - setup = Pair{String,Any}[ - "#threads" => Threads.nthreads(), - ] - if mpi_isparallel() - push!(setup, - "#MPI ranks" => mpi_nranks(), - ) - end - summary_box(io, "Environment information", setup) - println() - - reset_timer!(timer()) - - return nothing -end + callbacks = integrator.opts.callback + if callbacks isa CallbackSet + for cb in callbacks.continuous_callbacks + show(io_context, MIME"text/plain"(), cb) + println(io, "\n") + end + for cb in callbacks.discrete_callbacks + # Do not show ourselves + cb.affect! 
=== summary_callback && continue + + show(io_context, MIME"text/plain"(), cb) + println(io, "\n") + end + else + show(io_context, MIME"text/plain"(), callbacks) + println(io, "\n") + end -function (cb::DiscreteCallback{Condition,Affect!})(io::IO=stdout) where {Condition, Affect!<:typeof(summary_callback)} + # time integration + setup = Pair{String, Any}["Start time" => first(integrator.sol.prob.tspan), + "Final time" => last(integrator.sol.prob.tspan), + "time integrator" => integrator.alg |> typeof |> nameof, + "adaptive" => integrator.opts.adaptive] + if integrator.opts.adaptive + push!(setup, + "abstol" => integrator.opts.abstol, + "reltol" => integrator.opts.reltol, + "controller" => integrator.opts.controller) + end + summary_box(io, "Time integration", setup) + println() + + # technical details + setup = Pair{String, Any}["#threads" => Threads.nthreads()] + if mpi_isparallel() + push!(setup, + "#MPI ranks" => mpi_nranks()) + end + summary_box(io, "Environment information", setup) + println() - mpi_isroot() || return nothing + reset_timer!(timer()) - TimerOutputs.complement!(timer()) - print_timer(io, timer(), title="Trixi.jl", - allocations=true, linechars=:unicode, compact=false) - println(io) - return nothing + return nothing end - +function (cb::DiscreteCallback{Condition, Affect!})(io::IO = stdout) where {Condition, + Affect! <: + typeof(summary_callback) + } + mpi_isroot() || return nothing + + TimerOutputs.complement!(timer()) + print_timer(io, timer(), title = "Trixi.jl", + allocations = true, linechars = :unicode, compact = false) + println(io) + return nothing +end end # @muladd diff --git a/src/callbacks_step/time_series.jl b/src/callbacks_step/time_series.jl index 01282fbb2c3..7baa6b9c5a1 100644 --- a/src/callbacks_step/time_series.jl +++ b/src/callbacks_step/time_series.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ TimeSeriesCallback(semi, point_coordinates; @@ -26,190 +26,192 @@ types used in the solver and the cache. !!! warning "Experimental implementation" This is an experimental feature and may change in future releases. """ -mutable struct TimeSeriesCallback{RealT<:Real, uEltype<:Real, SolutionVariables, VariableNames, Cache} - interval::Int - solution_variables::SolutionVariables - variable_names::VariableNames - output_directory::String - filename::String - point_coordinates::Array{RealT, 2} - # Point data is stored as a vector of vectors of the solution data type: - # * the "outer" `Vector` contains one vector for each point at which a time_series is recorded - # * the "inner" `Vector` contains the actual time series for a single point, - # with each record adding "n_vars" entries - # The reason for using this data structure is that the length of the inner vectors needs to be - # increased for each record, which can only be realized in Julia using ordinary `Vector`s. 
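To make the storage layout described in the comment above concrete, here is a hedged, stand-alone illustration (not code from this patch) of inner vectors that grow by `n_vars` entries per record:

```julia
# One growing time series per recorded point; all names are illustrative.
n_points, n_vars = 3, 4
point_data = [Float64[] for _ in 1:n_points]

for record in 1:2                    # pretend two records are taken
    for data in point_data
        append!(data, rand(n_vars))  # inner vector grows by n_vars entries
    end
end

@assert all(length(data) == 2 * n_vars for data in point_data)
```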
- point_data::Vector{Vector{uEltype}} - time::Vector{RealT} - step::Vector{Int} - time_series_cache::Cache +mutable struct TimeSeriesCallback{RealT <: Real, uEltype <: Real, SolutionVariables, + VariableNames, Cache} + interval::Int + solution_variables::SolutionVariables + variable_names::VariableNames + output_directory::String + filename::String + point_coordinates::Array{RealT, 2} + # Point data is stored as a vector of vectors of the solution data type: + # * the "outer" `Vector` contains one vector for each point at which a time_series is recorded + # * the "inner" `Vector` contains the actual time series for a single point, + # with each record adding "n_vars" entries + # The reason for using this data structure is that the length of the inner vectors needs to be + # increased for each record, which can only be realized in Julia using ordinary `Vector`s. + point_data::Vector{Vector{uEltype}} + time::Vector{RealT} + step::Vector{Int} + time_series_cache::Cache end - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:TimeSeriesCallback}) - @nospecialize cb # reduce precompilation time - - time_series_callback = cb.affect! - @unpack interval, solution_variables, output_directory, filename = time_series_callback - print(io, "TimeSeriesCallback(", - "interval=", interval, ", ", - "solution_variables=", interval, ", ", - "output_directory=", "\"output_directory\"", ", ", - "filename=", "\"filename\"", - ")") -end + @nospecialize cb # reduce precompilation time -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{<:Any, <:TimeSeriesCallback}) - @nospecialize cb # reduce precompilation time - - if get(io, :compact, false) - show(io, cb) - else time_series_callback = cb.affect! - - setup = [ - "#points" => size(time_series_callback.point_coordinates, 2), - "interval" => time_series_callback.interval, - "solution_variables" => time_series_callback.solution_variables, - "output_directory" => time_series_callback.output_directory, - "filename" => time_series_callback.filename, - ] - summary_box(io, "TimeSeriesCallback", setup) - end + @unpack interval, solution_variables, output_directory, filename = time_series_callback + print(io, "TimeSeriesCallback(", + "interval=", interval, ", ", + "solution_variables=", interval, ", ", + "output_directory=", "\"output_directory\"", ", ", + "filename=", "\"filename\"", + ")") end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{<:Any, <:TimeSeriesCallback}) + @nospecialize cb # reduce precompilation time + + if get(io, :compact, false) + show(io, cb) + else + time_series_callback = cb.affect! 
+ + setup = [ + "#points" => size(time_series_callback.point_coordinates, 2), + "interval" => time_series_callback.interval, + "solution_variables" => time_series_callback.solution_variables, + "output_directory" => time_series_callback.output_directory, + "filename" => time_series_callback.filename, + ] + summary_box(io, "TimeSeriesCallback", setup) + end +end # Main constructor function TimeSeriesCallback(mesh, equations, solver, cache, point_coordinates; - interval::Integer=1, - solution_variables=cons2cons, - output_directory="out", - filename="time_series.h5", - RealT=real(solver), - uEltype=eltype(cache.elements)) - # check arguments - if !(interval isa Integer && interval >= 0) - throw(ArgumentError("`interval` must be a non-negative integer (provided `interval = $interval`)")) - end - - if ndims(point_coordinates) != 2 || size(point_coordinates, 2) != ndims(mesh) - throw(ArgumentError("`point_coordinates` must be a matrix of size n_points × ndims")) - end - - # Transpose point_coordinates to our usual format [ndims, n_points] - # Note: They are accepted in a different format to allow direct input from `readdlm` - point_coordinates_ = permutedims(point_coordinates) - - # Invoke callback every `interval` time steps or after final step (for storing the data on disk) - if interval > 0 - # With error-based step size control, some steps can be rejected. Thus, - # `integrator.iter >= integrator.stats.naccept` - # (total #steps) (#accepted steps) - # We need to check the number of accepted steps since callbacks are not - # activated after a rejected step. - condition = (u, t, integrator) -> ( (integrator.stats.naccept % interval == 0 && - !(integrator.stats.naccept == 0 && integrator.iter > 0)) || - isfinished(integrator)) - else # disable the callback for interval == 0 - condition = (u, t, integrator) -> false - end - - # Create data structures that are to be filled by the callback - variable_names = varnames(solution_variables, equations) - n_points = size(point_coordinates_, 2) - point_data = Vector{uEltype}[Vector{uEltype}() for _ in 1:n_points] - time = Vector{RealT}() - step = Vector{Int}() - time_series_cache = create_cache_time_series(point_coordinates_, mesh, solver, cache) - - time_series_callback = TimeSeriesCallback(interval, - solution_variables, - variable_names, - output_directory, - filename, - point_coordinates_, - point_data, - time, - step, - time_series_cache) - - return DiscreteCallback(condition, time_series_callback, save_positions=(false,false)) -end + interval::Integer = 1, + solution_variables = cons2cons, + output_directory = "out", + filename = "time_series.h5", + RealT = real(solver), + uEltype = eltype(cache.elements)) + # check arguments + if !(interval isa Integer && interval >= 0) + throw(ArgumentError("`interval` must be a non-negative integer (provided `interval = $interval`)")) + end + if ndims(point_coordinates) != 2 || size(point_coordinates, 2) != ndims(mesh) + throw(ArgumentError("`point_coordinates` must be a matrix of size n_points × ndims")) + end + + # Transpose point_coordinates to our usual format [ndims, n_points] + # Note: They are accepted in a different format to allow direct input from `readdlm` + point_coordinates_ = permutedims(point_coordinates) + + # Invoke callback every `interval` time steps or after final step (for storing the data on disk) + if interval > 0 + # With error-based step size control, some steps can be rejected. 
Thus, + # `integrator.iter >= integrator.stats.naccept` + # (total #steps) (#accepted steps) + # We need to check the number of accepted steps since callbacks are not + # activated after a rejected step. + condition = (u, t, integrator) -> ((integrator.stats.naccept % interval == 0 && + !(integrator.stats.naccept == 0 && + integrator.iter > 0)) || + isfinished(integrator)) + else # disable the callback for interval == 0 + condition = (u, t, integrator) -> false + end + + # Create data structures that are to be filled by the callback + variable_names = varnames(solution_variables, equations) + n_points = size(point_coordinates_, 2) + point_data = Vector{uEltype}[Vector{uEltype}() for _ in 1:n_points] + time = Vector{RealT}() + step = Vector{Int}() + time_series_cache = create_cache_time_series(point_coordinates_, mesh, solver, + cache) + + time_series_callback = TimeSeriesCallback(interval, + solution_variables, + variable_names, + output_directory, + filename, + point_coordinates_, + point_data, + time, + step, + time_series_cache) + + return DiscreteCallback(condition, time_series_callback, + save_positions = (false, false)) +end # Convenience constructor that unpacks the semidiscretization into mesh, equations, solver, cache function TimeSeriesCallback(semi, point_coordinates; kwargs...) - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - return TimeSeriesCallback(mesh, equations, solver, cache, point_coordinates; kwargs...) + return TimeSeriesCallback(mesh, equations, solver, cache, point_coordinates; + kwargs...) end - # Convenience constructor that converts a vector of points into a Trixi.jl-style coordinate array -function TimeSeriesCallback(mesh, equations, solver, cache, point_coordinates::AbstractVector; +function TimeSeriesCallback(mesh, equations, solver, cache, + point_coordinates::AbstractVector; kwargs...) - # Coordinates are usually stored in [ndims, n_points], but here as [n_points, ndims] - n_points = length(point_coordinates) - point_coordinates_ = Matrix{eltype(eltype(point_coordinates))}(undef, n_points, ndims(mesh)) - - for p in 1:n_points - for d in 1:ndims(mesh) - point_coordinates_[p, d] = point_coordinates[p][d] + # Coordinates are usually stored in [ndims, n_points], but here as [n_points, ndims] + n_points = length(point_coordinates) + point_coordinates_ = Matrix{eltype(eltype(point_coordinates))}(undef, n_points, + ndims(mesh)) + + for p in 1:n_points + for d in 1:ndims(mesh) + point_coordinates_[p, d] = point_coordinates[p][d] + end end - end - return TimeSeriesCallback(mesh, equations, solver, cache, point_coordinates_; kwargs...) + return TimeSeriesCallback(mesh, equations, solver, cache, point_coordinates_; + kwargs...) end - # This method is called as callback during the time integration. 
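The method below relies on Julia's callable-struct (functor) pattern: `DiscreteCallback` stores the `TimeSeriesCallback` as its `affect!` and invokes it like a function. A hedged, minimal sketch of that pattern with made-up names:

```julia
# Illustrative only: a struct made callable so it can serve as an `affect!`.
struct CountingAffect
    calls::Vector{Int}
end

# This method definition makes every instance of the struct callable.
(affect::CountingAffect)(integrator) = push!(affect.calls, 1)

affect = CountingAffect(Int[])
affect(nothing)  # a real callback receives the ODE integrator here
@assert length(affect.calls) == 1
```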
function (time_series_callback::TimeSeriesCallback)(integrator) - # Ensure this is not accidentally used with AMR enabled - if uses_amr(integrator.opts.callback) - error("the TimeSeriesCallback does not work with AMR enabled") - end - - @unpack interval = time_series_callback - - # Create record if in correct interval (needs to be checked since the callback is also called - # after the final step for storing the data on disk, independent of the current interval) - if integrator.stats.naccept % interval == 0 - @trixi_timeit timer() "time series" begin - # Store time and step - push!(time_series_callback.time, integrator.t) - push!(time_series_callback.step, integrator.stats.naccept) - - # Unpack data - u_ode = integrator.u - semi = integrator.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array(u_ode, mesh, equations, solver, cache) - - @unpack (point_data, solution_variables, - variable_names, time_series_cache) = time_series_callback - - # Record state at points (solver/mesh-dependent implementation) - record_state_at_points!(point_data, u, solution_variables, length(variable_names), mesh, - equations, solver, time_series_cache) + # Ensure this is not accidentally used with AMR enabled + if uses_amr(integrator.opts.callback) + error("the TimeSeriesCallback does not work with AMR enabled") end - end - # Store time_series if this is the last time step - if isfinished(integrator) - semi = integrator.p - mesh, equations, solver, _ = mesh_equations_solver_cache(semi) - save_time_series_file(time_series_callback, mesh, equations, solver) - end + @unpack interval = time_series_callback + + # Create record if in correct interval (needs to be checked since the callback is also called + # after the final step for storing the data on disk, independent of the current interval) + if integrator.stats.naccept % interval == 0 + @trixi_timeit timer() "time series" begin + # Store time and step + push!(time_series_callback.time, integrator.t) + push!(time_series_callback.step, integrator.stats.naccept) + + # Unpack data + u_ode = integrator.u + semi = integrator.p + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + u = wrap_array(u_ode, mesh, equations, solver, cache) + + @unpack (point_data, solution_variables, + variable_names, time_series_cache) = time_series_callback + + # Record state at points (solver/mesh-dependent implementation) + record_state_at_points!(point_data, u, solution_variables, + length(variable_names), mesh, + equations, solver, time_series_cache) + end + end - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) + # Store time_series if this is the last time step + if isfinished(integrator) + semi = integrator.p + mesh, equations, solver, _ = mesh_equations_solver_cache(semi) + save_time_series_file(time_series_callback, mesh, equations, solver) + end - return nothing -end + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) + return nothing +end include("time_series_dg.jl") include("time_series_dg2d.jl") - - end # @muladd diff --git a/src/callbacks_step/time_series_dg.jl b/src/callbacks_step/time_series_dg.jl index 3a383fa1fd4..1b63979d579 100644 --- a/src/callbacks_step/time_series_dg.jl +++ b/src/callbacks_step/time_series_dg.jl @@ -3,35 +3,33 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Store time series file for a TreeMesh with a DG solver function save_time_series_file(time_series_callback, mesh::TreeMesh, equations, dg::DG) - @unpack (interval, solution_variables, variable_names, - output_directory, filename, point_coordinates, - point_data, time, step, time_series_cache) = time_series_callback - n_points = length(point_data) + @unpack (interval, solution_variables, variable_names, + output_directory, filename, point_coordinates, + point_data, time, step, time_series_cache) = time_series_callback + n_points = length(point_data) - h5open(joinpath(output_directory, filename), "w") do file - # Add context information as attributes - n_variables = length(variable_names) - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["equations"] = get_name(equations) - attributes(file)["polydeg"] = polydeg(dg) - attributes(file)["n_vars"] = n_variables - attributes(file)["n_points"] = n_points - attributes(file)["interval"] = interval - attributes(file)["variable_names"] = collect(variable_names) + h5open(joinpath(output_directory, filename), "w") do file + # Add context information as attributes + n_variables = length(variable_names) + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["equations"] = get_name(equations) + attributes(file)["polydeg"] = polydeg(dg) + attributes(file)["n_vars"] = n_variables + attributes(file)["n_points"] = n_points + attributes(file)["interval"] = interval + attributes(file)["variable_names"] = collect(variable_names) - file["time"] = time - file["timestep"] = step - file["point_coordinates"] = point_coordinates - for p in 1:n_points - # Store data as 2D array for convenience - file["point_data_$p"] = reshape(point_data[p], n_variables, length(time)) + file["time"] = time + file["timestep"] = step + file["point_coordinates"] = point_coordinates + for p in 1:n_points + # Store data as 2D array for convenience + file["point_data_$p"] = reshape(point_data[p], n_variables, length(time)) + end end - end end - - end # @muladd diff --git a/src/callbacks_step/time_series_dg2d.jl b/src/callbacks_step/time_series_dg2d.jl index 778739a824b..c15945d6e16 100644 --- a/src/callbacks_step/time_series_dg2d.jl +++ b/src/callbacks_step/time_series_dg2d.jl @@ -3,148 +3,151 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Creates cache for time series callback function create_cache_time_series(point_coordinates, mesh::TreeMesh{2}, dg, cache) - # Determine element ids for point coordinates - element_ids = get_elements_by_coordinates(point_coordinates, mesh, dg, cache) + # Determine element ids for point coordinates + element_ids = get_elements_by_coordinates(point_coordinates, mesh, dg, cache) - # Calculate & store Lagrange interpolation polynomials - interpolating_polynomials = calc_interpolating_polynomials(point_coordinates, element_ids, mesh, - dg, cache) + # Calculate & store Lagrange interpolation polynomials + interpolating_polynomials = calc_interpolating_polynomials(point_coordinates, + element_ids, mesh, + dg, cache) - time_series_cache = (; element_ids, interpolating_polynomials) + time_series_cache = (; element_ids, interpolating_polynomials) - return time_series_cache + return time_series_cache end - # Find element ids containing coordinates given as a matrix [ndims, npoints] -function get_elements_by_coordinates!(element_ids, coordinates, mesh::TreeMesh, dg, cache) - if length(element_ids) != size(coordinates, 2) - throw(DimensionMismatch("storage length for element ids does not match the number of coordinates")) - end - - @unpack cell_ids = cache.elements - @unpack tree = mesh - - # Reset element ids - 0 indicates "not (yet) found" - element_ids .= 0 - found_elements = 0 - - # Iterate over all elements - for element in eachelement(dg, cache) - # Get cell id - cell_id = cell_ids[element] - - # Iterate over coordinates - for index in 1:length(element_ids) - # Skip coordinates for which an element has already been found - if element_ids[index] > 0 - continue - end - - # Construct point - x = SVector(ntuple(i -> coordinates[i, index], ndims(mesh))) - - # Skip if point is not in cell - if !is_point_in_cell(tree, x, cell_id) - continue - end - - # Otherwise point is in cell and thus in element - element_ids[index] = element - found_elements += 1 +function get_elements_by_coordinates!(element_ids, coordinates, mesh::TreeMesh, dg, + cache) + if length(element_ids) != size(coordinates, 2) + throw(DimensionMismatch("storage length for element ids does not match the number of coordinates")) end - # Exit loop if all elements have already been found - if found_elements == length(element_ids) - break + @unpack cell_ids = cache.elements + @unpack tree = mesh + + # Reset element ids - 0 indicates "not (yet) found" + element_ids .= 0 + found_elements = 0 + + # Iterate over all elements + for element in eachelement(dg, cache) + # Get cell id + cell_id = cell_ids[element] + + # Iterate over coordinates + for index in 1:length(element_ids) + # Skip coordinates for which an element has already been found + if element_ids[index] > 0 + continue + end + + # Construct point + x = SVector(ntuple(i -> coordinates[i, index], ndims(mesh))) + + # Skip if point is not in cell + if !is_point_in_cell(tree, x, cell_id) + continue + end + + # Otherwise point is in cell and thus in element + element_ids[index] = element + found_elements += 1 + end + + # Exit loop if all elements have already been found + if found_elements == length(element_ids) + break + end end - end - return element_ids + return element_ids end - function get_elements_by_coordinates(coordinates, mesh, dg, cache) - element_ids = Vector{Int}(undef, size(coordinates, 2)) - get_elements_by_coordinates!(element_ids, coordinates, mesh, dg, cache) + element_ids = Vector{Int}(undef, size(coordinates, 2)) + get_elements_by_coordinates!(element_ids, 
coordinates, mesh, dg, cache) - return element_ids + return element_ids end - # Calculate the interpolating polynomials to extract data at the given coordinates # The coordinates are known to be located in the respective element in `element_ids` -function calc_interpolating_polynomials!(interpolating_polynomials, coordinates, element_ids, +function calc_interpolating_polynomials!(interpolating_polynomials, coordinates, + element_ids, mesh::TreeMesh, dg::DGSEM, cache) - @unpack tree = mesh - @unpack nodes = dg.basis + @unpack tree = mesh + @unpack nodes = dg.basis - wbary = barycentric_weights(nodes) + wbary = barycentric_weights(nodes) - for index in 1:length(element_ids) - # Construct point - x = SVector(ntuple(i -> coordinates[i, index], ndims(mesh))) - - # Convert to unit coordinates - cell_id = cache.elements.cell_ids[element_ids[index]] - cell_coordinates_ = cell_coordinates(tree, cell_id) - cell_length = length_at_cell(tree, cell_id) - unit_coordinates = (x .- cell_coordinates_) * 2 / cell_length - - # Calculate interpolating polynomial for each dimension, making use of tensor product structure - for d in 1:ndims(mesh) - interpolating_polynomials[:, d, index] .= lagrange_interpolating_polynomials( - unit_coordinates[d], nodes, wbary) + for index in 1:length(element_ids) + # Construct point + x = SVector(ntuple(i -> coordinates[i, index], ndims(mesh))) + + # Convert to unit coordinates + cell_id = cache.elements.cell_ids[element_ids[index]] + cell_coordinates_ = cell_coordinates(tree, cell_id) + cell_length = length_at_cell(tree, cell_id) + unit_coordinates = (x .- cell_coordinates_) * 2 / cell_length + + # Calculate interpolating polynomial for each dimension, making use of tensor product structure + for d in 1:ndims(mesh) + interpolating_polynomials[:, d, index] .= lagrange_interpolating_polynomials(unit_coordinates[d], + nodes, + wbary) + end end - end - return interpolating_polynomials + return interpolating_polynomials end +function calc_interpolating_polynomials(coordinates, element_ids, mesh::TreeMesh, dg, + cache) + interpolating_polynomials = Array{real(dg), 3}(undef, + nnodes(dg), ndims(mesh), + length(element_ids)) + calc_interpolating_polynomials!(interpolating_polynomials, coordinates, element_ids, + mesh, dg, + cache) -function calc_interpolating_polynomials(coordinates, element_ids, mesh::TreeMesh, dg, cache) - interpolating_polynomials = Array{real(dg), 3}(undef, - nnodes(dg), ndims(mesh), length(element_ids)) - calc_interpolating_polynomials!(interpolating_polynomials, coordinates, element_ids, mesh, dg, - cache) - - return interpolating_polynomials + return interpolating_polynomials end - # Record the solution variables at each given point -function record_state_at_points!(point_data, u, solution_variables, n_solution_variables, - mesh::TreeMesh{2}, equations, dg::DG, time_series_cache) - @unpack element_ids, interpolating_polynomials = time_series_cache - old_length = length(first(point_data)) - new_length = old_length + n_solution_variables - - # Loop over all points/elements that should be recorded - for index in 1:length(element_ids) - # Extract data array and element id - data = point_data[index] - element_id = element_ids[index] - - # Make room for new data to be recorded - resize!(data, new_length) - data[(old_length+1):new_length] .= zero(eltype(data)) - - # Loop over all nodes to compute their contribution to the interpolated values - for j in eachnode(dg), i in eachnode(dg) - u_node = solution_variables(get_node_vars(u, equations, dg, i, j, element_id), 
equations) - - for v in 1:length(u_node) - data[old_length + v] += (u_node[v] - * interpolating_polynomials[i, 1, index] - * interpolating_polynomials[j, 2, index]) - end +function record_state_at_points!(point_data, u, solution_variables, + n_solution_variables, + mesh::TreeMesh{2}, equations, dg::DG, + time_series_cache) + @unpack element_ids, interpolating_polynomials = time_series_cache + old_length = length(first(point_data)) + new_length = old_length + n_solution_variables + + # Loop over all points/elements that should be recorded + for index in 1:length(element_ids) + # Extract data array and element id + data = point_data[index] + element_id = element_ids[index] + + # Make room for new data to be recorded + resize!(data, new_length) + data[(old_length + 1):new_length] .= zero(eltype(data)) + + # Loop over all nodes to compute their contribution to the interpolated values + for j in eachnode(dg), i in eachnode(dg) + u_node = solution_variables(get_node_vars(u, equations, dg, i, j, + element_id), equations) + + for v in 1:length(u_node) + data[old_length + v] += (u_node[v] + * interpolating_polynomials[i, 1, index] + * interpolating_polynomials[j, 2, index]) + end + end end - end end - - end # @muladd diff --git a/src/callbacks_step/trivial.jl b/src/callbacks_step/trivial.jl index 5a16ab059a1..a55b7d85b13 100644 --- a/src/callbacks_step/trivial.jl +++ b/src/callbacks_step/trivial.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ TrivialCallback() @@ -12,26 +12,25 @@ A callback that does nothing. This can be useful to disable some callbacks easily via [`trixi_include`](@ref). """ function TrivialCallback() - DiscreteCallback(trivial_callback, trivial_callback, - save_positions=(false,false)) + DiscreteCallback(trivial_callback, trivial_callback, + save_positions = (false, false)) end trivial_callback(u, t, integrator) = false trivial_callback(integrator) = u_modified!(integrator, false) - function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:typeof(trivial_callback)}) - @nospecialize cb # reduce precompilation time + @nospecialize cb # reduce precompilation time - print(io, "TrivialCallback()") + print(io, "TrivialCallback()") end - # This allows to set `summary_callback = TrivialCallback()` in elixirs to suppress # output, e.g. in `convergence_test`. -function (cb::DiscreteCallback{Condition,Affect!})(io::IO=stdout) where {Condition, Affect!<:typeof(trivial_callback)} - return nothing +function (cb::DiscreteCallback{Condition, Affect!})(io::IO = stdout) where {Condition, + Affect! <: + typeof(trivial_callback) + } + return nothing end - - end # @muladd diff --git a/src/callbacks_step/visualization.jl b/src/callbacks_step/visualization.jl index 6eb04608368..98c0126a302 100644 --- a/src/callbacks_step/visualization.jl +++ b/src/callbacks_step/visualization.jl @@ -3,51 +3,58 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - - -mutable struct VisualizationCallback{SolutionVariables, VariableNames, PlotDataCreator, PlotCreator} - interval::Int - solution_variables::SolutionVariables - variable_names::VariableNames - show_mesh::Bool - plot_data_creator::PlotDataCreator - plot_creator::PlotCreator - plot_arguments::Dict{Symbol,Any} -end - - -function Base.show(io::IO, cb::DiscreteCallback{Condition,Affect!}) where {Condition, Affect!<:VisualizationCallback} - visualization_callback = cb.affect! 
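# -- Editor's aside, not part of the patch: the `Base.show` methods being
# reformatted here follow Julia's two-level printing protocol -- a compact
# one-line form for `show(io, x)` and a verbose form for the `MIME"text/plain"`
# method used by the REPL. A minimal self-contained sketch of the same pattern;
# the type `Foo` and its field are hypothetical, not Trixi API:
struct Foo
    interval::Int
end

# compact one-line form, used e.g. when printed inside containers
Base.show(io::IO, f::Foo) = print(io, "Foo(interval=", f.interval, ")")

# verbose form, analogous to the `summary_box` output below; falls back to
# the compact form when the IO context requests it
function Base.show(io::IO, ::MIME"text/plain", f::Foo)
    if get(io, :compact, false)
        show(io, f)
    else
        println(io, "Foo")
        print(io, "  interval: ", f.interval)
    end
end
# -- end of aside --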
- @unpack interval, plot_arguments, solution_variables, variable_names, show_mesh, plot_creator, plot_data_creator = visualization_callback - print(io, "VisualizationCallback(", - "interval=", interval, ", ", - "solution_variables=", solution_variables, ", ", - "variable_names=", variable_names, ", ", - "show_mesh=", show_mesh, ", ", - "plot_data_creator=", plot_data_creator, ", ", - "plot_creator=", plot_creator, ", ", - "plot_arguments=", plot_arguments, ")") +#! format: noindent + +mutable struct VisualizationCallback{SolutionVariables, VariableNames, PlotDataCreator, + PlotCreator} + interval::Int + solution_variables::SolutionVariables + variable_names::VariableNames + show_mesh::Bool + plot_data_creator::PlotDataCreator + plot_creator::PlotCreator + plot_arguments::Dict{Symbol, Any} end -function Base.show(io::IO, ::MIME"text/plain", cb::DiscreteCallback{Condition,Affect!}) where {Condition, Affect!<:VisualizationCallback} - if get(io, :compact, false) - show(io, cb) - else +function Base.show(io::IO, + cb::DiscreteCallback{Condition, Affect!}) where {Condition, + Affect! <: + VisualizationCallback + } visualization_callback = cb.affect! - - setup = [ - "interval" => visualization_callback.interval, - "plot arguments" => visualization_callback.plot_arguments, - "solution variables" => visualization_callback.solution_variables, - "variable names" => visualization_callback.variable_names, - "show mesh" => visualization_callback.show_mesh, - "plot creator" => visualization_callback.plot_creator, - "plot data creator" => visualization_callback.plot_data_creator, - ] - summary_box(io, "VisualizationCallback", setup) - end + @unpack interval, plot_arguments, solution_variables, variable_names, show_mesh, plot_creator, plot_data_creator = visualization_callback + print(io, "VisualizationCallback(", + "interval=", interval, ", ", + "solution_variables=", solution_variables, ", ", + "variable_names=", variable_names, ", ", + "show_mesh=", show_mesh, ", ", + "plot_data_creator=", plot_data_creator, ", ", + "plot_creator=", plot_creator, ", ", + "plot_arguments=", plot_arguments, ")") end +function Base.show(io::IO, ::MIME"text/plain", + cb::DiscreteCallback{Condition, Affect!}) where {Condition, + Affect! <: + VisualizationCallback + } + if get(io, :compact, false) + show(io, cb) + else + visualization_callback = cb.affect! + + setup = [ + "interval" => visualization_callback.interval, + "plot arguments" => visualization_callback.plot_arguments, + "solution variables" => visualization_callback.solution_variables, + "variable names" => visualization_callback.variable_names, + "show mesh" => visualization_callback.show_mesh, + "plot creator" => visualization_callback.plot_creator, + "plot data creator" => visualization_callback.plot_data_creator, + ] + summary_box(io, "VisualizationCallback", setup) + end +end """ VisualizationCallback(; interval=0, @@ -75,92 +82,90 @@ To customize the generated figure, `plot_data_creator` allows to use different p same interface as the default implementation [`show_plot`](@ref). All remaining keyword arguments are collected and passed as additional arguments to the plotting command. """ -function VisualizationCallback(; interval=0, - solution_variables=cons2prim, - variable_names=[], - show_mesh=false, - plot_data_creator=PlotData2D, - plot_creator=show_plot, - plot_arguments...) 
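# -- Editor's aside, not part of the patch: the trailing `plot_arguments...` in
# the signature above slurps all remaining keyword arguments; the constructor
# stores them as a `Dict{Symbol, Any}` and later splats them back into the
# plotting command. A minimal sketch of this slurp-and-splat pattern;
# `make_options` and its keywords are hypothetical, not Trixi API:
function make_options(; interval = 0, extra...)
    return (interval = interval, options = Dict{Symbol, Any}(extra))
end

opts = make_options(interval = 10, color = :red, linewidth = 2)
# opts.options == Dict(:color => :red, :linewidth => 2), ready to be splatted
# back into a plot call as `plot(args...; opts.options...)`
# -- end of aside --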
- mpi_isparallel() && error("this callback does not work in parallel yet") - - if variable_names isa String - variable_names = String[variable_names] - end - - visualization_callback = VisualizationCallback(interval, - solution_variables, variable_names, show_mesh, - plot_data_creator, plot_creator, - Dict{Symbol,Any}(plot_arguments)) - - # Warn users if they create a visualization callback without having loaded the Plots package - # - # Note: This warning is added for convenience, as Plots is the only "officially" supported - # visualization package right now. However, in general nothing prevents anyone from using - # other packages such as Makie, Gadfly etc., given that appropriate `plot_creator`s are - # passed. This is also the reason why the visualization callback is not included via - # Requires.jl only when Plots is present. - # In the future, we should update/remove this warning if other plotting packages are - # starting to be used. - if !(:Plots in names(@__MODULE__, all=true)) - @warn "Package `Plots` not loaded but required by `VisualizationCallback` to visualize results" - end - - DiscreteCallback(visualization_callback, visualization_callback, # the first one is the condition, the second the affect! - save_positions=(false,false), - initialize=initialize!) +function VisualizationCallback(; interval = 0, + solution_variables = cons2prim, + variable_names = [], + show_mesh = false, + plot_data_creator = PlotData2D, + plot_creator = show_plot, + plot_arguments...) + mpi_isparallel() && error("this callback does not work in parallel yet") + + if variable_names isa String + variable_names = String[variable_names] + end + + visualization_callback = VisualizationCallback(interval, + solution_variables, variable_names, + show_mesh, + plot_data_creator, plot_creator, + Dict{Symbol, Any}(plot_arguments)) + + # Warn users if they create a visualization callback without having loaded the Plots package + # + # Note: This warning is added for convenience, as Plots is the only "officially" supported + # visualization package right now. However, in general nothing prevents anyone from using + # other packages such as Makie, Gadfly etc., given that appropriate `plot_creator`s are + # passed. This is also the reason why the visualization callback is not included via + # Requires.jl only when Plots is present. + # In the future, we should update/remove this warning if other plotting packages are + # starting to be used. + if !(:Plots in names(@__MODULE__, all = true)) + @warn "Package `Plots` not loaded but required by `VisualizationCallback` to visualize results" + end + + DiscreteCallback(visualization_callback, visualization_callback, # the first one is the condition, the second the affect! + save_positions = (false, false), + initialize = initialize!) end +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u, t, + integrator) where {Condition, Affect! <: VisualizationCallback} + visualization_callback = cb.affect! -function initialize!(cb::DiscreteCallback{Condition,Affect!}, u, t, integrator) where {Condition, Affect!<:VisualizationCallback} - visualization_callback = cb.affect! - - visualization_callback(integrator) + visualization_callback(integrator) - return nothing + return nothing end - # this method is called to determine whether the callback should be activated function (visualization_callback::VisualizationCallback)(u, t, integrator) - @unpack interval = visualization_callback - - # With error-based step size control, some steps can be rejected. 
Thus, - # `integrator.iter >= integrator.stats.naccept` - # (total #steps) (#accepted steps) - # We need to check the number of accepted steps since callbacks are not - # activated after a rejected step. - return interval > 0 && ( (integrator.stats.naccept % interval == 0 && - !(integrator.stats.naccept == 0 && integrator.iter > 0)) || - isfinished(integrator)) + @unpack interval = visualization_callback + + # With error-based step size control, some steps can be rejected. Thus, + # `integrator.iter >= integrator.stats.naccept` + # (total #steps) (#accepted steps) + # We need to check the number of accepted steps since callbacks are not + # activated after a rejected step. + return interval > 0 && ((integrator.stats.naccept % interval == 0 && + !(integrator.stats.naccept == 0 && integrator.iter > 0)) || + isfinished(integrator)) end - # this method is called when the callback is activated function (visualization_callback::VisualizationCallback)(integrator) - u_ode = integrator.u - semi = integrator.p - @unpack plot_arguments, solution_variables, variable_names, show_mesh, plot_data_creator, plot_creator = visualization_callback - - # Extract plot data - plot_data = plot_data_creator(u_ode, semi, solution_variables=solution_variables) - - # If variable names were not specified, plot everything - if isempty(variable_names) - variable_names = String[keys(plot_data)...] - end - - # Create plot - plot_creator(plot_data, variable_names; - show_mesh=show_mesh, plot_arguments=plot_arguments, - time=integrator.t, timestep=integrator.stats.naccept) - - # avoid re-evaluating possible FSAL stages - u_modified!(integrator, false) - return nothing + u_ode = integrator.u + semi = integrator.p + @unpack plot_arguments, solution_variables, variable_names, show_mesh, plot_data_creator, plot_creator = visualization_callback + + # Extract plot data + plot_data = plot_data_creator(u_ode, semi, solution_variables = solution_variables) + + # If variable names were not specified, plot everything + if isempty(variable_names) + variable_names = String[keys(plot_data)...] + end + + # Create plot + plot_creator(plot_data, variable_names; + show_mesh = show_mesh, plot_arguments = plot_arguments, + time = integrator.t, timestep = integrator.stats.naccept) + + # avoid re-evaluating possible FSAL stages + u_modified!(integrator, false) + return nothing end - """ show_plot(plot_data, variable_names; show_mesh=true, plot_arguments=Dict{Symbol,Any}(), @@ -179,41 +184,40 @@ This function is the default `plot_creator` argument for the [`VisualizationCall See also: [`VisualizationCallback`](@ref), [`save_plot`](@ref) """ function show_plot(plot_data, variable_names; - show_mesh=true, plot_arguments=Dict{Symbol,Any}(), - time=nothing, timestep=nothing) - # Gather subplots - plots = [] - for v in variable_names - push!(plots, Plots.plot(plot_data[v]; plot_arguments...)) - end - if show_mesh - push!(plots, Plots.plot(getmesh(plot_data); plot_arguments...)) - end - - # Note, for the visualization callback to work for general equation systems - # this layout construction would need to use the if-logic below. - # Currently, there is no use case for this so it is left here as a note. 
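# -- Editor's aside, not part of the patch: the layout determination that
# follows picks a near-square grid via `cols = ceil(Int, sqrt(n))` and
# `rows = div(n, cols, RoundUp)`; e.g. n = 5 subplots give cols = 3, rows = 2,
# i.e. a 2x3 grid with one empty slot. --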
- # - # Determine layout - # if length(plots) <= 3 - # cols = length(plots) - # rows = 1 - # else - # cols = ceil(Int, sqrt(length(plots))) - # rows = div(length(plots), cols, RoundUp) - # end - # layout = (rows, cols) - - # Determine layout - cols = ceil(Int, sqrt(length(plots))) - rows = div(length(plots), cols, RoundUp) - layout = (rows, cols) - - # Show plot - display(Plots.plot(plots..., layout=layout)) + show_mesh = true, plot_arguments = Dict{Symbol, Any}(), + time = nothing, timestep = nothing) + # Gather subplots + plots = [] + for v in variable_names + push!(plots, Plots.plot(plot_data[v]; plot_arguments...)) + end + if show_mesh + push!(plots, Plots.plot(getmesh(plot_data); plot_arguments...)) + end + + # Note, for the visualization callback to work for general equation systems + # this layout construction would need to use the if-logic below. + # Currently, there is no use case for this so it is left here as a note. + # + # Determine layout + # if length(plots) <= 3 + # cols = length(plots) + # rows = 1 + # else + # cols = ceil(Int, sqrt(length(plots))) + # rows = div(length(plots), cols, RoundUp) + # end + # layout = (rows, cols) + + # Determine layout + cols = ceil(Int, sqrt(length(plots))) + rows = div(length(plots), cols, RoundUp) + layout = (rows, cols) + + # Show plot + display(Plots.plot(plots..., layout = layout)) end - """ save_plot(plot_data, variable_names; show_mesh=true, plot_arguments=Dict{Symbol,Any}(), @@ -232,29 +236,27 @@ The `timestep` is used in the filename. `time` is currently unused by this funct See also: [`VisualizationCallback`](@ref), [`show_plot`](@ref) """ function save_plot(plot_data, variable_names; - show_mesh=true, plot_arguments=Dict{Symbol,Any}(), - time=nothing, timestep=nothing) - # Gather subplots - plots = [] - for v in variable_names - push!(plots, Plots.plot(plot_data[v]; plot_arguments...)) - end - if show_mesh - push!(plots, Plots.plot(getmesh(plot_data); plot_arguments...)) - end - - # Determine layout - cols = ceil(Int, sqrt(length(plots))) - rows = div(length(plots), cols, RoundUp) - layout = (rows, cols) - - # Create plot - Plots.plot(plots..., layout=layout) - - # Determine filename and save plot - filename = joinpath("out", @sprintf("solution_%06d.png", timestep)) - Plots.savefig(filename) + show_mesh = true, plot_arguments = Dict{Symbol, Any}(), + time = nothing, timestep = nothing) + # Gather subplots + plots = [] + for v in variable_names + push!(plots, Plots.plot(plot_data[v]; plot_arguments...)) + end + if show_mesh + push!(plots, Plots.plot(getmesh(plot_data); plot_arguments...)) + end + + # Determine layout + cols = ceil(Int, sqrt(length(plots))) + rows = div(length(plots), cols, RoundUp) + layout = (rows, cols) + + # Create plot + Plots.plot(plots..., layout = layout) + + # Determine filename and save plot + filename = joinpath("out", @sprintf("solution_%06d.png", timestep)) + Plots.savefig(filename) end - - end # @muladd diff --git a/src/equations/acoustic_perturbation_2d.jl b/src/equations/acoustic_perturbation_2d.jl index 9161de8da15..786630a14c7 100644 --- a/src/equations/acoustic_perturbation_2d.jl +++ b/src/equations/acoustic_perturbation_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent @doc raw""" AcousticPerturbationEquations2D(v_mean_global, c_mean_global, rho_mean_global) @@ -47,41 +47,51 @@ The equations are based on the APE-4 system introduced in the following paper: Acoustic perturbation equations based on flow decomposition via source filtering [DOI: 10.1016/S0021-9991(03)00168-2](https://doi.org/10.1016/S0021-9991(03)00168-2) """ -struct AcousticPerturbationEquations2D{RealT<:Real} <: AbstractAcousticPerturbationEquations{2, 7} - v_mean_global::SVector{2, RealT} - c_mean_global::RealT - rho_mean_global::RealT +struct AcousticPerturbationEquations2D{RealT <: Real} <: + AbstractAcousticPerturbationEquations{2, 7} + v_mean_global::SVector{2, RealT} + c_mean_global::RealT + rho_mean_global::RealT end -function AcousticPerturbationEquations2D(v_mean_global::NTuple{2,<:Real}, c_mean_global::Real, +function AcousticPerturbationEquations2D(v_mean_global::NTuple{2, <:Real}, + c_mean_global::Real, rho_mean_global::Real) - return AcousticPerturbationEquations2D(SVector(v_mean_global), c_mean_global, rho_mean_global) + return AcousticPerturbationEquations2D(SVector(v_mean_global), c_mean_global, + rho_mean_global) end -function AcousticPerturbationEquations2D(; v_mean_global::NTuple{2,<:Real}, c_mean_global::Real, +function AcousticPerturbationEquations2D(; v_mean_global::NTuple{2, <:Real}, + c_mean_global::Real, rho_mean_global::Real) - return AcousticPerturbationEquations2D(SVector(v_mean_global), c_mean_global, rho_mean_global) + return AcousticPerturbationEquations2D(SVector(v_mean_global), c_mean_global, + rho_mean_global) end - -varnames(::typeof(cons2cons), ::AcousticPerturbationEquations2D) = ("v1_prime", "v2_prime", "p_prime_scaled", - "v1_mean", "v2_mean", "c_mean", "rho_mean") -varnames(::typeof(cons2prim), ::AcousticPerturbationEquations2D) = ("v1_prime", "v2_prime", "p_prime", - "v1_mean", "v2_mean", "c_mean", "rho_mean") - +function varnames(::typeof(cons2cons), ::AcousticPerturbationEquations2D) + ("v1_prime", "v2_prime", "p_prime_scaled", + "v1_mean", "v2_mean", "c_mean", "rho_mean") +end +function varnames(::typeof(cons2prim), ::AcousticPerturbationEquations2D) + ("v1_prime", "v2_prime", "p_prime", + "v1_mean", "v2_mean", "c_mean", "rho_mean") +end # Convenience functions for retrieving state variables and mean variables function cons2state(u, equations::AcousticPerturbationEquations2D) - return SVector(u[1], u[2], u[3]) + return SVector(u[1], u[2], u[3]) end function cons2mean(u, equations::AcousticPerturbationEquations2D) - return SVector(u[4], u[5], u[6], u[7]) + return SVector(u[4], u[5], u[6], u[7]) end -varnames(::typeof(cons2state), ::AcousticPerturbationEquations2D) = ("v1_prime", "v2_prime", "p_prime_scaled") -varnames(::typeof(cons2mean), ::AcousticPerturbationEquations2D) = ("v1_mean", "v2_mean", "c_mean", "rho_mean") - +function varnames(::typeof(cons2state), ::AcousticPerturbationEquations2D) + ("v1_prime", "v2_prime", "p_prime_scaled") +end +function varnames(::typeof(cons2mean), ::AcousticPerturbationEquations2D) + ("v1_mean", "v2_mean", "c_mean", "rho_mean") +end """ global_mean_vars(equations::AcousticPerturbationEquations2D) @@ -90,11 +100,11 @@ Returns the global mean variables stored in `equations`. This makes it easier to define flexible initial conditions for problems with constant mean flow. 
""" function global_mean_vars(equations::AcousticPerturbationEquations2D) - return equations.v_mean_global[1], equations.v_mean_global[2], equations.c_mean_global, - equations.rho_mean_global + return equations.v_mean_global[1], equations.v_mean_global[2], + equations.c_mean_global, + equations.rho_mean_global end - """ initial_condition_constant(x, t, equations::AcousticPerturbationEquations2D) @@ -102,36 +112,36 @@ A constant initial condition where the state variables are zero and the mean flo Uses the global mean values from `equations`. """ function initial_condition_constant(x, t, equations::AcousticPerturbationEquations2D) - v1_prime = 0.0 - v2_prime = 0.0 - p_prime_scaled = 0.0 + v1_prime = 0.0 + v2_prime = 0.0 + p_prime_scaled = 0.0 - return SVector(v1_prime, v2_prime, p_prime_scaled, global_mean_vars(equations)...) + return SVector(v1_prime, v2_prime, p_prime_scaled, global_mean_vars(equations)...) end - """ initial_condition_convergence_test(x, t, equations::AcousticPerturbationEquations2D) A smooth initial condition used for convergence tests in combination with [`source_terms_convergence_test`](@ref). Uses the global mean values from `equations`. """ -function initial_condition_convergence_test(x, t, equations::AcousticPerturbationEquations2D) - c = 2.0 - A = 0.2 - L = 2.0 - f = 2.0 / L - a = 1.0 - omega = 2 * pi * f - init = c + A * sin(omega * (x[1] + x[2] - a*t)) - - v1_prime = init - v2_prime = init - p_prime = init^2 - - prim = SVector(v1_prime, v2_prime, p_prime, global_mean_vars(equations)...) - - return prim2cons(prim, equations) +function initial_condition_convergence_test(x, t, + equations::AcousticPerturbationEquations2D) + c = 2.0 + A = 0.2 + L = 2.0 + f = 2.0 / L + a = 1.0 + omega = 2 * pi * f + init = c + A * sin(omega * (x[1] + x[2] - a * t)) + + v1_prime = init + v2_prime = init + p_prime = init^2 + + prim = SVector(v1_prime, v2_prime, p_prime, global_mean_vars(equations)...) + + return prim2cons(prim, equations) end """ @@ -140,73 +150,75 @@ end Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref). """ -function source_terms_convergence_test(u, x, t, equations::AcousticPerturbationEquations2D) - v1_mean, v2_mean, c_mean, rho_mean = cons2mean(u, equations) +function source_terms_convergence_test(u, x, t, + equations::AcousticPerturbationEquations2D) + v1_mean, v2_mean, c_mean, rho_mean = cons2mean(u, equations) - c = 2.0 - A = 0.2 - L = 2.0 - f = 2.0 / L - a = 1.0 - omega = 2 * pi * f + c = 2.0 + A = 0.2 + L = 2.0 + f = 2.0 / L + a = 1.0 + omega = 2 * pi * f - si, co = sincos(omega * (x[1] + x[2] - a * t)) - tmp = v1_mean + v2_mean - a + si, co = sincos(omega * (x[1] + x[2] - a * t)) + tmp = v1_mean + v2_mean - a - du1 = du2 = A * omega * co * (2 * c/rho_mean + tmp + 2/rho_mean * A * si) - du3 = A * omega * co * (2 * c_mean^2 * rho_mean + 2 * c * tmp + 2 * A * tmp * si) / c_mean^2 + du1 = du2 = A * omega * co * (2 * c / rho_mean + tmp + 2 / rho_mean * A * si) + du3 = A * omega * co * (2 * c_mean^2 * rho_mean + 2 * c * tmp + 2 * A * tmp * si) / + c_mean^2 - du4 = du5 = du6 = du7 = 0.0 + du4 = du5 = du6 = du7 = 0.0 - return SVector(du1, du2, du3, du4, du5, du6, du7) + return SVector(du1, du2, du3, du4, du5, du6, du7) end - """ initial_condition_gauss(x, t, equations::AcousticPerturbationEquations2D) A Gaussian pulse in a constant mean flow. Uses the global mean values from `equations`. 
""" function initial_condition_gauss(x, t, equations::AcousticPerturbationEquations2D) - v1_prime = 0.0 - v2_prime = 0.0 - p_prime = exp(-4*(x[1]^2 + x[2]^2)) + v1_prime = 0.0 + v2_prime = 0.0 + p_prime = exp(-4 * (x[1]^2 + x[2]^2)) - prim = SVector(v1_prime, v2_prime, p_prime, global_mean_vars(equations)...) + prim = SVector(v1_prime, v2_prime, p_prime, global_mean_vars(equations)...) - return prim2cons(prim, equations) + return prim2cons(prim, equations) end - """ boundary_condition_wall(u_inner, orientation, direction, x, t, surface_flux_function, equations::AcousticPerturbationEquations2D) Boundary conditions for a solid wall. """ -function boundary_condition_wall(u_inner, orientation, direction, x, t, surface_flux_function, +function boundary_condition_wall(u_inner, orientation, direction, x, t, + surface_flux_function, equations::AcousticPerturbationEquations2D) - # Boundary state is equal to the inner state except for the perturbed velocity. For boundaries - # in the -x/+x direction, we multiply the perturbed velocity in the x direction by -1. - # Similarly, for boundaries in the -y/+y direction, we multiply the perturbed velocity in the - # y direction by -1 - if direction in (1, 2) # x direction - u_boundary = SVector(-u_inner[1], u_inner[2], u_inner[3], cons2mean(u_inner, equations)...) - else # y direction - u_boundary = SVector(u_inner[1], -u_inner[2], u_inner[3], cons2mean(u_inner, equations)...) - end - - # Calculate boundary flux - if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equations) - end - - return flux + # Boundary state is equal to the inner state except for the perturbed velocity. For boundaries + # in the -x/+x direction, we multiply the perturbed velocity in the x direction by -1. + # Similarly, for boundaries in the -y/+y direction, we multiply the perturbed velocity in the + # y direction by -1 + if direction in (1, 2) # x direction + u_boundary = SVector(-u_inner[1], u_inner[2], u_inner[3], + cons2mean(u_inner, equations)...) + else # y direction + u_boundary = SVector(u_inner[1], -u_inner[2], u_inner[3], + cons2mean(u_inner, equations)...) 
+ end + + # Calculate boundary flux + if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equations) + end + + return flux end - """ boundary_condition_slip_wall(u_inner, normal_direction, x, t, surface_flux_function, equations::AcousticPerturbationEquations2D) @@ -219,148 +231,152 @@ Further details are available in the paper: [DOI: 10.2514/1.J050333](https://doi.org/10.2514/1.J050333) """ function boundary_condition_slip_wall(u_inner, normal_direction::AbstractVector, x, t, - surface_flux_function, equations::AcousticPerturbationEquations2D) - # normalize the outward pointing direction - normal = normal_direction / norm(normal_direction) + surface_flux_function, + equations::AcousticPerturbationEquations2D) + # normalize the outward pointing direction + normal = normal_direction / norm(normal_direction) - # compute the normal perturbed velocity - u_normal = normal[1] * u_inner[1] + normal[2] * u_inner[2] + # compute the normal perturbed velocity + u_normal = normal[1] * u_inner[1] + normal[2] * u_inner[2] - # create the "external" boundary solution state - u_boundary = SVector(u_inner[1] - 2.0 * u_normal * normal[1], - u_inner[2] - 2.0 * u_normal * normal[2], - u_inner[3], cons2mean(u_inner, equations)...) + # create the "external" boundary solution state + u_boundary = SVector(u_inner[1] - 2.0 * u_normal * normal[1], + u_inner[2] - 2.0 * u_normal * normal[2], + u_inner[3], cons2mean(u_inner, equations)...) - # calculate the boundary flux - flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) + # calculate the boundary flux + flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) - return flux + return flux end - # Calculate 1D flux for a single point -@inline function flux(u, orientation::Integer, equations::AcousticPerturbationEquations2D) - v1_prime, v2_prime, p_prime_scaled = cons2state(u, equations) - v1_mean, v2_mean, c_mean, rho_mean = cons2mean(u, equations) - - # Calculate flux for conservative state variables - if orientation == 1 - f1 = v1_mean * v1_prime + v2_mean * v2_prime + c_mean^2 * p_prime_scaled / rho_mean - f2 = zero(eltype(u)) - f3 = rho_mean * v1_prime + v1_mean * p_prime_scaled - else - f1 = zero(eltype(u)) - f2 = v1_mean * v1_prime + v2_mean * v2_prime + c_mean^2 * p_prime_scaled / rho_mean - f3 = rho_mean * v2_prime + v2_mean * p_prime_scaled - end - - # The rest of the state variables are actually variable coefficients, hence the flux should be - # zero. See https://github.com/trixi-framework/Trixi.jl/issues/358#issuecomment-784828762 - # for details. 
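# -- Editor's aside, not part of the patch: a quick sanity check of the claim
# in the comment above, assuming Trixi is loaded. It uses the keyword
# constructor and `initial_condition_constant` shown earlier in this diff; the
# check is an illustrative sketch, not part of the Trixi test suite.
using Trixi, StaticArrays
equations = AcousticPerturbationEquations2D(v_mean_global = (0.5, 0.0),
                                            c_mean_global = 1.0,
                                            rho_mean_global = 1.0)
u = initial_condition_constant(SVector(0.0, 0.0), 0.0, equations)
@assert all(iszero, flux(u, 1, equations)[4:7])  # mean-flow flux entries vanish
# -- end of aside --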
- f4 = f5 = f6 = f7 = zero(eltype(u)) - - return SVector(f1, f2, f3, f4, f5, f6, f7) +@inline function flux(u, orientation::Integer, + equations::AcousticPerturbationEquations2D) + v1_prime, v2_prime, p_prime_scaled = cons2state(u, equations) + v1_mean, v2_mean, c_mean, rho_mean = cons2mean(u, equations) + + # Calculate flux for conservative state variables + if orientation == 1 + f1 = v1_mean * v1_prime + v2_mean * v2_prime + + c_mean^2 * p_prime_scaled / rho_mean + f2 = zero(eltype(u)) + f3 = rho_mean * v1_prime + v1_mean * p_prime_scaled + else + f1 = zero(eltype(u)) + f2 = v1_mean * v1_prime + v2_mean * v2_prime + + c_mean^2 * p_prime_scaled / rho_mean + f3 = rho_mean * v2_prime + v2_mean * p_prime_scaled + end + + # The rest of the state variables are actually variable coefficients, hence the flux should be + # zero. See https://github.com/trixi-framework/Trixi.jl/issues/358#issuecomment-784828762 + # for details. + f4 = f5 = f6 = f7 = zero(eltype(u)) + + return SVector(f1, f2, f3, f4, f5, f6, f7) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::AcousticPerturbationEquations2D) - # Calculate v = v_prime + v_mean - v_prime_ll = u_ll[orientation] - v_prime_rr = u_rr[orientation] - v_mean_ll = u_ll[orientation + 3] - v_mean_rr = u_rr[orientation + 3] +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::AcousticPerturbationEquations2D) + # Calculate v = v_prime + v_mean + v_prime_ll = u_ll[orientation] + v_prime_rr = u_rr[orientation] + v_mean_ll = u_ll[orientation + 3] + v_mean_rr = u_rr[orientation + 3] - v_ll = v_prime_ll + v_mean_ll - v_rr = v_prime_rr + v_mean_rr + v_ll = v_prime_ll + v_mean_ll + v_rr = v_prime_rr + v_mean_rr - c_mean_ll = u_ll[6] - c_mean_rr = u_rr[6] + c_mean_ll = u_ll[6] + c_mean_rr = u_rr[6] - λ_max = max(abs(v_ll), abs(v_rr)) + max(c_mean_ll, c_mean_rr) + λ_max = max(abs(v_ll), abs(v_rr)) + max(c_mean_ll, c_mean_rr) end - # Calculate 1D flux for a single point in the normal direction # Note, this directional vector is not normalized -@inline function flux(u, normal_direction::AbstractVector, equations::AcousticPerturbationEquations2D) - v1_prime, v2_prime, p_prime_scaled = cons2state(u, equations) - v1_mean, v2_mean, c_mean, rho_mean = cons2mean(u, equations) - - f1 = normal_direction[1] * (v1_mean * v1_prime + v2_mean * v2_prime + c_mean^2 * p_prime_scaled / rho_mean) - f2 = normal_direction[2] * (v1_mean * v1_prime + v2_mean * v2_prime + c_mean^2 * p_prime_scaled / rho_mean) - f3 = ( normal_direction[1] * (rho_mean * v1_prime + v1_mean * p_prime_scaled) - + normal_direction[2] * (rho_mean * v2_prime + v2_mean * p_prime_scaled) ) - - # The rest of the state variables are actually variable coefficients, hence the flux should be - # zero. See https://github.com/trixi-framework/Trixi.jl/issues/358#issuecomment-784828762 - # for details. 
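# -- Editor's aside, not part of the patch: `max_abs_speed_naive` above is the
# wave-speed estimate feeding local Lax-Friedrichs dissipation. A generic
# sketch of how a two-point LLF-type surface flux combines the central flux
# with this estimate (`flux_llf_sketch` is a hypothetical helper assuming
# `using Trixi`; note that the `DissipationLocalLaxFriedrichs` specialization
# later in this diff deliberately zeroes the dissipation in the mean-flow
# entries, which this generic sketch does not):
function flux_llf_sketch(u_ll, u_rr, orientation, equations)
    λ = max_abs_speed_naive(u_ll, u_rr, orientation, equations)
    f_central = 0.5 * (flux(u_ll, orientation, equations) +
                       flux(u_rr, orientation, equations))
    # central flux plus upwind dissipation scaled by the maximal wave speed
    return f_central - 0.5 * λ * (u_rr - u_ll)
end
# -- end of aside --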
- f4 = f5 = f6 = f7 = zero(eltype(u)) - - return SVector(f1, f2, f3, f4, f5, f6, f7) +@inline function flux(u, normal_direction::AbstractVector, + equations::AcousticPerturbationEquations2D) + v1_prime, v2_prime, p_prime_scaled = cons2state(u, equations) + v1_mean, v2_mean, c_mean, rho_mean = cons2mean(u, equations) + + f1 = normal_direction[1] * (v1_mean * v1_prime + v2_mean * v2_prime + + c_mean^2 * p_prime_scaled / rho_mean) + f2 = normal_direction[2] * (v1_mean * v1_prime + v2_mean * v2_prime + + c_mean^2 * p_prime_scaled / rho_mean) + f3 = (normal_direction[1] * (rho_mean * v1_prime + v1_mean * p_prime_scaled) + + + normal_direction[2] * (rho_mean * v2_prime + v2_mean * p_prime_scaled)) + + # The rest of the state variables are actually variable coefficients, hence the flux should be + # zero. See https://github.com/trixi-framework/Trixi.jl/issues/358#issuecomment-784828762 + # for details. + f4 = f5 = f6 = f7 = zero(eltype(u)) + + return SVector(f1, f2, f3, f4, f5, f6, f7) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::AcousticPerturbationEquations2D) - # Calculate v = v_prime + v_mean - v_prime_ll = normal_direction[1]*u_ll[1] + normal_direction[2]*u_ll[2] - v_prime_rr = normal_direction[1]*u_rr[1] + normal_direction[2]*u_rr[2] - v_mean_ll = normal_direction[1]*u_ll[4] + normal_direction[2]*u_ll[5] - v_mean_rr = normal_direction[1]*u_rr[4] + normal_direction[2]*u_rr[5] - - v_ll = v_prime_ll + v_mean_ll - v_rr = v_prime_rr + v_mean_rr - - c_mean_ll = u_ll[6] - c_mean_rr = u_rr[6] - - # The v_normals are already scaled by the norm - λ_max = max(abs(v_ll), abs(v_rr)) + max(c_mean_ll, c_mean_rr) * norm(normal_direction) +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::AcousticPerturbationEquations2D) + # Calculate v = v_prime + v_mean + v_prime_ll = normal_direction[1] * u_ll[1] + normal_direction[2] * u_ll[2] + v_prime_rr = normal_direction[1] * u_rr[1] + normal_direction[2] * u_rr[2] + v_mean_ll = normal_direction[1] * u_ll[4] + normal_direction[2] * u_ll[5] + v_mean_rr = normal_direction[1] * u_rr[4] + normal_direction[2] * u_rr[5] + + v_ll = v_prime_ll + v_mean_ll + v_rr = v_prime_rr + v_mean_rr + + c_mean_ll = u_ll[6] + c_mean_rr = u_rr[6] + + # The v_normals are already scaled by the norm + λ_max = max(abs(v_ll), abs(v_rr)) + + max(c_mean_ll, c_mean_rr) * norm(normal_direction) end - # Specialized `DissipationLocalLaxFriedrichs` to avoid spurious dissipation in the mean values -@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, orientation_or_normal_direction, +@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, + orientation_or_normal_direction, equations::AcousticPerturbationEquations2D) - λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, equations) - diss = -0.5 * λ * (u_rr - u_ll) - z = zero(eltype(u_ll)) - return SVector(diss[1], diss[2], diss[3], z, z, z, z) + λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + diss = -0.5 * λ * (u_rr - u_ll) + z = zero(eltype(u_ll)) + return SVector(diss[1], diss[2], diss[3], z, z, z, z) end - @inline have_constant_speed(::AcousticPerturbationEquations2D) = False() @inline function max_abs_speeds(u, equations::AcousticPerturbationEquations2D) - v1_mean = u[4] - v2_mean = u[5] - c_mean = u[6] + v1_mean = u[4] + v2_mean = u[5] + c_mean = u[6] - return abs(v1_mean) 
+ c_mean, abs(v2_mean) + c_mean + return abs(v1_mean) + c_mean, abs(v2_mean) + c_mean end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::AcousticPerturbationEquations2D) - p_prime_scaled = u[3] - c_mean = u[6] - p_prime = p_prime_scaled * c_mean^2 + p_prime_scaled = u[3] + c_mean = u[6] + p_prime = p_prime_scaled * c_mean^2 - return SVector(u[1], u[2], p_prime, u[4], u[5], u[6], u[7]) + return SVector(u[1], u[2], p_prime, u[4], u[5], u[6], u[7]) end # Convert primitive variables to conservative @inline function prim2cons(u, equations::AcousticPerturbationEquations2D) - p_prime = u[3] - c_mean = u[6] - p_prime_scaled = p_prime / c_mean^2 + p_prime = u[3] + c_mean = u[6] + p_prime_scaled = p_prime / c_mean^2 - return SVector(u[1], u[2], p_prime_scaled, u[4], u[5], u[6], u[7]) + return SVector(u[1], u[2], p_prime_scaled, u[4], u[5], u[6], u[7]) end # Convert conservative variables to entropy variables @inline cons2entropy(u, equations::AcousticPerturbationEquations2D) = u - - end # @muladd diff --git a/src/equations/compressible_euler_1d.jl b/src/equations/compressible_euler_1d.jl index f1bb18070e1..f484f26a588 100644 --- a/src/equations/compressible_euler_1d.jl +++ b/src/equations/compressible_euler_1d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" CompressibleEulerEquations1D(gamma) @@ -31,34 +31,34 @@ p = (\gamma - 1) \left( \rho e - \frac{1}{2} \rho v_1^2 \right) ``` the pressure. """ -struct CompressibleEulerEquations1D{RealT<:Real} <: AbstractCompressibleEulerEquations{1, 3} - gamma::RealT # ratio of specific heats - inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications - - function CompressibleEulerEquations1D(gamma) - γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) - new{typeof(γ)}(γ, inv_gamma_minus_one) - end +struct CompressibleEulerEquations1D{RealT <: Real} <: + AbstractCompressibleEulerEquations{1, 3} + gamma::RealT # ratio of specific heats + inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications + + function CompressibleEulerEquations1D(gamma) + γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) + new{typeof(γ)}(γ, inv_gamma_minus_one) + end end - -varnames(::typeof(cons2cons), ::CompressibleEulerEquations1D) = ("rho", "rho_v1", "rho_e") +function varnames(::typeof(cons2cons), ::CompressibleEulerEquations1D) + ("rho", "rho_v1", "rho_e") +end varnames(::typeof(cons2prim), ::CompressibleEulerEquations1D) = ("rho", "v1", "p") - """ initial_condition_constant(x, t, equations::CompressibleEulerEquations1D) A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equations::CompressibleEulerEquations1D) - rho = 1.0 - rho_v1 = 0.1 - rho_e = 10.0 - return SVector(rho, rho_v1, rho_e) + rho = 1.0 + rho_v1 = 0.1 + rho_e = 10.0 + return SVector(rho, rho_v1, rho_e) end - """ initial_condition_convergence_test(x, t, equations::CompressibleEulerEquations1D) @@ -66,19 +66,20 @@ A smooth initial condition used for convergence tests in combination with [`source_terms_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). 
""" -function initial_condition_convergence_test(x, t, equations::CompressibleEulerEquations1D) - c = 2 - A = 0.1 - L = 2 - f = 1/L - ω = 2 * pi * f - ini = c + A * sin(ω * (x[1] - t)) +function initial_condition_convergence_test(x, t, + equations::CompressibleEulerEquations1D) + c = 2 + A = 0.1 + L = 2 + f = 1 / L + ω = 2 * pi * f + ini = c + A * sin(ω * (x[1] - t)) - rho = ini - rho_v1 = ini - rho_e = ini^2 + rho = ini + rho_v1 = ini + rho_e = ini^2 - return SVector(rho, rho_v1, rho_e) + return SVector(rho, rho_v1, rho_e) end """ @@ -88,32 +89,32 @@ Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -@inline function source_terms_convergence_test(u, x, t, equations::CompressibleEulerEquations1D) - # Same settings as in `initial_condition` - c = 2 - A = 0.1 - L = 2 - f = 1/L - ω = 2 * pi * f - γ = equations.gamma +@inline function source_terms_convergence_test(u, x, t, + equations::CompressibleEulerEquations1D) + # Same settings as in `initial_condition` + c = 2 + A = 0.1 + L = 2 + f = 1 / L + ω = 2 * pi * f + γ = equations.gamma - x1, = x + x1, = x - si, co = sincos(ω * (x1 - t)) - rho = c + A * si - rho_x = ω * A * co + si, co = sincos(ω * (x1 - t)) + rho = c + A * si + rho_x = ω * A * co - # Note that d/dt rho = -d/dx rho. - # This yields du2 = du3 = d/dx p (derivative of pressure). - # Other terms vanish because of v = 1. - du1 = zero(eltype(u)) - du2 = rho_x * (2 * rho - 0.5) * (γ - 1) - du3 = du2 + # Note that d/dt rho = -d/dx rho. + # This yields du2 = du3 = d/dx p (derivative of pressure). + # Other terms vanish because of v = 1. + du1 = zero(eltype(u)) + du2 = rho_x * (2 * rho - 0.5) * (γ - 1) + du3 = du2 - return SVector(du1, du2, du3) + return SVector(du1, du2, du3) end - """ initial_condition_density_wave(x, t, equations::CompressibleEulerEquations1D) @@ -129,15 +130,14 @@ with the following parameters - polydeg = 5 """ function initial_condition_density_wave(x, t, equations::CompressibleEulerEquations1D) - v1 = 0.1 - rho = 1 + 0.98 * sinpi(2 * (x[1] - t * v1)) - rho_v1 = rho * v1 - p = 20 - rho_e = p / (equations.gamma - 1) + 1/2 * rho * v1^2 - return SVector(rho, rho_v1, rho_e) + v1 = 0.1 + rho = 1 + 0.98 * sinpi(2 * (x[1] - t * v1)) + rho_v1 = rho * v1 + p = 20 + rho_e = p / (equations.gamma - 1) + 1 / 2 * rho * v1^2 + return SVector(rho, rho_v1, rho_e) end - """ initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerEquations1D) @@ -146,27 +146,27 @@ A weak blast wave taken from A provably entropy stable subcell shock capturing approach for high order split form DG [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -function initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerEquations1D) - # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - # Set up polar coordinates - inicenter = SVector(0.0) - x_norm = x[1] - inicenter[1] - r = abs(x_norm) - # The following code is equivalent to - # phi = atan(0.0, x_norm) - # cos_phi = cos(phi) - # in 1D but faster - cos_phi = x_norm > 0 ? one(x_norm) : -one(x_norm) - - # Calculate primitive variables - rho = r > 0.5 ? 1.0 : 1.1691 - v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi - p = r > 0.5 ? 1.0 : 1.245 - - return prim2cons(SVector(rho, v1, p), equations) +function initial_condition_weak_blast_wave(x, t, + equations::CompressibleEulerEquations1D) + # From Hennemann & Gassner JCP paper 2020 (Sec. 
6.3) + # Set up polar coordinates + inicenter = SVector(0.0) + x_norm = x[1] - inicenter[1] + r = abs(x_norm) + # The following code is equivalent to + # phi = atan(0.0, x_norm) + # cos_phi = cos(phi) + # in 1D but faster + cos_phi = x_norm > 0 ? one(x_norm) : -one(x_norm) + + # Calculate primitive variables + rho = r > 0.5 ? 1.0 : 1.1691 + v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi + p = r > 0.5 ? 1.0 : 1.245 + + return prim2cons(SVector(rho, v1, p), equations) end - """ initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::CompressibleEulerEquations1D) @@ -180,37 +180,36 @@ with self-gravity from spatial dimension. Thus, [`source_terms_eoc_test_coupled_euler_gravity`](@ref) is not present there. """ -function initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::CompressibleEulerEquations1D) - # OBS! this assumes that γ = 2 other manufactured source terms are incorrect - if equations.gamma != 2.0 - error("adiabatic constant must be 2 for the coupling convergence test") - end - c = 2.0 - A = 0.1 - ini = c + A * sinpi(x[1] - t) - G = 1.0 # gravitational constant +function initial_condition_eoc_test_coupled_euler_gravity(x, t, + equations::CompressibleEulerEquations1D) + # OBS! this assumes that γ = 2 other manufactured source terms are incorrect + if equations.gamma != 2.0 + error("adiabatic constant must be 2 for the coupling convergence test") + end + c = 2.0 + A = 0.1 + ini = c + A * sinpi(x[1] - t) + G = 1.0 # gravitational constant - rho = ini - v1 = 1.0 - p = 2 * ini^2 * G / pi # * 2 / ndims, but ndims==1 here + rho = ini + v1 = 1.0 + p = 2 * ini^2 * G / pi # * 2 / ndims, but ndims==1 here - return prim2cons(SVector(rho, v1, p), equations) + return prim2cons(SVector(rho, v1, p), equations) end - # Calculate 1D flux for a single point @inline function flux(u, orientation::Integer, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - # Ignore orientation since it is always "1" in 1D - f1 = rho_v1 - f2 = rho_v1 * v1 + p - f3 = (rho_e + p) * v1 - return SVector(f1, f2, f3) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + # Ignore orientation since it is always "1" in 1D + f1 = rho_v1 + f2 = rho_v1 * v1 + p + f3 = (rho_e + p) * v1 + return SVector(f1, f2, f3) end - """ flux_shima_etal(u_ll, u_rr, orientation, equations::CompressibleEulerEquations1D) @@ -226,28 +225,28 @@ The modification is in the energy flux to guarantee pressure equilibrium and was compressible flows [DOI: 10.1016/j.jcp.2020.110060](https://doi.org/10.1016/j.jcp.2020.110060) """ -@inline function flux_shima_etal(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - # Unpack left and right state - rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) +@inline function flux_shima_etal(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations1D) + # Unpack left and right state + rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) - # Average each factor of products in flux - rho_avg = 1/2 * (rho_ll + rho_rr) - v1_avg = 1/2 * ( v1_ll + v1_rr) - p_avg = 1/2 * ( p_ll + p_rr) - kin_avg = 1/2 * (v1_ll * v1_rr) + # Average each factor of products in flux + rho_avg = 1 / 2 * (rho_ll + rho_rr) + v1_avg = 1 / 2 * (v1_ll + v1_rr) + p_avg = 1 / 2 * (p_ll + p_rr) + kin_avg = 1 / 2 * (v1_ll * v1_rr) - # Calculate fluxes - # Ignore 
orientation since it is always "1" in 1D - pv1_avg = 1/2 * (p_ll*v1_rr + p_rr*v1_ll) - f1 = rho_avg * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = p_avg*v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg + # Calculate fluxes + # Ignore orientation since it is always "1" in 1D + pv1_avg = 1 / 2 * (p_ll * v1_rr + p_rr * v1_ll) + f1 = rho_avg * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = p_avg * v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg - return SVector(f1, f2, f3) + return SVector(f1, f2, f3) end - """ flux_kennedy_gruber(u_ll, u_rr, orientation, equations::CompressibleEulerEquations1D) @@ -257,28 +256,28 @@ Kinetic energy preserving two-point flux by Navier-Stokes equations for a compressible fluid [DOI: 10.1016/j.jcp.2007.09.020](https://doi.org/10.1016/j.jcp.2007.09.020) """ -@inline function flux_kennedy_gruber(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - # Unpack left and right state - rho_e_ll = last(u_ll) - rho_e_rr = last(u_rr) - rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) +@inline function flux_kennedy_gruber(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations1D) + # Unpack left and right state + rho_e_ll = last(u_ll) + rho_e_rr = last(u_rr) + rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) - # Average each factor of products in flux - rho_avg = 1/2 * (rho_ll + rho_rr) - v1_avg = 1/2 * ( v1_ll + v1_rr) - p_avg = 1/2 * ( p_ll + p_rr) - e_avg = 1/2 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) + # Average each factor of products in flux + rho_avg = 1 / 2 * (rho_ll + rho_rr) + v1_avg = 1 / 2 * (v1_ll + v1_rr) + p_avg = 1 / 2 * (p_ll + p_rr) + e_avg = 1 / 2 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) - # Ignore orientation since it is always "1" in 1D - f1 = rho_avg * v1_avg - f2 = rho_avg * v1_avg * v1_avg + p_avg - f3 = (rho_avg * e_avg + p_avg) * v1_avg + # Ignore orientation since it is always "1" in 1D + f1 = rho_avg * v1_avg + f2 = rho_avg * v1_avg * v1_avg + p_avg + f3 = (rho_avg * e_avg + p_avg) * v1_avg - return SVector(f1, f2, f3) + return SVector(f1, f2, f3) end - """ flux_chandrashekar(u_ll, u_rr, orientation, equations::CompressibleEulerEquations1D) @@ -288,34 +287,35 @@ Entropy conserving two-point flux by for Compressible Euler and Navier-Stokes Equations [DOI: 10.4208/cicp.170712.010313a](https://doi.org/10.4208/cicp.170712.010313a) """ -@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - # Unpack left and right state - rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) - beta_ll = 0.5 * rho_ll / p_ll - beta_rr = 0.5 * rho_rr / p_rr - specific_kin_ll = 0.5 * (v1_ll^2) - specific_kin_rr = 0.5 * (v1_rr^2) - - # Compute the necessary mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - rho_mean = ln_mean(rho_ll, rho_rr) - beta_mean = ln_mean(beta_ll, beta_rr) - beta_avg = 0.5 * (beta_ll + beta_rr) - v1_avg = 0.5 * (v1_ll + v1_rr) - p_mean = 0.5 * rho_avg / beta_avg - velocity_square_avg = specific_kin_ll + specific_kin_rr - - # Calculate fluxes - # Ignore orientation since it is always "1" in 1D - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_mean - f3 = f1 * 0.5*(1/(equations.gamma-1)/beta_mean - velocity_square_avg)+f2*v1_avg - - return SVector(f1, f2, f3) +@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations1D) + # Unpack left 
and right state + rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) + beta_ll = 0.5 * rho_ll / p_ll + beta_rr = 0.5 * rho_rr / p_rr + specific_kin_ll = 0.5 * (v1_ll^2) + specific_kin_rr = 0.5 * (v1_rr^2) + + # Compute the necessary mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + rho_mean = ln_mean(rho_ll, rho_rr) + beta_mean = ln_mean(beta_ll, beta_rr) + beta_avg = 0.5 * (beta_ll + beta_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + p_mean = 0.5 * rho_avg / beta_avg + velocity_square_avg = specific_kin_ll + specific_kin_rr + + # Calculate fluxes + # Ignore orientation since it is always "1" in 1D + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_mean + f3 = f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - velocity_square_avg) + + f2 * v1_avg + + return SVector(f1, f2, f3) end - """ flux_ranocha(u_ll, u_rr, orientation_or_normal_direction, equations::CompressibleEulerEquations1D) @@ -330,36 +330,38 @@ See also the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - # Unpack left and right state - rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) - - # Compute the necessary mean values - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * (v1_ll + v1_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr) - - # Calculate fluxes - # Ignore orientation since it is always "1" in 1D - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v1_rr + p_rr*v1_ll) - - return SVector(f1, f2, f3) -end - -@inline function flux_ranocha(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations1D) - return normal_direction[1] * flux_ranocha(u_ll, u_rr, 1, equations) +@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations1D) + # Unpack left and right state + rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) + + # Compute the necessary mean values + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr) + + # Calculate fluxes + # Ignore orientation since it is always "1" in 1D + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + + return SVector(f1, f2, f3) +end + +@inline function flux_ranocha(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations1D) + return normal_direction[1] * flux_ranocha(u_ll, u_rr, 1, equations) end - """ splitting_steger_warming(u, orientation::Integer, 
equations::CompressibleEulerEquations1D) @@ -386,43 +388,43 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. """ @inline function splitting_steger_warming(u, orientation::Integer, equations::CompressibleEulerEquations1D) - fm = splitting_steger_warming(u, Val{:minus}(), orientation, equations) - fp = splitting_steger_warming(u, Val{:plus}(), orientation, equations) - return fm, fp + fm = splitting_steger_warming(u, Val{:minus}(), orientation, equations) + fp = splitting_steger_warming(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_steger_warming(u, ::Val{:plus}, orientation::Integer, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - a = sqrt(equations.gamma * p / rho) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + a = sqrt(equations.gamma * p / rho) - lambda1 = v1 - lambda2 = v1 + a - lambda3 = v1 - a + lambda1 = v1 + lambda2 = v1 + a + lambda3 = v1 - a - lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) - lambda2_p = positive_part(lambda2) - lambda3_p = positive_part(lambda3) + lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) + lambda2_p = positive_part(lambda2) + lambda3_p = positive_part(lambda3) - alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p + alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p - rho_2gamma = 0.5 * rho / equations.gamma - f1p = rho_2gamma * alpha_p - f2p = rho_2gamma * (alpha_p * v1 + a * (lambda2_p - lambda3_p)) - f3p = rho_2gamma * (alpha_p * 0.5 * v1^2 + a * v1 * (lambda2_p - lambda3_p) - + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) + rho_2gamma = 0.5 * rho / equations.gamma + f1p = rho_2gamma * alpha_p + f2p = rho_2gamma * (alpha_p * v1 + a * (lambda2_p - lambda3_p)) + f3p = rho_2gamma * (alpha_p * 0.5 * v1^2 + a * v1 * (lambda2_p - lambda3_p) + + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) - return SVector(f1p, f2p, f3p) + return SVector(f1p, f2p, f3p) end @inline function splitting_steger_warming(u, ::Val{:minus}, orientation::Integer, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - a = sqrt(equations.gamma * p / rho) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + a = sqrt(equations.gamma * p / rho) lambda1 = v1 lambda2 = v1 + a @@ -438,12 +440,11 @@ end f1m = rho_2gamma * alpha_m f2m = rho_2gamma * (alpha_m * v1 + a * (lambda2_m - lambda3_m)) f3m = rho_2gamma * (alpha_m * 0.5 * v1^2 + a * v1 * (lambda2_m - lambda3_m) - + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) + + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) - return SVector(f1m, f2m, f3m) + return SVector(f1m, f2m, f3m) end - """ splitting_vanleer_haenel(u, orientation::Integer, equations::CompressibleEulerEquations1D) @@ -480,56 +481,55 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. 
""" @inline function splitting_vanleer_haenel(u, orientation::Integer, equations::CompressibleEulerEquations1D) - fm = splitting_vanleer_haenel(u, Val{:minus}(), orientation, equations) - fp = splitting_vanleer_haenel(u, Val{:plus}(), orientation, equations) - return fm, fp + fm = splitting_vanleer_haenel(u, Val{:minus}(), orientation, equations) + fp = splitting_vanleer_haenel(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_vanleer_haenel(u, ::Val{:plus}, orientation::Integer, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - # sound speed and enthalpy - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho + # sound speed and enthalpy + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho - # signed Mach number - M = v1 / a + # signed Mach number + M = v1 / a - p_plus = 0.5 * (1 + equations.gamma * M) * p + p_plus = 0.5 * (1 + equations.gamma * M) * p - f1p = 0.25 * rho * a * (M + 1)^2 - f2p = f1p * v1 + p_plus - f3p = f1p * H + f1p = 0.25 * rho * a * (M + 1)^2 + f2p = f1p * v1 + p_plus + f3p = f1p * H - return SVector(f1p, f2p, f3p) + return SVector(f1p, f2p, f3p) end @inline function splitting_vanleer_haenel(u, ::Val{:minus}, orientation::Integer, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - # sound speed and enthalpy - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho + # sound speed and enthalpy + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho - # signed Mach number - M = v1 / a + # signed Mach number + M = v1 / a - p_minus = 0.5 * (1 - equations.gamma * M) * p + p_minus = 0.5 * (1 - equations.gamma * M) * p - f1m= -0.25 * rho * a * (M - 1)^2 - f2m = f1m * v1 + p_minus - f3m = f1m * H + f1m = -0.25 * rho * a * (M - 1)^2 + f2m = f1m * v1 + p_minus + f3m = f1m * H - return SVector(f1m, f2m, f3m) + return SVector(f1m, f2m, f3m) end - # TODO: FD # This splitting is interesting because it can handle the "el diablo" wave # for long time runs. Computing the eigenvalues of the operator we see @@ -568,99 +568,98 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. 
[DOI: 10.2514/6.1991-1566](https://doi.org/10.2514/6.1991-1566) """ @inline function splitting_coirier_vanleer(u, orientation::Integer, - equations::CompressibleEulerEquations1D) - fm = splitting_coirier_vanleer(u, Val{:minus}(), orientation, equations) - fp = splitting_coirier_vanleer(u, Val{:plus}(), orientation, equations) - return fm, fp + equations::CompressibleEulerEquations1D) + fm = splitting_coirier_vanleer(u, Val{:minus}(), orientation, equations) + fp = splitting_coirier_vanleer(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_coirier_vanleer(u, ::Val{:plus}, orientation::Integer, - equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + equations::CompressibleEulerEquations1D) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - # sound speed and enthalpy - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho + # sound speed and enthalpy + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho - # signed Mach number - M = v1 / a + # signed Mach number + M = v1 / a - P = 2 - mu = 1.0 - nu = 0.75 - omega = 2.0 # adjusted from suggested value of 1.5 + P = 2 + mu = 1.0 + nu = 0.75 + omega = 2.0 # adjusted from suggested value of 1.5 - p_plus = 0.25 * ((M + 1)^2 * (2 - M) - nu * M * (M^2 - 1)^P) * p + p_plus = 0.25 * ((M + 1)^2 * (2 - M) - nu * M * (M^2 - 1)^P) * p - f1p = 0.25 * rho * a * ((M + 1)^2 - mu * (M^2 - 1)^P) - f2p = f1p * v1 + p_plus - f3p = f1p * H - omega * rho * a^3 * M^2 * (M^2 - 1)^2 + f1p = 0.25 * rho * a * ((M + 1)^2 - mu * (M^2 - 1)^P) + f2p = f1p * v1 + p_plus + f3p = f1p * H - omega * rho * a^3 * M^2 * (M^2 - 1)^2 - return SVector(f1p, f2p, f3p) + return SVector(f1p, f2p, f3p) end @inline function splitting_coirier_vanleer(u, ::Val{:minus}, orientation::Integer, - equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + equations::CompressibleEulerEquations1D) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - # sound speed and enthalpy - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho + # sound speed and enthalpy + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho - # signed Mach number - M = v1 / a + # signed Mach number + M = v1 / a - P = 2 - mu = 1.0 - nu = 0.75 - omega = 2.0 # adjusted from suggested value of 1.5 + P = 2 + mu = 1.0 + nu = 0.75 + omega = 2.0 # adjusted from suggested value of 1.5 - p_minus = 0.25 * ((M - 1)^2 * (2 + M) + nu * M * (M^2 - 1)^P) * p + p_minus = 0.25 * ((M - 1)^2 * (2 + M) + nu * M * (M^2 - 1)^P) * p - f1m = -0.25 * rho * a * ((M - 1)^2 - mu * (M^2 - 1)^P) - f2m = f1m * v1 + p_minus - f3m = f1m * H + omega * rho * a^3 * M^2 * (M^2 - 1)^2 + f1m = -0.25 * rho * a * ((M - 1)^2 - mu * (M^2 - 1)^P) + f2m = f1m * v1 + p_minus + f3m = f1m * H + omega * rho * a^3 * M^2 * (M^2 - 1)^2 - return SVector(f1m, f2m, f3m) + return SVector(f1m, f2m, f3m) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation as the # maximum velocity magnitude plus the maximum speed of sound -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - rho_ll, rho_v1_ll, rho_e_ll = u_ll - rho_rr, rho_v1_rr, rho_e_rr = u_rr +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + 
equations::CompressibleEulerEquations1D) + rho_ll, rho_v1_ll, rho_e_ll = u_ll + rho_rr, rho_v1_rr, rho_e_rr = u_rr - # Calculate primitive variables and speed of sound - v1_ll = rho_v1_ll / rho_ll - v_mag_ll = abs(v1_ll) - p_ll = (equations.gamma - 1) * (rho_e_ll - 1/2 * rho_ll * v_mag_ll^2) - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - v1_rr = rho_v1_rr / rho_rr - v_mag_rr = abs(v1_rr) - p_rr = (equations.gamma - 1) * (rho_e_rr - 1/2 * rho_rr * v_mag_rr^2) - c_rr = sqrt(equations.gamma * p_rr / rho_rr) + # Calculate primitive variables and speed of sound + v1_ll = rho_v1_ll / rho_ll + v_mag_ll = abs(v1_ll) + p_ll = (equations.gamma - 1) * (rho_e_ll - 1 / 2 * rho_ll * v_mag_ll^2) + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + v1_rr = rho_v1_rr / rho_rr + v_mag_rr = abs(v1_rr) + p_rr = (equations.gamma - 1) * (rho_e_rr - 1 / 2 * rho_rr * v_mag_rr^2) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) - λ_max = max(v_mag_ll, v_mag_rr) + max(c_ll, c_rr) + λ_max = max(v_mag_ll, v_mag_rr) + max(c_ll, c_rr) end - # Calculate minimum and maximum wave speeds for HLL-type fluxes -@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) +@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations1D) + rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) - λ_min = v1_ll - sqrt(equations.gamma * p_ll / rho_ll) - λ_max = v1_rr + sqrt(equations.gamma * p_rr / rho_rr) + λ_min = v1_ll - sqrt(equations.gamma * p_ll / rho_ll) + λ_max = v1_rr + sqrt(equations.gamma * p_rr / rho_rr) - return λ_min, λ_max + return λ_min, λ_max end - """ flux_hllc(u_ll, u_rr, orientation, equations::CompressibleEulerEquations1D) @@ -668,79 +667,80 @@ Computes the HLLC flux (HLL with Contact) for compressible Euler equations devel [Lecture slides](http://www.prague-sum.com/download/2012/Toro_2-HLLC-RiemannSolver.pdf) Signal speeds: [DOI: 10.1137/S1064827593260140](https://doi.org/10.1137/S1064827593260140) """ -function flux_hllc(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - # Calculate primitive variables and speed of sound - rho_ll, rho_v1_ll, rho_e_ll = u_ll - rho_rr, rho_v1_rr, rho_e_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - e_ll = rho_e_ll / rho_ll - p_ll = (equations.gamma - 1) * (rho_e_ll - 1/2 * rho_ll * v1_ll^2) - c_ll = sqrt(equations.gamma*p_ll/rho_ll) - - v1_rr = rho_v1_rr / rho_rr - e_rr = rho_e_rr / rho_rr - p_rr = (equations.gamma - 1) * (rho_e_rr - 1/2 * rho_rr * v1_rr^2 ) - c_rr = sqrt(equations.gamma*p_rr/rho_rr) - - # Obtain left and right fluxes - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) - - # Compute Roe averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - sum_sqrt_rho = sqrt_rho_ll + sqrt_rho_rr - vel_L = v1_ll - vel_R = v1_rr - vel_roe = (sqrt_rho_ll * vel_L + sqrt_rho_rr * vel_R) / sum_sqrt_rho - ekin_roe = 0.5 * vel_roe^2 - H_ll = (rho_e_ll + p_ll) / rho_ll - H_rr = (rho_e_rr + p_rr) / rho_rr - H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) / sum_sqrt_rho - c_roe = sqrt((equations.gamma - 1) * (H_roe - ekin_roe)) - - Ssl = min(vel_L - c_ll, vel_roe - c_roe) - Ssr = max(vel_R + c_rr, vel_roe + c_roe) - sMu_L = Ssl - vel_L - sMu_R = Ssr - vel_R - if Ssl >= 0.0 - f1 = f_ll[1] - f2 = f_ll[2] - f3 = f_ll[3] - elseif Ssr <= 0.0 - f1 = f_rr[1] - f2 = f_rr[2] - f3 = 
f_rr[3] - else - SStar = (p_rr - p_ll + rho_ll*vel_L*sMu_L - rho_rr*vel_R*sMu_R) / (rho_ll*sMu_L - rho_rr*sMu_R) - if Ssl <= 0.0 <= SStar - densStar = rho_ll*sMu_L / (Ssl-SStar) - enerStar = e_ll + (SStar - vel_L) * (SStar + p_ll / (rho_ll * sMu_L)) - UStar1 = densStar - UStar2 = densStar*SStar - UStar3 = densStar*enerStar - - f1 = f_ll[1]+Ssl*(UStar1 - rho_ll) - f2 = f_ll[2]+Ssl*(UStar2 - rho_v1_ll) - f3 = f_ll[3]+Ssl*(UStar3 - rho_e_ll) +function flux_hllc(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations1D) + # Calculate primitive variables and speed of sound + rho_ll, rho_v1_ll, rho_e_ll = u_ll + rho_rr, rho_v1_rr, rho_e_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + e_ll = rho_e_ll / rho_ll + p_ll = (equations.gamma - 1) * (rho_e_ll - 1 / 2 * rho_ll * v1_ll^2) + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + + v1_rr = rho_v1_rr / rho_rr + e_rr = rho_e_rr / rho_rr + p_rr = (equations.gamma - 1) * (rho_e_rr - 1 / 2 * rho_rr * v1_rr^2) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + # Obtain left and right fluxes + f_ll = flux(u_ll, orientation, equations) + f_rr = flux(u_rr, orientation, equations) + + # Compute Roe averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + sum_sqrt_rho = sqrt_rho_ll + sqrt_rho_rr + vel_L = v1_ll + vel_R = v1_rr + vel_roe = (sqrt_rho_ll * vel_L + sqrt_rho_rr * vel_R) / sum_sqrt_rho + ekin_roe = 0.5 * vel_roe^2 + H_ll = (rho_e_ll + p_ll) / rho_ll + H_rr = (rho_e_rr + p_rr) / rho_rr + H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) / sum_sqrt_rho + c_roe = sqrt((equations.gamma - 1) * (H_roe - ekin_roe)) + + Ssl = min(vel_L - c_ll, vel_roe - c_roe) + Ssr = max(vel_R + c_rr, vel_roe + c_roe) + sMu_L = Ssl - vel_L + sMu_R = Ssr - vel_R + if Ssl >= 0.0 + f1 = f_ll[1] + f2 = f_ll[2] + f3 = f_ll[3] + elseif Ssr <= 0.0 + f1 = f_rr[1] + f2 = f_rr[2] + f3 = f_rr[3] else - densStar = rho_rr*sMu_R / (Ssr-SStar) - enerStar = e_rr + (SStar - vel_R) * (SStar + p_rr / (rho_rr * sMu_R)) - UStar1 = densStar - UStar2 = densStar*SStar - UStar3 = densStar*enerStar - - #end - f1 = f_rr[1]+Ssr*(UStar1 - rho_rr) - f2 = f_rr[2]+Ssr*(UStar2 - rho_v1_rr) - f3 = f_rr[3]+Ssr*(UStar3 - rho_e_rr) + SStar = (p_rr - p_ll + rho_ll * vel_L * sMu_L - rho_rr * vel_R * sMu_R) / + (rho_ll * sMu_L - rho_rr * sMu_R) + if Ssl <= 0.0 <= SStar + densStar = rho_ll * sMu_L / (Ssl - SStar) + enerStar = e_ll + (SStar - vel_L) * (SStar + p_ll / (rho_ll * sMu_L)) + UStar1 = densStar + UStar2 = densStar * SStar + UStar3 = densStar * enerStar + + f1 = f_ll[1] + Ssl * (UStar1 - rho_ll) + f2 = f_ll[2] + Ssl * (UStar2 - rho_v1_ll) + f3 = f_ll[3] + Ssl * (UStar3 - rho_e_ll) + else + densStar = rho_rr * sMu_R / (Ssr - SStar) + enerStar = e_rr + (SStar - vel_R) * (SStar + p_rr / (rho_rr * sMu_R)) + UStar1 = densStar + UStar2 = densStar * SStar + UStar3 = densStar * enerStar + + #end + f1 = f_rr[1] + Ssr * (UStar1 - rho_rr) + f2 = f_rr[2] + Ssr * (UStar2 - rho_v1_rr) + f3 = f_rr[3] + Ssr * (UStar3 - rho_e_rr) + end end - end - return SVector(f1, f2, f3) + return SVector(f1, f2, f3) end - """ flux_hlle(u_ll, u_rr, orientation, equations::CompressibleEulerEquations1D) @@ -759,195 +759,189 @@ Compactly summarized: Numerical methods for conservation laws and related equations. 
[Link](https://metaphor.ethz.ch/x/2019/hs/401-4671-00L/literature/mishra_hyperbolic_pdes.pdf) """ -function flux_hlle(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) - # Calculate primitive variables, enthalpy and speed of sound - rho_ll, v_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v_rr, p_rr = cons2prim(u_rr, equations) - - # `u_ll[3]` is total energy `rho_e_ll` on the left - H_ll = (u_ll[3] + p_ll) / rho_ll - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - - # `u_rr[3]` is total energy `rho_e_rr` on the right - H_rr = (u_rr[3] + p_rr) / rho_rr - c_rr = sqrt(equations.gamma * p_rr / rho_rr) - - # Compute Roe averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sum_sqrt_rho = inv(sqrt_rho_ll + sqrt_rho_rr) - - v_roe = (sqrt_rho_ll * v_ll + sqrt_rho_rr * v_rr) * inv_sum_sqrt_rho - v_roe_mag = v_roe^2 - - H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) * inv_sum_sqrt_rho - c_roe = sqrt((equations.gamma - 1) * (H_roe - 0.5 * v_roe_mag)) - - # Compute convenience constant for positivity preservation, see - # https://doi.org/10.1016/0021-9991(91)90211-3 - beta = sqrt(0.5 * (equations.gamma - 1) / equations.gamma) - - # Estimate the edges of the Riemann fan (with positivity conservation) - SsL = min(v_roe - c_roe, v_ll - beta * c_ll, zero(v_roe)) - SsR = max(v_roe + c_roe, v_rr + beta * c_rr, zero(v_roe)) - - if SsL >= 0.0 && SsR > 0.0 - # Positive supersonic speed - f_ll = flux(u_ll, orientation, equations) - - f1 = f_ll[1] - f2 = f_ll[2] - f3 = f_ll[3] - elseif SsR <= 0.0 && SsL < 0.0 - # Negative supersonic speed - f_rr = flux(u_rr, orientation, equations) - - f1 = f_rr[1] - f2 = f_rr[2] - f3 = f_rr[3] - else - # Subsonic case - # Compute left and right fluxes - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) - - f1 = (SsR * f_ll[1] - SsL * f_rr[1] + SsL * SsR * (u_rr[1] - u_ll[1])) / (SsR - SsL) - f2 = (SsR * f_ll[2] - SsL * f_rr[2] + SsL * SsR * (u_rr[2] - u_ll[2])) / (SsR - SsL) - f3 = (SsR * f_ll[3] - SsL * f_rr[3] + SsL * SsR * (u_rr[3] - u_ll[3])) / (SsR - SsL) - end +function flux_hlle(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations1D) + # Calculate primitive variables, enthalpy and speed of sound + rho_ll, v_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v_rr, p_rr = cons2prim(u_rr, equations) + + # `u_ll[3]` is total energy `rho_e_ll` on the left + H_ll = (u_ll[3] + p_ll) / rho_ll + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + + # `u_rr[3]` is total energy `rho_e_rr` on the right + H_rr = (u_rr[3] + p_rr) / rho_rr + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + # Compute Roe averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + inv_sum_sqrt_rho = inv(sqrt_rho_ll + sqrt_rho_rr) + + v_roe = (sqrt_rho_ll * v_ll + sqrt_rho_rr * v_rr) * inv_sum_sqrt_rho + v_roe_mag = v_roe^2 + + H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) * inv_sum_sqrt_rho + c_roe = sqrt((equations.gamma - 1) * (H_roe - 0.5 * v_roe_mag)) + + # Compute convenience constant for positivity preservation, see + # https://doi.org/10.1016/0021-9991(91)90211-3 + beta = sqrt(0.5 * (equations.gamma - 1) / equations.gamma) + + # Estimate the edges of the Riemann fan (with positivity conservation) + SsL = min(v_roe - c_roe, v_ll - beta * c_ll, zero(v_roe)) + SsR = max(v_roe + c_roe, v_rr + beta * c_rr, zero(v_roe)) + + if SsL >= 0.0 && SsR > 0.0 + # Positive supersonic speed + f_ll = flux(u_ll, orientation, equations) + + f1 = f_ll[1] + f2 = f_ll[2] + f3 = f_ll[3] + elseif SsR <= 0.0 && SsL 
< 0.0 + # Negative supersonic speed + f_rr = flux(u_rr, orientation, equations) + + f1 = f_rr[1] + f2 = f_rr[2] + f3 = f_rr[3] + else + # Subsonic case + # Compute left and right fluxes + f_ll = flux(u_ll, orientation, equations) + f_rr = flux(u_rr, orientation, equations) + + f1 = (SsR * f_ll[1] - SsL * f_rr[1] + SsL * SsR * (u_rr[1] - u_ll[1])) / + (SsR - SsL) + f2 = (SsR * f_ll[2] - SsL * f_rr[2] + SsL * SsR * (u_rr[2] - u_ll[2])) / + (SsR - SsL) + f3 = (SsR * f_ll[3] - SsL * f_rr[3] + SsL * SsR * (u_rr[3] - u_ll[3])) / + (SsR - SsL) + end - return SVector(f1, f2, f3) + return SVector(f1, f2, f3) end - @inline function max_abs_speeds(u, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 1/2 * rho * v1^2) - c = sqrt(equations.gamma * p / rho) + rho, rho_v1, rho_e = u + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 1 / 2 * rho * v1^2) + c = sqrt(equations.gamma * p / rho) - return (abs(v1) + c,) + return (abs(v1) + c,) end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u + rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) + v1 = rho_v1 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1 * v1) - return SVector(rho, v1, p) + return SVector(rho, v1, p) end - # Convert conservative variables to entropy @inline function cons2entropy(u, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u + rho, rho_v1, rho_e = u - v1 = rho_v1 / rho - v_square = v1^2 - p = (equations.gamma - 1) * (rho_e - 0.5 * rho * v_square) - s = log(p) - equations.gamma*log(rho) - rho_p = rho / p + v1 = rho_v1 / rho + v_square = v1^2 + p = (equations.gamma - 1) * (rho_e - 0.5 * rho * v_square) + s = log(p) - equations.gamma * log(rho) + rho_p = rho / p - w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square - w2 = rho_p * v1 - w3 = -rho_p + w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square + w2 = rho_p * v1 + w3 = -rho_p - return SVector(w1, w2, w3) + return SVector(w1, w2, w3) end @inline function entropy2cons(w, equations::CompressibleEulerEquations1D) - # See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD - # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) - @unpack gamma = equations + # See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD + # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) + @unpack gamma = equations - # convert to entropy `-rho * s` used by Hughes, France, Mallet (1986) - # instead of `-rho * s / (gamma - 1)` - V1, V2, V5 = w .* (gamma - 1) + # convert to entropy `-rho * s` used by Hughes, France, Mallet (1986) + # instead of `-rho * s / (gamma - 1)` + V1, V2, V5 = w .* (gamma - 1) - # specific entropy, eq. (53) - s = gamma - V1 + 0.5 * (V2^2) / V5 + # specific entropy, eq. (53) + s = gamma - V1 + 0.5 * (V2^2) / V5 - # eq. (52) - energy_internal = ((gamma - 1) / (-V5)^gamma)^(equations.inv_gamma_minus_one) * exp(-s * equations.inv_gamma_minus_one) + # eq. (52) + energy_internal = ((gamma - 1) / (-V5)^gamma)^(equations.inv_gamma_minus_one) * + exp(-s * equations.inv_gamma_minus_one) - # eq. (51) - rho = -V5 * energy_internal - rho_v1 = V2 * energy_internal - rho_e = (1 - 0.5 * (V2^2) / V5) * energy_internal - return SVector(rho, rho_v1, rho_e) + # eq. 
(51) + rho = -V5 * energy_internal + rho_v1 = V2 * energy_internal + rho_e = (1 - 0.5 * (V2^2) / V5) * energy_internal + return SVector(rho, rho_v1, rho_e) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::CompressibleEulerEquations1D) - rho, v1, p = prim - rho_v1 = rho * v1 - rho_e = p * equations.inv_gamma_minus_one + 0.5 * (rho_v1 * v1) - return SVector(rho, rho_v1, rho_e) + rho, v1, p = prim + rho_v1 = rho * v1 + rho_e = p * equations.inv_gamma_minus_one + 0.5 * (rho_v1 * v1) + return SVector(rho, rho_v1, rho_e) end - @inline function density(u, equations::CompressibleEulerEquations1D) - rho = u[1] - return rho + rho = u[1] + return rho end @inline function pressure(u, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2) / rho) - return p + rho, rho_v1, rho_e = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2) / rho) + return p end - @inline function density_pressure(u, equations::CompressibleEulerEquations1D) - rho, rho_v1, rho_e = u - rho_times_p = (equations.gamma - 1) * (rho * rho_e - 0.5 * (rho_v1^2)) - return rho_times_p + rho, rho_v1, rho_e = u + rho_times_p = (equations.gamma - 1) * (rho * rho_e - 0.5 * (rho_v1^2)) + return rho_times_p end - # Calculate thermodynamic entropy for a conservative state `cons` @inline function entropy_thermodynamic(cons, equations::CompressibleEulerEquations1D) - # Pressure - p = (equations.gamma - 1) * (cons[3] - 1/2 * (cons[2]^2) / cons[1]) + # Pressure + p = (equations.gamma - 1) * (cons[3] - 1 / 2 * (cons[2]^2) / cons[1]) - # Thermodynamic entropy - s = log(p) - equations.gamma*log(cons[1]) + # Thermodynamic entropy + s = log(p) - equations.gamma * log(cons[1]) - return s + return s end - # Calculate mathematical entropy for a conservative state `cons` @inline function entropy_math(cons, equations::CompressibleEulerEquations1D) - # Mathematical entropy - S = -entropy_thermodynamic(cons, equations) * cons[1] * equations.inv_gamma_minus_one + # Mathematical entropy + S = -entropy_thermodynamic(cons, equations) * cons[1] * + equations.inv_gamma_minus_one - return S + return S end - # Default entropy is the mathematical entropy -@inline entropy(cons, equations::CompressibleEulerEquations1D) = entropy_math(cons, equations) - +@inline function entropy(cons, equations::CompressibleEulerEquations1D) + entropy_math(cons, equations) +end # Calculate total energy for a conservative state `cons` @inline energy_total(cons, ::CompressibleEulerEquations1D) = cons[3] - # Calculate kinetic energy for a conservative state `cons` @inline function energy_kinetic(cons, equations::CompressibleEulerEquations1D) - return 0.5 * (cons[2]^2)/cons[1] + return 0.5 * (cons[2]^2) / cons[1] end - # Calculate internal energy for a conservative state `cons` @inline function energy_internal(cons, equations::CompressibleEulerEquations1D) - return energy_total(cons, equations) - energy_kinetic(cons, equations) + return energy_total(cons, equations) - energy_kinetic(cons, equations) end - - end # @muladd diff --git a/src/equations/compressible_euler_2d.jl b/src/equations/compressible_euler_2d.jl index 4877f6891c1..66e3c7bff84 100644 --- a/src/equations/compressible_euler_2d.jl +++ b/src/equations/compressible_euler_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent @doc raw""" CompressibleEulerEquations2D(gamma) @@ -37,21 +37,22 @@ p = (\gamma - 1) \left( \rho e - \frac{1}{2} \rho (v_1^2+v_2^2) \right) ``` the pressure. """ -struct CompressibleEulerEquations2D{RealT<:Real} <: AbstractCompressibleEulerEquations{2, 4} - gamma::RealT # ratio of specific heats - inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications - - function CompressibleEulerEquations2D(gamma) - γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) - new{typeof(γ)}(γ, inv_gamma_minus_one) - end +struct CompressibleEulerEquations2D{RealT <: Real} <: + AbstractCompressibleEulerEquations{2, 4} + gamma::RealT # ratio of specific heats + inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications + + function CompressibleEulerEquations2D(gamma) + γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) + new{typeof(γ)}(γ, inv_gamma_minus_one) + end end - -varnames(::typeof(cons2cons), ::CompressibleEulerEquations2D) = ("rho", "rho_v1", "rho_v2", "rho_e") +function varnames(::typeof(cons2cons), ::CompressibleEulerEquations2D) + ("rho", "rho_v1", "rho_v2", "rho_e") +end varnames(::typeof(cons2prim), ::CompressibleEulerEquations2D) = ("rho", "v1", "v2", "p") - # Set initial conditions at physical location `x` for time `t` """ initial_condition_constant(x, t, equations::CompressibleEulerEquations2D) @@ -59,14 +60,13 @@ varnames(::typeof(cons2prim), ::CompressibleEulerEquations2D) = ("rho", "v1", "v A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equations::CompressibleEulerEquations2D) - rho = 1.0 - rho_v1 = 0.1 - rho_v2 = -0.2 - rho_e = 10.0 - return SVector(rho, rho_v1, rho_v2, rho_e) + rho = 1.0 + rho_v1 = 0.1 + rho_v2 = -0.2 + rho_e = 10.0 + return SVector(rho, rho_v1, rho_v2, rho_e) end - """ initial_condition_convergence_test(x, t, equations::CompressibleEulerEquations2D) @@ -74,20 +74,21 @@ A smooth initial condition used for convergence tests in combination with [`source_terms_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -function initial_condition_convergence_test(x, t, equations::CompressibleEulerEquations2D) - c = 2 - A = 0.1 - L = 2 - f = 1/L - ω = 2 * pi * f - ini = c + A * sin(ω * (x[1] + x[2] - t)) - - rho = ini - rho_v1 = ini - rho_v2 = ini - rho_e = ini^2 - - return SVector(rho, rho_v1, rho_v2, rho_e) +function initial_condition_convergence_test(x, t, + equations::CompressibleEulerEquations2D) + c = 2 + A = 0.1 + L = 2 + f = 1 / L + ω = 2 * pi * f + ini = c + A * sin(ω * (x[1] + x[2] - t)) + + rho = ini + rho_v1 = ini + rho_v2 = ini + rho_e = ini^2 + + return SVector(rho, rho_v1, rho_v2, rho_e) end """ @@ -97,32 +98,32 @@ Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -@inline function source_terms_convergence_test(u, x, t, equations::CompressibleEulerEquations2D) - # Same settings as in `initial_condition` - c = 2 - A = 0.1 - L = 2 - f = 1/L - ω = 2 * pi * f - γ = equations.gamma - - x1, x2 = x - si, co = sincos(ω * (x1 + x2 - t)) - rho = c + A * si - rho_x = ω * A * co - # Note that d/dt rho = -d/dx rho = -d/dy rho. 
- - tmp = (2 * rho - 1) * (γ - 1) - - du1 = rho_x - du2 = rho_x * (1 + tmp) - du3 = du2 - du4 = 2 * rho_x * (rho + tmp) - - return SVector(du1, du2, du3, du4) +@inline function source_terms_convergence_test(u, x, t, + equations::CompressibleEulerEquations2D) + # Same settings as in `initial_condition` + c = 2 + A = 0.1 + L = 2 + f = 1 / L + ω = 2 * pi * f + γ = equations.gamma + + x1, x2 = x + si, co = sincos(ω * (x1 + x2 - t)) + rho = c + A * si + rho_x = ω * A * co + # Note that d/dt rho = -d/dx rho = -d/dy rho. + + tmp = (2 * rho - 1) * (γ - 1) + + du1 = rho_x + du2 = rho_x * (1 + tmp) + du3 = du2 + du4 = 2 * rho_x * (rho + tmp) + + return SVector(du1, du2, du3, du4) end - """ initial_condition_density_wave(x, t, equations::CompressibleEulerEquations2D) @@ -138,17 +139,16 @@ with the following parameters - polydeg = 5 """ function initial_condition_density_wave(x, t, equations::CompressibleEulerEquations2D) - v1 = 0.1 - v2 = 0.2 - rho = 1 + 0.98 * sinpi(2 * (x[1] + x[2] - t * (v1 + v2))) - rho_v1 = rho * v1 - rho_v2 = rho * v2 - p = 20 - rho_e = p / (equations.gamma - 1) + 1/2 * rho * (v1^2 + v2^2) - return SVector(rho, rho_v1, rho_v2, rho_e) + v1 = 0.1 + v2 = 0.2 + rho = 1 + 0.98 * sinpi(2 * (x[1] + x[2] - t * (v1 + v2))) + rho_v1 = rho * v1 + rho_v2 = rho * v2 + p = 20 + rho_e = p / (equations.gamma - 1) + 1 / 2 * rho * (v1^2 + v2^2) + return SVector(rho, rho_v1, rho_v2, rho_e) end - """ initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerEquations2D) @@ -157,26 +157,26 @@ A weak blast wave taken from A provably entropy stable subcell shock capturing approach for high order split form DG [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -function initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerEquations2D) - # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - # Set up polar coordinates - inicenter = SVector(0.0, 0.0) - x_norm = x[1] - inicenter[1] - y_norm = x[2] - inicenter[2] - r = sqrt(x_norm^2 + y_norm^2) - phi = atan(y_norm, x_norm) - sin_phi, cos_phi = sincos(phi) - - # Calculate primitive variables - rho = r > 0.5 ? 1.0 : 1.1691 - v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi - v2 = r > 0.5 ? 0.0 : 0.1882 * sin_phi - p = r > 0.5 ? 1.0 : 1.245 - - return prim2cons(SVector(rho, v1, v2, p), equations) +function initial_condition_weak_blast_wave(x, t, + equations::CompressibleEulerEquations2D) + # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3) + # Set up polar coordinates + inicenter = SVector(0.0, 0.0) + x_norm = x[1] - inicenter[1] + y_norm = x[2] - inicenter[2] + r = sqrt(x_norm^2 + y_norm^2) + phi = atan(y_norm, x_norm) + sin_phi, cos_phi = sincos(phi) + + # Calculate primitive variables + rho = r > 0.5 ? 1.0 : 1.1691 + v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi + v2 = r > 0.5 ? 0.0 : 0.1882 * sin_phi + p = r > 0.5 ? 1.0 : 1.245 + + return prim2cons(SVector(rho, v1, v2, p), equations) end - """ initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::CompressibleEulerEquations2D) @@ -187,22 +187,23 @@ Setup used for convergence tests of the Euler equations with self-gravity used i in combination with [`source_terms_eoc_test_coupled_euler_gravity`](@ref) or [`source_terms_eoc_test_euler`](@ref). """ -function initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::CompressibleEulerEquations2D) - # OBS! 
this assumes that γ = 2 other manufactured source terms are incorrect - if equations.gamma != 2.0 - error("adiabatic constant must be 2 for the coupling convergence test") - end - c = 2.0 - A = 0.1 - ini = c + A * sin(pi * (x[1] + x[2] - t)) - G = 1.0 # gravitational constant - - rho = ini - v1 = 1.0 - v2 = 1.0 - p = ini^2 * G / pi # * 2 / ndims, but ndims==2 here - - return prim2cons(SVector(rho, v1, v2, p), equations) +function initial_condition_eoc_test_coupled_euler_gravity(x, t, + equations::CompressibleEulerEquations2D) + # OBS! this assumes that γ = 2 other manufactured source terms are incorrect + if equations.gamma != 2.0 + error("adiabatic constant must be 2 for the coupling convergence test") + end + c = 2.0 + A = 0.1 + ini = c + A * sin(pi * (x[1] + x[2] - t)) + G = 1.0 # gravitational constant + + rho = ini + v1 = 1.0 + v2 = 1.0 + p = ini^2 * G / pi # * 2 / ndims, but ndims==2 here + + return prim2cons(SVector(rho, v1, v2, p), equations) end """ @@ -214,24 +215,25 @@ Setup used for convergence tests of the Euler equations with self-gravity used i [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) in combination with [`initial_condition_eoc_test_coupled_euler_gravity`](@ref). """ -@inline function source_terms_eoc_test_coupled_euler_gravity(u, x, t, equations::CompressibleEulerEquations2D) - # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` - c = 2.0 - A = 0.1 - G = 1.0 # gravitational constant, must match coupling solver - C_grav = -2 * G / pi # 2 == 4 / ndims - - x1, x2 = x - si, co = sincos(pi * (x1 + x2 - t)) - rhox = A * pi * co - rho = c + A * si - - du1 = rhox - du2 = rhox - du3 = rhox - du4 = (1.0 - C_grav*rho)*rhox - - return SVector(du1, du2, du3, du4) +@inline function source_terms_eoc_test_coupled_euler_gravity(u, x, t, + equations::CompressibleEulerEquations2D) + # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` + c = 2.0 + A = 0.1 + G = 1.0 # gravitational constant, must match coupling solver + C_grav = -2 * G / pi # 2 == 4 / ndims + + x1, x2 = x + si, co = sincos(pi * (x1 + x2 - t)) + rhox = A * pi * co + rho = c + A * si + + du1 = rhox + du2 = rhox + du3 = rhox + du4 = (1.0 - C_grav * rho) * rhox + + return SVector(du1, du2, du3, du4) end """ @@ -243,27 +245,27 @@ Setup used for convergence tests of the Euler equations with self-gravity used i [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) in combination with [`initial_condition_eoc_test_coupled_euler_gravity`](@ref). 
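For orientation, such manufactured source terms are attached to a simulation through the `source_terms` keyword of the semidiscretization. A sketch, with mesh and solver arguments that are purely illustrative and not taken from this patch:

```julia
using Trixi

equations = CompressibleEulerEquations2D(2.0)  # γ = 2 is required, see the error check above
mesh = TreeMesh((-1.0, -1.0), (1.0, 1.0),
                initial_refinement_level = 4, n_cells_max = 10_000)
solver = DGSEM(polydeg = 3, surface_flux = flux_hll)
semi = SemidiscretizationHyperbolic(mesh, equations,
                                    initial_condition_eoc_test_coupled_euler_gravity,
                                    solver; source_terms = source_terms_eoc_test_euler)
```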
""" -@inline function source_terms_eoc_test_euler(u, x, t, equations::CompressibleEulerEquations2D) - # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` - c = 2.0 - A = 0.1 - G = 1.0 - C_grav = -2 * G / pi # 2 == 4 / ndims - - x1, x2 = x - si, co = sincos(pi * (x1 + x2 - t)) - rhox = A * pi * co - rho = c + A * si - - du1 = rhox - du2 = rhox * (1 - C_grav * rho) - du3 = rhox * (1 - C_grav * rho) - du4 = rhox * (1 - 3 * C_grav * rho) - - return SVector(du1, du2, du3, du4) +@inline function source_terms_eoc_test_euler(u, x, t, + equations::CompressibleEulerEquations2D) + # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` + c = 2.0 + A = 0.1 + G = 1.0 + C_grav = -2 * G / pi # 2 == 4 / ndims + + x1, x2 = x + si, co = sincos(pi * (x1 + x2 - t)) + rhox = A * pi * co + rho = c + A * si + + du1 = rhox + du2 = rhox * (1 - C_grav * rho) + du3 = rhox * (1 - C_grav * rho) + du4 = rhox * (1 - 3 * C_grav * rho) + + return SVector(du1, du2, du3, du4) end - """ boundary_condition_slip_wall(u_inner, normal_direction, x, t, surface_flux_function, equations::CompressibleEulerEquations2D) @@ -290,36 +292,40 @@ Should be used together with [`UnstructuredMesh2D`](@ref). x, t, surface_flux_function, equations::CompressibleEulerEquations2D) + norm_ = norm(normal_direction) + # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later + normal = normal_direction / norm_ + + # rotate the internal solution state + u_local = rotate_to_x(u_inner, normal, equations) + + # compute the primitive variables + rho_local, v_normal, v_tangent, p_local = cons2prim(u_local, equations) + + # Get the solution of the pressure Riemann problem + # See Section 6.3.3 of + # Eleuterio F. Toro (2009) + # Riemann Solvers and Numerical Methods for Fluid Dynamics: A Practical Introduction + # [DOI: 10.1007/b79761](https://doi.org/10.1007/b79761) + if v_normal <= 0.0 + sound_speed = sqrt(equations.gamma * p_local / rho_local) # local sound speed + p_star = p_local * + (1 + 0.5 * (equations.gamma - 1) * v_normal / sound_speed)^(2 * + equations.gamma * + equations.inv_gamma_minus_one) + else # v_normal > 0.0 + A = 2 / ((equations.gamma + 1) * rho_local) + B = p_local * (equations.gamma - 1) / (equations.gamma + 1) + p_star = p_local + + 0.5 * v_normal / A * + (v_normal + sqrt(v_normal^2 + 4 * A * (p_local + B))) + end - norm_ = norm(normal_direction) - # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later - normal = normal_direction / norm_ - - # rotate the internal solution state - u_local = rotate_to_x(u_inner, normal, equations) - - # compute the primitive variables - rho_local, v_normal, v_tangent, p_local = cons2prim(u_local, equations) - - # Get the solution of the pressure Riemann problem - # See Section 6.3.3 of - # Eleuterio F. 
Toro (2009) - # Riemann Solvers and Numerical Methods for Fluid Dynamics: A Practical Introduction - # [DOI: 10.1007/b79761](https://doi.org/10.1007/b79761) - if v_normal <= 0.0 - sound_speed = sqrt(equations.gamma * p_local / rho_local) # local sound speed - p_star = p_local * (1 + 0.5 * (equations.gamma - 1) * v_normal / sound_speed)^(2 * equations.gamma * equations.inv_gamma_minus_one) - else # v_normal > 0.0 - A = 2 / ((equations.gamma + 1) * rho_local) - B = p_local * (equations.gamma - 1) / (equations.gamma + 1) - p_star = p_local + 0.5 * v_normal / A * (v_normal + sqrt(v_normal^2 + 4 * A * (p_local + B))) - end - - # For the slip wall we directly set the flux as the normal velocity is zero - return SVector(zero(eltype(u_inner)), - p_star * normal[1], - p_star * normal[2], - zero(eltype(u_inner))) * norm_ + # For the slip wall we directly set the flux as the normal velocity is zero + return SVector(zero(eltype(u_inner)), + p_star * normal[1], + p_star * normal[2], + zero(eltype(u_inner))) * norm_ end """ @@ -332,16 +338,16 @@ Should be used together with [`TreeMesh`](@ref). direction, x, t, surface_flux_function, equations::CompressibleEulerEquations2D) - # get the appropriate normal vector from the orientation - if orientation == 1 - normal_direction = SVector(1, 0) - else # orientation == 2 - normal_direction = SVector(0, 1) - end - - # compute and return the flux using `boundary_condition_slip_wall` routine above - return boundary_condition_slip_wall(u_inner, normal_direction, direction, - x, t, surface_flux_function, equations) + # get the appropriate normal vector from the orientation + if orientation == 1 + normal_direction = SVector(1, 0) + else # orientation == 2 + normal_direction = SVector(0, 1) + end + + # compute and return the flux using `boundary_condition_slip_wall` routine above + return boundary_condition_slip_wall(u_inner, normal_direction, direction, + x, t, surface_flux_function, equations) end """ @@ -354,56 +360,57 @@ Should be used together with [`StructuredMesh`](@ref). 
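In an elixir, this boundary condition is typically assigned per direction and passed to the semidiscretization via the `boundary_conditions` keyword. A brief sketch for a non-periodic `StructuredMesh` (illustrative only):

```julia
using Trixi

# Impose the slip wall on all four sides of the domain
boundary_conditions = (x_neg = boundary_condition_slip_wall,
                       x_pos = boundary_condition_slip_wall,
                       y_neg = boundary_condition_slip_wall,
                       y_pos = boundary_condition_slip_wall)
```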
direction, x, t, surface_flux_function, equations::CompressibleEulerEquations2D) - # flip sign of normal to make it outward pointing, then flip the sign of the normal flux back - # to be inward pointing on the -x and -y sides due to the orientation convention used by StructuredMesh - if isodd(direction) - boundary_flux = -boundary_condition_slip_wall(u_inner, -normal_direction, - x, t, surface_flux_function, equations) - else - boundary_flux = boundary_condition_slip_wall(u_inner, normal_direction, - x, t, surface_flux_function, equations) - end - - return boundary_flux -end + # flip sign of normal to make it outward pointing, then flip the sign of the normal flux back + # to be inward pointing on the -x and -y sides due to the orientation convention used by StructuredMesh + if isodd(direction) + boundary_flux = -boundary_condition_slip_wall(u_inner, -normal_direction, + x, t, surface_flux_function, + equations) + else + boundary_flux = boundary_condition_slip_wall(u_inner, normal_direction, + x, t, surface_flux_function, + equations) + end + return boundary_flux +end # Calculate 2D flux for a single point @inline function flux(u, orientation::Integer, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - if orientation == 1 - f1 = rho_v1 - f2 = rho_v1 * v1 + p - f3 = rho_v1 * v2 - f4 = (rho_e + p) * v1 - else - f1 = rho_v2 - f2 = rho_v2 * v1 - f3 = rho_v2 * v2 + p - f4 = (rho_e + p) * v2 - end - return SVector(f1, f2, f3, f4) + rho, rho_v1, rho_v2, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + if orientation == 1 + f1 = rho_v1 + f2 = rho_v1 * v1 + p + f3 = rho_v1 * v2 + f4 = (rho_e + p) * v1 + else + f1 = rho_v2 + f2 = rho_v2 * v1 + f3 = rho_v2 * v2 + p + f4 = (rho_e + p) * v2 + end + return SVector(f1, f2, f3, f4) end # Calculate 2D flux for a single point in the normal direction # Note, this directional vector is not normalized -@inline function flux(u, normal_direction::AbstractVector, equations::CompressibleEulerEquations2D) - rho_e = last(u) - rho, v1, v2, p = cons2prim(u, equations) - - v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] - rho_v_normal = rho * v_normal - f1 = rho_v_normal - f2 = rho_v_normal * v1 + p * normal_direction[1] - f3 = rho_v_normal * v2 + p * normal_direction[2] - f4 = (rho_e + p) * v_normal - return SVector(f1, f2, f3, f4) +@inline function flux(u, normal_direction::AbstractVector, + equations::CompressibleEulerEquations2D) + rho_e = last(u) + rho, v1, v2, p = cons2prim(u, equations) + + v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] + rho_v_normal = rho * v_normal + f1 = rho_v_normal + f2 = rho_v_normal * v1 + p * normal_direction[1] + f3 = rho_v_normal * v2 + p * normal_direction[2] + f4 = (rho_e + p) * v_normal + return SVector(f1, f2, f3, f4) end - """ flux_shima_etal(u_ll, u_rr, orientation_or_normal_direction, equations::CompressibleEulerEquations2D) @@ -420,61 +427,63 @@ The modification is in the energy flux to guarantee pressure equilibrium and was compressible flows [DOI: 10.1016/j.jcp.2020.110060](https://doi.org/10.1016/j.jcp.2020.110060) """ -@inline function flux_shima_etal(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - # 
Average each factor of products in flux - rho_avg = 1/2 * (rho_ll + rho_rr) - v1_avg = 1/2 * ( v1_ll + v1_rr) - v2_avg = 1/2 * ( v2_ll + v2_rr) - p_avg = 1/2 * ( p_ll + p_rr) - kin_avg = 1/2 * (v1_ll*v1_rr + v2_ll*v2_rr) - - # Calculate fluxes depending on orientation - if orientation == 1 - pv1_avg = 1/2 * (p_ll*v1_rr + p_rr*v1_ll) - f1 = rho_avg * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = p_avg*v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg - else - pv2_avg = 1/2 * (p_ll*v2_rr + p_rr*v2_ll) - f1 = rho_avg * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = p_avg*v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg - end - - return SVector(f1, f2, f3, f4) -end +@inline function flux_shima_etal(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations2D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + # Average each factor of products in flux + rho_avg = 1 / 2 * (rho_ll + rho_rr) + v1_avg = 1 / 2 * (v1_ll + v1_rr) + v2_avg = 1 / 2 * (v2_ll + v2_rr) + p_avg = 1 / 2 * (p_ll + p_rr) + kin_avg = 1 / 2 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on orientation + if orientation == 1 + pv1_avg = 1 / 2 * (p_ll * v1_rr + p_rr * v1_ll) + f1 = rho_avg * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = p_avg * v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg + else + pv2_avg = 1 / 2 * (p_ll * v2_rr + p_rr * v2_ll) + f1 = rho_avg * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = p_avg * v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg + end -@inline function flux_shima_etal(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations2D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] - v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] - - # Average each factor of products in flux - rho_avg = 1/2 * (rho_ll + rho_rr) - v1_avg = 1/2 * ( v1_ll + v1_rr) - v2_avg = 1/2 * ( v2_ll + v2_rr) - v_dot_n_avg = 1/2 * (v_dot_n_ll + v_dot_n_rr) - p_avg = 1/2 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - f4 = ( f1 * velocity_square_avg + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - return SVector(f1, f2, f3, f4) + return SVector(f1, f2, f3, f4) end +@inline function flux_shima_etal(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations2D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + # Average each factor of products in flux + rho_avg = 1 / 2 * (rho_ll + rho_rr) + v1_avg = 1 / 2 * (v1_ll + v1_rr) + v2_avg = 1 / 2 * (v2_ll + v2_rr) + v_dot_n_avg = 1 / 2 * (v_dot_n_ll + v_dot_n_rr) + p_avg = 1 / 2 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on 
normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction[1] + f3 = f1 * v2_avg + p_avg * normal_direction[2] + f4 = (f1 * velocity_square_avg + + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + return SVector(f1, f2, f3, f4) +end """ flux_kennedy_gruber(u_ll, u_rr, orientation_or_normal_direction, @@ -486,60 +495,61 @@ Kinetic energy preserving two-point flux by Navier-Stokes equations for a compressible fluid [DOI: 10.1016/j.jcp.2007.09.020](https://doi.org/10.1016/j.jcp.2007.09.020) """ -@inline function flux_kennedy_gruber(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - # Unpack left and right state - rho_e_ll = last(u_ll) - rho_e_rr = last(u_rr) - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - # Average each factor of products in flux - rho_avg = 1/2 * (rho_ll + rho_rr) - v1_avg = 1/2 * ( v1_ll + v1_rr) - v2_avg = 1/2 * ( v2_ll + v2_rr) - p_avg = 1/2 * ( p_ll + p_rr) - e_avg = 1/2 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = rho_avg * v1_avg - f2 = rho_avg * v1_avg * v1_avg + p_avg - f3 = rho_avg * v1_avg * v2_avg - f4 = (rho_avg * e_avg + p_avg) * v1_avg - else - f1 = rho_avg * v2_avg - f2 = rho_avg * v2_avg * v1_avg - f3 = rho_avg * v2_avg * v2_avg + p_avg - f4 = (rho_avg * e_avg + p_avg) * v2_avg - end - - return SVector(f1, f2, f3, f4) -end +@inline function flux_kennedy_gruber(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations2D) + # Unpack left and right state + rho_e_ll = last(u_ll) + rho_e_rr = last(u_rr) + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + # Average each factor of products in flux + rho_avg = 1 / 2 * (rho_ll + rho_rr) + v1_avg = 1 / 2 * (v1_ll + v1_rr) + v2_avg = 1 / 2 * (v2_ll + v2_rr) + p_avg = 1 / 2 * (p_ll + p_rr) + e_avg = 1 / 2 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = rho_avg * v1_avg + f2 = rho_avg * v1_avg * v1_avg + p_avg + f3 = rho_avg * v1_avg * v2_avg + f4 = (rho_avg * e_avg + p_avg) * v1_avg + else + f1 = rho_avg * v2_avg + f2 = rho_avg * v2_avg * v1_avg + f3 = rho_avg * v2_avg * v2_avg + p_avg + f4 = (rho_avg * e_avg + p_avg) * v2_avg + end -@inline function flux_kennedy_gruber(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations2D) - # Unpack left and right state - rho_e_ll = last(u_ll) - rho_e_rr = last(u_rr) - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - # Average each factor of products in flux - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v_dot_n_avg = v1_avg * normal_direction[1] + v2_avg * normal_direction[2] - p_avg = 0.5 * (p_ll + p_rr) - e_avg = 0.5 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - f4 = f1 * e_avg + p_avg * v_dot_n_avg - - return SVector(f1, f2, f3, f4) + return SVector(f1, f2, f3, f4) end +@inline function flux_kennedy_gruber(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations2D) + # Unpack left and right state + rho_e_ll = 
last(u_ll) + rho_e_rr = last(u_rr) + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + # Average each factor of products in flux + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v_dot_n_avg = v1_avg * normal_direction[1] + v2_avg * normal_direction[2] + p_avg = 0.5 * (p_ll + p_rr) + e_avg = 0.5 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction[1] + f3 = f1 * v2_avg + p_avg * normal_direction[2] + f4 = f1 * e_avg + p_avg * v_dot_n_avg + + return SVector(f1, f2, f3, f4) +end """ flux_chandrashekar(u_ll, u_rr, orientation, equations::CompressibleEulerEquations2D) @@ -550,41 +560,43 @@ Entropy conserving two-point flux by for Compressible Euler and Navier-Stokes Equations [DOI: 10.4208/cicp.170712.010313a](https://doi.org/10.4208/cicp.170712.010313a) """ -@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - beta_ll = 0.5 * rho_ll / p_ll - beta_rr = 0.5 * rho_rr / p_rr - specific_kin_ll = 0.5 * (v1_ll^2 + v2_ll^2) - specific_kin_rr = 0.5 * (v1_rr^2 + v2_rr^2) - - # Compute the necessary mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - rho_mean = ln_mean(rho_ll, rho_rr) - beta_mean = ln_mean(beta_ll, beta_rr) - beta_avg = 0.5 * (beta_ll + beta_rr) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_mean = 0.5 * rho_avg / beta_avg - velocity_square_avg = specific_kin_ll + specific_kin_rr - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_mean - f3 = f1 * v2_avg - f4 = f1 * 0.5*(1/(equations.gamma-1)/beta_mean - velocity_square_avg)+f2*v1_avg + f3*v2_avg - else - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_mean - f4 = f1 * 0.5*(1/(equations.gamma-1)/beta_mean - velocity_square_avg)+f2*v1_avg + f3*v2_avg - end - - return SVector(f1, f2, f3, f4) -end +@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations2D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + beta_ll = 0.5 * rho_ll / p_ll + beta_rr = 0.5 * rho_rr / p_rr + specific_kin_ll = 0.5 * (v1_ll^2 + v2_ll^2) + specific_kin_rr = 0.5 * (v1_rr^2 + v2_rr^2) + + # Compute the necessary mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + rho_mean = ln_mean(rho_ll, rho_rr) + beta_mean = ln_mean(beta_ll, beta_rr) + beta_avg = 0.5 * (beta_ll + beta_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_mean = 0.5 * rho_avg / beta_avg + velocity_square_avg = specific_kin_ll + specific_kin_rr + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_mean + f3 = f1 * v2_avg + f4 = f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - velocity_square_avg) + + f2 * v1_avg + f3 * v2_avg + else + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_mean + f4 = f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - velocity_square_avg) + + f2 * v1_avg + f3 * v2_avg + end + return SVector(f1, f2, f3, f4) +end """ flux_ranocha(u_ll, u_rr, orientation_or_normal_direction, @@ -601,68 
+613,74 @@ See also the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - # Compute the necessary mean values - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr) - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v1_rr + p_rr*v1_ll) - else - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v2_rr + p_rr*v2_ll) - end - - return SVector(f1, f2, f3, f4) -end +@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations2D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + # Compute the necessary mean values + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + else + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v2_rr + p_rr * v2_ll) + end -@inline function flux_ranocha(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations2D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] - v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] - - # Compute the necessary mean values - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg 
= 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - f4 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - return SVector(f1, f2, f3, f4) + return SVector(f1, f2, f3, f4) end +@inline function flux_ranocha(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations2D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + # Compute the necessary mean values + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = f1 * v1_avg + p_avg * normal_direction[1] + f3 = f1 * v2_avg + p_avg * normal_direction[2] + f4 = (f1 * (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + return SVector(f1, f2, f3, f4) +end """ splitting_steger_warming(u, orientation::Integer, @@ -690,104 +708,107 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. 
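The same consistency property as in the 1D case holds per coordinate direction. A minimal sketch, assuming `Trixi` exports the names documented here:

```julia
using Trixi, StaticArrays

equations = CompressibleEulerEquations2D(1.4)
# An arbitrary subsonic state, given as primitive variables (rho, v1, v2, p)
u = prim2cons(SVector(1.0, 0.3, -0.2, 1.0), equations)

for orientation in (1, 2)  # x- and y-direction
    fm, fp = splitting_steger_warming(u, orientation, equations)
    @assert fm + fp ≈ flux(u, orientation, equations)
end
```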
""" @inline function splitting_steger_warming(u, orientation::Integer, equations::CompressibleEulerEquations2D) - fm = splitting_steger_warming(u, Val{:minus}(), orientation, equations) - fp = splitting_steger_warming(u, Val{:plus}(), orientation, equations) - return fm, fp + fm = splitting_steger_warming(u, Val{:minus}(), orientation, equations) + fp = splitting_steger_warming(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_steger_warming(u, ::Val{:plus}, orientation::Integer, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - a = sqrt(equations.gamma * p / rho) - - if orientation == 1 - lambda1 = v1 - lambda2 = v1 + a - lambda3 = v1 - a - - lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) - lambda2_p = positive_part(lambda2) - lambda3_p = positive_part(lambda3) - - alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p - - rho_2gamma = 0.5 * rho / equations.gamma - f1p = rho_2gamma * alpha_p - f2p = rho_2gamma * (alpha_p * v1 + a * (lambda2_p - lambda3_p)) - f3p = rho_2gamma * alpha_p * v2 - f4p = rho_2gamma * (alpha_p * 0.5 * (v1^2 + v2^2) + a * v1 * (lambda2_p - lambda3_p) - + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) - else # orientation == 2 - lambda1 = v2 - lambda2 = v2 + a - lambda3 = v2 - a - - lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) - lambda2_p = positive_part(lambda2) - lambda3_p = positive_part(lambda3) - - alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p - - rho_2gamma = 0.5 * rho / equations.gamma - f1p = rho_2gamma * alpha_p - f2p = rho_2gamma * alpha_p * v1 - f3p = rho_2gamma * (alpha_p * v2 + a * (lambda2_p - lambda3_p)) - f4p = rho_2gamma * (alpha_p * 0.5 * (v1^2 + v2^2) + a * v2 * (lambda2_p - lambda3_p) - + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) - end - return SVector(f1p, f2p, f3p, f4p) + rho, rho_v1, rho_v2, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + a = sqrt(equations.gamma * p / rho) + + if orientation == 1 + lambda1 = v1 + lambda2 = v1 + a + lambda3 = v1 - a + + lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) + lambda2_p = positive_part(lambda2) + lambda3_p = positive_part(lambda3) + + alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p + + rho_2gamma = 0.5 * rho / equations.gamma + f1p = rho_2gamma * alpha_p + f2p = rho_2gamma * (alpha_p * v1 + a * (lambda2_p - lambda3_p)) + f3p = rho_2gamma * alpha_p * v2 + f4p = rho_2gamma * + (alpha_p * 0.5 * (v1^2 + v2^2) + a * v1 * (lambda2_p - lambda3_p) + + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) + else # orientation == 2 + lambda1 = v2 + lambda2 = v2 + a + lambda3 = v2 - a + + lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) + lambda2_p = positive_part(lambda2) + lambda3_p = positive_part(lambda3) + + alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p + + rho_2gamma = 0.5 * rho / equations.gamma + f1p = rho_2gamma * alpha_p + f2p = rho_2gamma * alpha_p * v1 + f3p = rho_2gamma * (alpha_p * v2 + a * (lambda2_p - lambda3_p)) + f4p = rho_2gamma * + (alpha_p * 0.5 * (v1^2 + v2^2) + a * v2 * (lambda2_p - lambda3_p) + + a^2 * (lambda2_p + lambda3_p) * 
equations.inv_gamma_minus_one) + end + return SVector(f1p, f2p, f3p, f4p) end @inline function splitting_steger_warming(u, ::Val{:minus}, orientation::Integer, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - a = sqrt(equations.gamma * p / rho) - - if orientation == 1 - lambda1 = v1 - lambda2 = v1 + a - lambda3 = v1 - a - - lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) - lambda2_m = negative_part(lambda2) - lambda3_m = negative_part(lambda3) - - alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m - - rho_2gamma = 0.5 * rho / equations.gamma - f1m = rho_2gamma * alpha_m - f2m = rho_2gamma * (alpha_m * v1 + a * (lambda2_m - lambda3_m)) - f3m = rho_2gamma * alpha_m * v2 - f4m = rho_2gamma * (alpha_m * 0.5 * (v1^2 + v2^2) + a * v1 * (lambda2_m - lambda3_m) - + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) - else # orientation == 2 - lambda1 = v2 - lambda2 = v2 + a - lambda3 = v2 - a - - lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) - lambda2_m = negative_part(lambda2) - lambda3_m = negative_part(lambda3) - - alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m - - rho_2gamma = 0.5 * rho / equations.gamma - f1m = rho_2gamma * alpha_m - f2m = rho_2gamma * alpha_m * v1 - f3m = rho_2gamma * (alpha_m * v2 + a * (lambda2_m-lambda3_m)) - f4m = rho_2gamma * (alpha_m * 0.5 * (v1^2 + v2^2) + a * v2 * (lambda2_m - lambda3_m) - + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) - end - return SVector(f1m, f2m, f3m, f4m) + rho, rho_v1, rho_v2, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + a = sqrt(equations.gamma * p / rho) + + if orientation == 1 + lambda1 = v1 + lambda2 = v1 + a + lambda3 = v1 - a + + lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) + lambda2_m = negative_part(lambda2) + lambda3_m = negative_part(lambda3) + + alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m + + rho_2gamma = 0.5 * rho / equations.gamma + f1m = rho_2gamma * alpha_m + f2m = rho_2gamma * (alpha_m * v1 + a * (lambda2_m - lambda3_m)) + f3m = rho_2gamma * alpha_m * v2 + f4m = rho_2gamma * + (alpha_m * 0.5 * (v1^2 + v2^2) + a * v1 * (lambda2_m - lambda3_m) + + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) + else # orientation == 2 + lambda1 = v2 + lambda2 = v2 + a + lambda3 = v2 - a + + lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) + lambda2_m = negative_part(lambda2) + lambda3_m = negative_part(lambda3) + + alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m + + rho_2gamma = 0.5 * rho / equations.gamma + f1m = rho_2gamma * alpha_m + f2m = rho_2gamma * alpha_m * v1 + f3m = rho_2gamma * (alpha_m * v2 + a * (lambda2_m - lambda3_m)) + f4m = rho_2gamma * + (alpha_m * 0.5 * (v1^2 + v2^2) + a * v2 * (lambda2_m - lambda3_m) + + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) + end + return SVector(f1m, f2m, f3m, f4m) end - """ splitting_vanleer_haenel(u, orientation::Integer, equations::CompressibleEulerEquations2D) @@ -824,72 +845,71 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. 
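As a quick consistency check (an illustrative sketch, not a doctest shipped with
this file), the two halves of the splitting recover the full physical flux:

```julia
using Trixi, StaticArrays

equations = CompressibleEulerEquations2D(1.4)
u = prim2cons(SVector(1.0, 0.3, -0.2, 1.0), equations)  # rho, v1, v2, p

fm, fp = splitting_vanleer_haenel(u, 1, equations)
fm + fp ≈ flux(u, 1, equations)  # consistency f = f⁻ + f⁺, up to roundoff
```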
""" @inline function splitting_vanleer_haenel(u, orientation::Integer, equations::CompressibleEulerEquations2D) - fm = splitting_vanleer_haenel(u, Val{:minus}(), orientation, equations) - fp = splitting_vanleer_haenel(u, Val{:plus}(), orientation, equations) - return fm, fp + fm = splitting_vanleer_haenel(u, Val{:minus}(), orientation, equations) + fp = splitting_vanleer_haenel(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_vanleer_haenel(u, ::Val{:plus}, orientation::Integer, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho - - if orientation == 1 - M = v1 / a - p_plus = 0.5 * (1 + equations.gamma * M) * p - - f1p = 0.25 * rho * a * (M + 1)^2 - f2p = f1p * v1 + p_plus - f3p = f1p * v2 - f4p = f1p * H - else # orientation == 2 - M = v2 / a - p_plus = 0.5 * (1 + equations.gamma * M) * p - - f1p = 0.25 * rho * a * (M + 1)^2 - f2p = f1p * v1 - f3p = f1p * v2 + p_plus - f4p = f1p * H - end - return SVector(f1p, f2p, f3p, f4p) + rho, rho_v1, rho_v2, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho + + if orientation == 1 + M = v1 / a + p_plus = 0.5 * (1 + equations.gamma * M) * p + + f1p = 0.25 * rho * a * (M + 1)^2 + f2p = f1p * v1 + p_plus + f3p = f1p * v2 + f4p = f1p * H + else # orientation == 2 + M = v2 / a + p_plus = 0.5 * (1 + equations.gamma * M) * p + + f1p = 0.25 * rho * a * (M + 1)^2 + f2p = f1p * v1 + f3p = f1p * v2 + p_plus + f4p = f1p * H + end + return SVector(f1p, f2p, f3p, f4p) end @inline function splitting_vanleer_haenel(u, ::Val{:minus}, orientation::Integer, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho - - if orientation == 1 - M = v1 / a - p_minus = 0.5 * (1 - equations.gamma * M) * p - - f1m= -0.25 * rho * a * (M - 1)^2 - f2m = f1m * v1 + p_minus - f3m = f1m * v2 - f4m = f1m * H - else # orientation == 2 - M = v2 / a - p_minus = 0.5 * (1 - equations.gamma * M) * p - - f1m= -0.25 * rho * a * (M - 1)^2 - f2m = f1m * v1 - f3m = f1m * v2 + p_minus - f4m = f1m * H - end - return SVector(f1m, f2m, f3m, f4m) + rho, rho_v1, rho_v2, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho + + if orientation == 1 + M = v1 / a + p_minus = 0.5 * (1 - equations.gamma * M) * p + + f1m = -0.25 * rho * a * (M - 1)^2 + f2m = f1m * v1 + p_minus + f3m = f1m * v2 + f4m = f1m * H + else # orientation == 2 + M = v2 / a + p_minus = 0.5 * (1 - equations.gamma * M) * p + + f1m = -0.25 * rho * a * (M - 1)^2 + f2m = f1m * v1 + f3m = f1m * v2 + p_minus + f4m = f1m * H + end + return SVector(f1m, f2m, f3m, f4m) end - """ splitting_lax_friedrichs(u, orientation::Integer, equations::CompressibleEulerEquations2D) @@ -911,184 +931,183 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. 
""" @inline function splitting_lax_friedrichs(u, orientation::Integer, equations::CompressibleEulerEquations2D) - fm = splitting_lax_friedrichs(u, Val{:minus}(), orientation, equations) - fp = splitting_lax_friedrichs(u, Val{:plus}(), orientation, equations) - return fm, fp + fm = splitting_lax_friedrichs(u, Val{:minus}(), orientation, equations) + fp = splitting_lax_friedrichs(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_lax_friedrichs(u, ::Val{:plus}, orientation::Integer, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho - lambda = 0.5 * (sqrt(v1^2 + v2^2) + a) - - if orientation == 1 - #lambda = 0.5 * (abs(v1) + a) - f1p = 0.5 * rho * v1 + lambda * u[1] - f2p = 0.5 * rho * v1 * v1 + 0.5 * p + lambda * u[2] - f3p = 0.5 * rho * v1 * v2 + lambda * u[3] - f4p = 0.5 * rho * v1 * H + lambda * u[4] - else # orientation == 2 - #lambda = 0.5 * (abs(v2) + a) - f1p = 0.5 * rho * v2 + lambda * u[1] - f2p = 0.5 * rho * v2 * v1 + lambda * u[2] - f3p = 0.5 * rho * v2 * v2 + 0.5 * p + lambda * u[3] - f4p = 0.5 * rho * v2 * H + lambda * u[4] - end - return SVector(f1p, f2p, f3p, f4p) + rho, rho_v1, rho_v2, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho + lambda = 0.5 * (sqrt(v1^2 + v2^2) + a) + + if orientation == 1 + #lambda = 0.5 * (abs(v1) + a) + f1p = 0.5 * rho * v1 + lambda * u[1] + f2p = 0.5 * rho * v1 * v1 + 0.5 * p + lambda * u[2] + f3p = 0.5 * rho * v1 * v2 + lambda * u[3] + f4p = 0.5 * rho * v1 * H + lambda * u[4] + else # orientation == 2 + #lambda = 0.5 * (abs(v2) + a) + f1p = 0.5 * rho * v2 + lambda * u[1] + f2p = 0.5 * rho * v2 * v1 + lambda * u[2] + f3p = 0.5 * rho * v2 * v2 + 0.5 * p + lambda * u[3] + f4p = 0.5 * rho * v2 * H + lambda * u[4] + end + return SVector(f1p, f2p, f3p, f4p) end @inline function splitting_lax_friedrichs(u, ::Val{:minus}, orientation::Integer, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - - a = sqrt(equations.gamma * p / rho) - H = (rho_e + p) / rho - lambda = 0.5 * (sqrt(v1^2 + v2^2) + a) - - if orientation == 1 - #lambda = 0.5 * (abs(v1) + a) - f1m = 0.5 * rho * v1 - lambda * u[1] - f2m = 0.5 * rho * v1 * v1 + 0.5 * p - lambda * u[2] - f3m = 0.5 * rho * v1 * v2 - lambda * u[3] - f4m = 0.5 * rho * v1 * H - lambda * u[4] - else # orientation == 2 - #lambda = 0.5 * (abs(v2) + a) - f1m = 0.5 * rho * v2 - lambda * u[1] - f2m = 0.5 * rho * v2 * v1 - lambda * u[2] - f3m = 0.5 * rho * v2 * v2 + 0.5 * p - lambda * u[3] - f4m = 0.5 * rho * v2 * H - lambda * u[4] - end - return SVector(f1m, f2m, f3m, f4m) + rho, rho_v1, rho_v2, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + a = sqrt(equations.gamma * p / rho) + H = (rho_e + p) / rho + lambda = 0.5 * (sqrt(v1^2 + v2^2) + a) + + if orientation == 1 + #lambda = 0.5 * (abs(v1) + a) + f1m = 0.5 * rho * v1 - lambda * u[1] + f2m = 0.5 * rho * v1 * v1 + 0.5 * p - lambda * u[2] + f3m = 0.5 * rho * v1 * v2 - lambda * u[3] + f4m = 0.5 * rho * v1 * H - lambda * u[4] + else # orientation == 2 + #lambda = 0.5 * (abs(v2) + a) + f1m = 
0.5 * rho * v2 - lambda * u[1] + f2m = 0.5 * rho * v2 * v1 - lambda * u[2] + f3m = 0.5 * rho * v2 * v2 + 0.5 * p - lambda * u[3] + f4m = 0.5 * rho * v2 * H - lambda * u[4] + end + return SVector(f1m, f2m, f3m, f4m) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation as the # maximum velocity magnitude plus the maximum speed of sound -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - # Get the velocity value in the appropriate direction - if orientation == 1 - v_ll = v1_ll - v_rr = v1_rr - else # orientation == 2 - v_ll = v2_ll - v_rr = v2_rr - end - # Calculate sound speeds - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - c_rr = sqrt(equations.gamma * p_rr / rho_rr) - - λ_max = max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) -end - - -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations2D) - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - # Calculate normal velocities and sound speed - # left - v_ll = ( v1_ll * normal_direction[1] - + v2_ll * normal_direction[2] ) - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - # right - v_rr = ( v1_rr * normal_direction[1] - + v2_rr * normal_direction[2] ) - c_rr = sqrt(equations.gamma * p_rr / rho_rr) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations2D) + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + # Get the velocity value in the appropriate direction + if orientation == 1 + v_ll = v1_ll + v_rr = v1_rr + else # orientation == 2 + v_ll = v2_ll + v_rr = v2_rr + end + # Calculate sound speeds + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) - return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) * norm(normal_direction) + λ_max = max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) end +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations2D) + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + # Calculate normal velocities and sound speed + # left + v_ll = (v1_ll * normal_direction[1] + + + v2_ll * normal_direction[2]) + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + # right + v_rr = (v1_rr * normal_direction[1] + + + v2_rr * normal_direction[2]) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) * norm(normal_direction) +end # Calculate minimum and maximum wave speeds for HLL-type fluxes @inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - if orientation == 1 # x-direction - λ_min = v1_ll - sqrt(equations.gamma * p_ll / rho_ll) - λ_max = v1_rr + sqrt(equations.gamma * p_rr / rho_rr) - else # y-direction - λ_min = v2_ll - sqrt(equations.gamma * p_ll / rho_ll) - λ_max = v2_rr + sqrt(equations.gamma * p_rr / rho_rr) - end - - return λ_min, λ_max + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + if orientation == 1 # x-direction 
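+        # These one-sided estimates bound the minimum wave speed using only the
+        # left state and the maximum wave speed using only the right state,
+        # hence the suffix `naive`.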
+ λ_min = v1_ll - sqrt(equations.gamma * p_ll / rho_ll) + λ_max = v1_rr + sqrt(equations.gamma * p_rr / rho_rr) + else # y-direction + λ_min = v2_ll - sqrt(equations.gamma * p_ll / rho_ll) + λ_max = v2_rr + sqrt(equations.gamma * p_rr / rho_rr) + end + + return λ_min, λ_max end @inline function min_max_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations2D) - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - v_normal_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] - v_normal_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + v_normal_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v_normal_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] - norm_ = norm(normal_direction) - # The v_normals are already scaled by the norm - λ_min = v_normal_ll - sqrt(equations.gamma * p_ll / rho_ll) * norm_ - λ_max = v_normal_rr + sqrt(equations.gamma * p_rr / rho_rr) * norm_ + norm_ = norm(normal_direction) + # The v_normals are already scaled by the norm + λ_min = v_normal_ll - sqrt(equations.gamma * p_ll / rho_ll) * norm_ + λ_max = v_normal_rr + sqrt(equations.gamma * p_rr / rho_rr) * norm_ - return λ_min, λ_max + return λ_min, λ_max end - # Called inside `FluxRotated` in `numerical_fluxes.jl` so the direction # has been normalized prior to this rotation of the state vector @inline function rotate_to_x(u, normal_vector, equations::CompressibleEulerEquations2D) - # cos and sin of the angle between the x-axis and the normalized normal_vector are - # the normalized vector's x and y coordinates respectively (see unit circle). - c = normal_vector[1] - s = normal_vector[2] - - # Apply the 2D rotation matrix with normal and tangent directions of the form - # [ 1 0 0 0; - # 0 n_1 n_2 0; - # 0 t_1 t_2 0; - # 0 0 0 1 ] - # where t_1 = -n_2 and t_2 = n_1 - - return SVector(u[1], - c * u[2] + s * u[3], - -s * u[2] + c * u[3], - u[4]) + # cos and sin of the angle between the x-axis and the normalized normal_vector are + # the normalized vector's x and y coordinates respectively (see unit circle). + c = normal_vector[1] + s = normal_vector[2] + + # Apply the 2D rotation matrix with normal and tangent directions of the form + # [ 1 0 0 0; + # 0 n_1 n_2 0; + # 0 t_1 t_2 0; + # 0 0 0 1 ] + # where t_1 = -n_2 and t_2 = n_1 + + return SVector(u[1], + c * u[2] + s * u[3], + -s * u[2] + c * u[3], + u[4]) end - # Called inside `FluxRotated` in `numerical_fluxes.jl` so the direction # has been normalized prior to this back-rotation of the state vector -@inline function rotate_from_x(u, normal_vector, equations::CompressibleEulerEquations2D) - # cos and sin of the angle between the x-axis and the normalized normal_vector are - # the normalized vector's x and y coordinates respectively (see unit circle). 
- c = normal_vector[1] - s = normal_vector[2] - - # Apply the 2D back-rotation matrix with normal and tangent directions of the form - # [ 1 0 0 0; - # 0 n_1 t_1 0; - # 0 n_2 t_2 0; - # 0 0 0 1 ] - # where t_1 = -n_2 and t_2 = n_1 - - return SVector(u[1], - c * u[2] - s * u[3], - s * u[2] + c * u[3], - u[4]) +@inline function rotate_from_x(u, normal_vector, + equations::CompressibleEulerEquations2D) + # cos and sin of the angle between the x-axis and the normalized normal_vector are + # the normalized vector's x and y coordinates respectively (see unit circle). + c = normal_vector[1] + s = normal_vector[2] + + # Apply the 2D back-rotation matrix with normal and tangent directions of the form + # [ 1 0 0 0; + # 0 n_1 t_1 0; + # 0 n_2 t_2 0; + # 0 0 0 1 ] + # where t_1 = -n_2 and t_2 = n_1 + + return SVector(u[1], + c * u[2] - s * u[3], + s * u[2] + c * u[3], + u[4]) end - """ flux_hllc(u_ll, u_rr, orientation, equations::CompressibleEulerEquations2D) @@ -1096,101 +1115,102 @@ Computes the HLLC flux (HLL with Contact) for compressible Euler equations devel [Lecture slides](http://www.prague-sum.com/download/2012/Toro_2-HLLC-RiemannSolver.pdf) Signal speeds: [DOI: 10.1137/S1064827593260140](https://doi.org/10.1137/S1064827593260140) """ -function flux_hllc(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - # Calculate primitive variables and speed of sound - rho_ll, rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - e_ll = rho_e_ll / rho_ll - p_ll = (equations.gamma - 1) * (rho_e_ll - 1/2 * rho_ll * (v1_ll^2 + v2_ll^2)) - c_ll = sqrt(equations.gamma*p_ll/rho_ll) - - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - e_rr = rho_e_rr / rho_rr - p_rr = (equations.gamma - 1) * (rho_e_rr - 1/2 * rho_rr * (v1_rr^2 + v2_rr^2)) - c_rr = sqrt(equations.gamma*p_rr/rho_rr) - - # Obtain left and right fluxes - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) - - # Compute Roe averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - sum_sqrt_rho = sqrt_rho_ll + sqrt_rho_rr - if orientation == 1 # x-direction - vel_L = v1_ll - vel_R = v1_rr - ekin_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr)^2 - elseif orientation == 2 # y-direction - vel_L = v2_ll - vel_R = v2_rr - ekin_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr)^2 - end - vel_roe = (sqrt_rho_ll * vel_L + sqrt_rho_rr * vel_R) / sum_sqrt_rho - ekin_roe = 0.5 * (vel_roe^2 + ekin_roe / sum_sqrt_rho^2) - H_ll = (rho_e_ll + p_ll) / rho_ll - H_rr = (rho_e_rr + p_rr) / rho_rr - H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) / sum_sqrt_rho - c_roe = sqrt((equations.gamma - 1) * (H_roe - ekin_roe)) - Ssl = min(vel_L - c_ll, vel_roe - c_roe) - Ssr = max(vel_R + c_rr, vel_roe + c_roe) - sMu_L = Ssl - vel_L - sMu_R = Ssr - vel_R - - if Ssl >= 0.0 - f1 = f_ll[1] - f2 = f_ll[2] - f3 = f_ll[3] - f4 = f_ll[4] - elseif Ssr <= 0.0 - f1 = f_rr[1] - f2 = f_rr[2] - f3 = f_rr[3] - f4 = f_rr[4] - else - SStar = (p_rr - p_ll + rho_ll*vel_L*sMu_L - rho_rr*vel_R*sMu_R) / (rho_ll*sMu_L - rho_rr*sMu_R) - if Ssl <= 0.0 <= SStar - densStar = rho_ll*sMu_L / (Ssl-SStar) - enerStar = e_ll + (SStar - vel_L) * (SStar + p_ll / (rho_ll * sMu_L)) - UStar1 = densStar - UStar4 = densStar*enerStar - if orientation == 1 # x-direction - UStar2 = densStar*SStar - UStar3 = densStar*v2_ll - elseif orientation == 2 # y-direction - UStar2 = densStar*v1_ll - UStar3 = densStar*SStar - end - f1 = f_ll[1]+Ssl*(UStar1 
- rho_ll) - f2 = f_ll[2]+Ssl*(UStar2 - rho_v1_ll) - f3 = f_ll[3]+Ssl*(UStar3 - rho_v2_ll) - f4 = f_ll[4]+Ssl*(UStar4 - rho_e_ll) +function flux_hllc(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations2D) + # Calculate primitive variables and speed of sound + rho_ll, rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + e_ll = rho_e_ll / rho_ll + p_ll = (equations.gamma - 1) * (rho_e_ll - 1 / 2 * rho_ll * (v1_ll^2 + v2_ll^2)) + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + e_rr = rho_e_rr / rho_rr + p_rr = (equations.gamma - 1) * (rho_e_rr - 1 / 2 * rho_rr * (v1_rr^2 + v2_rr^2)) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + # Obtain left and right fluxes + f_ll = flux(u_ll, orientation, equations) + f_rr = flux(u_rr, orientation, equations) + + # Compute Roe averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + sum_sqrt_rho = sqrt_rho_ll + sqrt_rho_rr + if orientation == 1 # x-direction + vel_L = v1_ll + vel_R = v1_rr + ekin_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr)^2 + elseif orientation == 2 # y-direction + vel_L = v2_ll + vel_R = v2_rr + ekin_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr)^2 + end + vel_roe = (sqrt_rho_ll * vel_L + sqrt_rho_rr * vel_R) / sum_sqrt_rho + ekin_roe = 0.5 * (vel_roe^2 + ekin_roe / sum_sqrt_rho^2) + H_ll = (rho_e_ll + p_ll) / rho_ll + H_rr = (rho_e_rr + p_rr) / rho_rr + H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) / sum_sqrt_rho + c_roe = sqrt((equations.gamma - 1) * (H_roe - ekin_roe)) + Ssl = min(vel_L - c_ll, vel_roe - c_roe) + Ssr = max(vel_R + c_rr, vel_roe + c_roe) + sMu_L = Ssl - vel_L + sMu_R = Ssr - vel_R + + if Ssl >= 0.0 + f1 = f_ll[1] + f2 = f_ll[2] + f3 = f_ll[3] + f4 = f_ll[4] + elseif Ssr <= 0.0 + f1 = f_rr[1] + f2 = f_rr[2] + f3 = f_rr[3] + f4 = f_rr[4] else - densStar = rho_rr*sMu_R / (Ssr-SStar) - enerStar = e_rr + (SStar - vel_R) * (SStar + p_rr / (rho_rr * sMu_R)) - UStar1 = densStar - UStar4 = densStar*enerStar - if orientation == 1 # x-direction - UStar2 = densStar*SStar - UStar3 = densStar*v2_rr - elseif orientation == 2 # y-direction - UStar2 = densStar*v1_rr - UStar3 = densStar*SStar - end - f1 = f_rr[1]+Ssr*(UStar1 - rho_rr) - f2 = f_rr[2]+Ssr*(UStar2 - rho_v1_rr) - f3 = f_rr[3]+Ssr*(UStar3 - rho_v2_rr) - f4 = f_rr[4]+Ssr*(UStar4 - rho_e_rr) + SStar = (p_rr - p_ll + rho_ll * vel_L * sMu_L - rho_rr * vel_R * sMu_R) / + (rho_ll * sMu_L - rho_rr * sMu_R) + if Ssl <= 0.0 <= SStar + densStar = rho_ll * sMu_L / (Ssl - SStar) + enerStar = e_ll + (SStar - vel_L) * (SStar + p_ll / (rho_ll * sMu_L)) + UStar1 = densStar + UStar4 = densStar * enerStar + if orientation == 1 # x-direction + UStar2 = densStar * SStar + UStar3 = densStar * v2_ll + elseif orientation == 2 # y-direction + UStar2 = densStar * v1_ll + UStar3 = densStar * SStar + end + f1 = f_ll[1] + Ssl * (UStar1 - rho_ll) + f2 = f_ll[2] + Ssl * (UStar2 - rho_v1_ll) + f3 = f_ll[3] + Ssl * (UStar3 - rho_v2_ll) + f4 = f_ll[4] + Ssl * (UStar4 - rho_e_ll) + else + densStar = rho_rr * sMu_R / (Ssr - SStar) + enerStar = e_rr + (SStar - vel_R) * (SStar + p_rr / (rho_rr * sMu_R)) + UStar1 = densStar + UStar4 = densStar * enerStar + if orientation == 1 # x-direction + UStar2 = densStar * SStar + UStar3 = densStar * v2_rr + elseif orientation == 2 # y-direction + UStar2 = densStar * v1_rr + UStar3 = densStar * SStar + end + f1 = f_rr[1] + Ssr * (UStar1 - rho_rr) + f2 = f_rr[2] + Ssr * 
(UStar2 - rho_v1_rr) + f3 = f_rr[3] + Ssr * (UStar3 - rho_v2_rr) + f4 = f_rr[4] + Ssr * (UStar4 - rho_e_rr) + end end - end - return SVector(f1, f2, f3, f4) + return SVector(f1, f2, f3, f4) end - """ flux_hlle(u_ll, u_rr, orientation, equations::CompressibleEulerEquations2D) @@ -1206,171 +1226,167 @@ of the numerical flux. On Godunov-type methods near low densities. [DOI: 10.1016/0021-9991(91)90211-3](https://doi.org/10.1016/0021-9991(91)90211-3) """ -function flux_hlle(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) - # Calculate primitive variables, enthalpy and speed of sound - rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) - - # `u_ll[4]` is total energy `rho_e_ll` on the left - H_ll = (u_ll[4] + p_ll) / rho_ll - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - - # `u_rr[4]` is total energy `rho_e_rr` on the right - H_rr = (u_rr[4] + p_rr) / rho_rr - c_rr = sqrt(equations.gamma * p_rr / rho_rr) - - # Compute Roe averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sum_sqrt_rho = inv(sqrt_rho_ll + sqrt_rho_rr) - - v1_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr) * inv_sum_sqrt_rho - v2_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr) * inv_sum_sqrt_rho - v_roe_mag = v1_roe^2 + v2_roe^2 - - H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) * inv_sum_sqrt_rho - c_roe = sqrt((equations.gamma - 1) * (H_roe - 0.5 * v_roe_mag)) - - # Compute convenience constant for positivity preservation, see - # https://doi.org/10.1016/0021-9991(91)90211-3 - beta = sqrt(0.5 * (equations.gamma - 1) / equations.gamma) - - # Estimate the edges of the Riemann fan (with positivity conservation) - if orientation == 1 # x-direction - SsL = min(v1_roe - c_roe, v1_ll - beta * c_ll, zero(v1_roe)) - SsR = max(v1_roe + c_roe, v1_rr + beta * c_rr, zero(v1_roe)) - elseif orientation == 2 # y-direction - SsL = min(v2_roe - c_roe, v2_ll - beta * c_ll, zero(v2_roe)) - SsR = max(v2_roe + c_roe, v2_rr + beta * c_rr, zero(v2_roe)) - end - - if SsL >= 0.0 && SsR > 0.0 - # Positive supersonic speed - f_ll = flux(u_ll, orientation, equations) - - f1 = f_ll[1] - f2 = f_ll[2] - f3 = f_ll[3] - f4 = f_ll[4] - elseif SsR <= 0.0 && SsL < 0.0 - # Negative supersonic speed - f_rr = flux(u_rr, orientation, equations) - - f1 = f_rr[1] - f2 = f_rr[2] - f3 = f_rr[3] - f4 = f_rr[4] - else - # Subsonic case - # Compute left and right fluxes - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) +function flux_hlle(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations2D) + # Calculate primitive variables, enthalpy and speed of sound + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + # `u_ll[4]` is total energy `rho_e_ll` on the left + H_ll = (u_ll[4] + p_ll) / rho_ll + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + + # `u_rr[4]` is total energy `rho_e_rr` on the right + H_rr = (u_rr[4] + p_rr) / rho_rr + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + # Compute Roe averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + inv_sum_sqrt_rho = inv(sqrt_rho_ll + sqrt_rho_rr) + + v1_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr) * inv_sum_sqrt_rho + v2_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr) * inv_sum_sqrt_rho + v_roe_mag = v1_roe^2 + v2_roe^2 + + H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) * inv_sum_sqrt_rho + c_roe = sqrt((equations.gamma - 1) * (H_roe - 0.5 * v_roe_mag)) + + # 
Compute convenience constant for positivity preservation, see + # https://doi.org/10.1016/0021-9991(91)90211-3 + beta = sqrt(0.5 * (equations.gamma - 1) / equations.gamma) + + # Estimate the edges of the Riemann fan (with positivity conservation) + if orientation == 1 # x-direction + SsL = min(v1_roe - c_roe, v1_ll - beta * c_ll, zero(v1_roe)) + SsR = max(v1_roe + c_roe, v1_rr + beta * c_rr, zero(v1_roe)) + elseif orientation == 2 # y-direction + SsL = min(v2_roe - c_roe, v2_ll - beta * c_ll, zero(v2_roe)) + SsR = max(v2_roe + c_roe, v2_rr + beta * c_rr, zero(v2_roe)) + end - f1 = (SsR * f_ll[1] - SsL * f_rr[1] + SsL * SsR * (u_rr[1] - u_ll[1])) / (SsR - SsL) - f2 = (SsR * f_ll[2] - SsL * f_rr[2] + SsL * SsR * (u_rr[2] - u_ll[2])) / (SsR - SsL) - f3 = (SsR * f_ll[3] - SsL * f_rr[3] + SsL * SsR * (u_rr[3] - u_ll[3])) / (SsR - SsL) - f4 = (SsR * f_ll[4] - SsL * f_rr[4] + SsL * SsR * (u_rr[4] - u_ll[4])) / (SsR - SsL) - end + if SsL >= 0.0 && SsR > 0.0 + # Positive supersonic speed + f_ll = flux(u_ll, orientation, equations) + + f1 = f_ll[1] + f2 = f_ll[2] + f3 = f_ll[3] + f4 = f_ll[4] + elseif SsR <= 0.0 && SsL < 0.0 + # Negative supersonic speed + f_rr = flux(u_rr, orientation, equations) + + f1 = f_rr[1] + f2 = f_rr[2] + f3 = f_rr[3] + f4 = f_rr[4] + else + # Subsonic case + # Compute left and right fluxes + f_ll = flux(u_ll, orientation, equations) + f_rr = flux(u_rr, orientation, equations) + + f1 = (SsR * f_ll[1] - SsL * f_rr[1] + SsL * SsR * (u_rr[1] - u_ll[1])) / + (SsR - SsL) + f2 = (SsR * f_ll[2] - SsL * f_rr[2] + SsL * SsR * (u_rr[2] - u_ll[2])) / + (SsR - SsL) + f3 = (SsR * f_ll[3] - SsL * f_rr[3] + SsL * SsR * (u_rr[3] - u_ll[3])) / + (SsR - SsL) + f4 = (SsR * f_ll[4] - SsL * f_rr[4] + SsL * SsR * (u_rr[4] - u_ll[4])) / + (SsR - SsL) + end - return SVector(f1, f2, f3, f4) + return SVector(f1, f2, f3, f4) end - @inline function max_abs_speeds(u, equations::CompressibleEulerEquations2D) - rho, v1, v2, p = cons2prim(u, equations) - c = sqrt(equations.gamma * p / rho) + rho, v1, v2, p = cons2prim(u, equations) + c = sqrt(equations.gamma * p / rho) - return abs(v1) + c, abs(v2) + c + return abs(v1) + c, abs(v2) + c end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u + rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - return SVector(rho, v1, v2, p) + return SVector(rho, v1, v2, p) end - # Convert conservative variables to entropy @inline function cons2entropy(u, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u + rho, rho_v1, rho_v2, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v_square = v1^2 + v2^2 - p = (equations.gamma - 1) * (rho_e - 0.5 * rho * v_square) - s = log(p) - equations.gamma*log(rho) - rho_p = rho / p + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v_square = v1^2 + v2^2 + p = (equations.gamma - 1) * (rho_e - 0.5 * rho * v_square) + s = log(p) - equations.gamma * log(rho) + rho_p = rho / p - w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square - w2 = rho_p * v1 - w3 = rho_p * v2 - w4 = -rho_p + w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square + w2 = rho_p * v1 + w3 = rho_p * v2 + w4 = -rho_p - return SVector(w1, w2, w3, w4) + return SVector(w1, w2, w3, w4) end @inline 
function entropy2cons(w, equations::CompressibleEulerEquations2D) - # See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD - # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) - @unpack gamma = equations - - # convert to entropy `-rho * s` used by Hughes, France, Mallet (1986) - # instead of `-rho * s / (gamma - 1)` - V1, V2, V3, V5 = w .* (gamma-1) - - # s = specific entropy, eq. (53) - s = gamma - V1 + (V2^2 + V3^2)/(2*V5) - - # eq. (52) - rho_iota = ((gamma-1) / (-V5)^gamma)^(equations.inv_gamma_minus_one)*exp(-s * equations.inv_gamma_minus_one) - - # eq. (51) - rho = -rho_iota * V5 - rho_v1 = rho_iota * V2 - rho_v2 = rho_iota * V3 - rho_e = rho_iota * (1-(V2^2 + V3^2)/(2*V5)) - return SVector(rho, rho_v1, rho_v2, rho_e) + # See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD + # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) + @unpack gamma = equations + + # convert to entropy `-rho * s` used by Hughes, France, Mallet (1986) + # instead of `-rho * s / (gamma - 1)` + V1, V2, V3, V5 = w .* (gamma - 1) + + # s = specific entropy, eq. (53) + s = gamma - V1 + (V2^2 + V3^2) / (2 * V5) + + # eq. (52) + rho_iota = ((gamma - 1) / (-V5)^gamma)^(equations.inv_gamma_minus_one) * + exp(-s * equations.inv_gamma_minus_one) + + # eq. (51) + rho = -rho_iota * V5 + rho_v1 = rho_iota * V2 + rho_v2 = rho_iota * V3 + rho_e = rho_iota * (1 - (V2^2 + V3^2) / (2 * V5)) + return SVector(rho, rho_v1, rho_v2, rho_e) end - - - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::CompressibleEulerEquations2D) - rho, v1, v2, p = prim - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_e = p * equations.inv_gamma_minus_one + 0.5 * (rho_v1 * v1 + rho_v2 * v2) - return SVector(rho, rho_v1, rho_v2, rho_e) + rho, v1, v2, p = prim + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_e = p * equations.inv_gamma_minus_one + 0.5 * (rho_v1 * v1 + rho_v2 * v2) + return SVector(rho, rho_v1, rho_v2, rho_e) end - @inline function density(u, equations::CompressibleEulerEquations2D) - rho = u[1] - return rho + rho = u[1] + return rho end - @inline function pressure(u, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2) / rho) - return p + rho, rho_v1, rho_v2, rho_e = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2) / rho) + return p end - @inline function density_pressure(u, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - rho_times_p = (equations.gamma - 1) * (rho * rho_e - 0.5 * (rho_v1^2 + rho_v2^2)) - return rho_times_p + rho, rho_v1, rho_v2, rho_e = u + rho_times_p = (equations.gamma - 1) * (rho * rho_e - 0.5 * (rho_v1^2 + rho_v2^2)) + return rho_times_p end - # Calculates the entropy flux in direction "orientation" and the entropy variables for a state cons # NOTE: This method seems to work currently (b82534e) but is never used anywhere. Thus it is # commented here until someone uses it or writes a test for it. 
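The conversions above are designed as mutual inverses; a quick round-trip check
(an illustrative sketch, not one of the shipped tests):

```julia
using Trixi, StaticArrays

equations = CompressibleEulerEquations2D(1.4)
u = prim2cons(SVector(1.1, 0.2, -0.3, 2.0), equations)

w = cons2entropy(u, equations)
entropy2cons(w, equations) ≈ u  # recover the conservative state
```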
@@ -1394,47 +1410,42 @@ end # return entropy, entropy_flux # end - # Calculate thermodynamic entropy for a conservative state `cons` @inline function entropy_thermodynamic(cons, equations::CompressibleEulerEquations2D) - # Pressure - p = (equations.gamma - 1) * (cons[4] - 1/2 * (cons[2]^2 + cons[3]^2) / cons[1]) + # Pressure + p = (equations.gamma - 1) * (cons[4] - 1 / 2 * (cons[2]^2 + cons[3]^2) / cons[1]) - # Thermodynamic entropy - s = log(p) - equations.gamma*log(cons[1]) + # Thermodynamic entropy + s = log(p) - equations.gamma * log(cons[1]) - return s + return s end - # Calculate mathematical entropy for a conservative state `cons` @inline function entropy_math(cons, equations::CompressibleEulerEquations2D) - # Mathematical entropy - S = -entropy_thermodynamic(cons, equations) * cons[1] * equations.inv_gamma_minus_one + # Mathematical entropy + S = -entropy_thermodynamic(cons, equations) * cons[1] * + equations.inv_gamma_minus_one - return S + return S end - # Default entropy is the mathematical entropy -@inline entropy(cons, equations::CompressibleEulerEquations2D) = entropy_math(cons, equations) - +@inline function entropy(cons, equations::CompressibleEulerEquations2D) + entropy_math(cons, equations) +end # Calculate total energy for a conservative state `cons` @inline energy_total(cons, ::CompressibleEulerEquations2D) = cons[4] - # Calculate kinetic energy for a conservative state `cons` @inline function energy_kinetic(u, equations::CompressibleEulerEquations2D) - rho, rho_v1, rho_v2, rho_e = u - return (rho_v1^2 + rho_v2^2) / (2 * rho) + rho, rho_v1, rho_v2, rho_e = u + return (rho_v1^2 + rho_v2^2) / (2 * rho) end - # Calculate internal energy for a conservative state `cons` @inline function energy_internal(cons, equations::CompressibleEulerEquations2D) - return energy_total(cons, equations) - energy_kinetic(cons, equations) + return energy_total(cons, equations) - energy_kinetic(cons, equations) end - - end # @muladd diff --git a/src/equations/compressible_euler_3d.jl b/src/equations/compressible_euler_3d.jl index c56b7114669..c16a454b176 100644 --- a/src/equations/compressible_euler_3d.jl +++ b/src/equations/compressible_euler_3d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" CompressibleEulerEquations3D(gamma) @@ -42,20 +42,23 @@ p = (\gamma - 1) \left( \rho e - \frac{1}{2} \rho (v_1^2+v_2^2+v_3^2) \right) ``` the pressure. 
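For example, a diatomic ideal gas such as dry air is set up as (a usage sketch):

```julia
equations = CompressibleEulerEquations3D(1.4)  # ratio of specific heats γ = 1.4
```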
""" -struct CompressibleEulerEquations3D{RealT<:Real} <: AbstractCompressibleEulerEquations{3, 5} - gamma::RealT # ratio of specific heats - inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications - - function CompressibleEulerEquations3D(gamma) - γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) - new{typeof(γ)}(γ, inv_gamma_minus_one) - end +struct CompressibleEulerEquations3D{RealT <: Real} <: + AbstractCompressibleEulerEquations{3, 5} + gamma::RealT # ratio of specific heats + inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications + + function CompressibleEulerEquations3D(gamma) + γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) + new{typeof(γ)}(γ, inv_gamma_minus_one) + end end - -varnames(::typeof(cons2cons), ::CompressibleEulerEquations3D) = ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e") -varnames(::typeof(cons2prim), ::CompressibleEulerEquations3D) = ("rho", "v1", "v2", "v3", "p") - +function varnames(::typeof(cons2cons), ::CompressibleEulerEquations3D) + ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e") +end +function varnames(::typeof(cons2prim), ::CompressibleEulerEquations3D) + ("rho", "v1", "v2", "v3", "p") +end # Set initial conditions at physical location `x` for time `t` """ @@ -64,36 +67,36 @@ varnames(::typeof(cons2prim), ::CompressibleEulerEquations3D) = ("rho", "v1", "v A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equations::CompressibleEulerEquations3D) - rho = 1.0 - rho_v1 = 0.1 - rho_v2 = -0.2 - rho_v3 = 0.7 - rho_e = 10.0 - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e) + rho = 1.0 + rho_v1 = 0.1 + rho_v2 = -0.2 + rho_v3 = 0.7 + rho_e = 10.0 + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e) end - """ initial_condition_convergence_test(x, t, equations::CompressibleEulerEquations3D) A smooth initial condition used for convergence tests in combination with [`source_terms_convergence_test`](@ref). """ -function initial_condition_convergence_test(x, t, equations::CompressibleEulerEquations3D) - c = 2 - A = 0.1 - L = 2 - f = 1/L - ω = 2 * pi * f - ini = c + A * sin(ω * (x[1] + x[2] + x[3] - t)) - - rho = ini - rho_v1 = ini - rho_v2 = ini - rho_v3 = ini - rho_e = ini^2 - - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e) +function initial_condition_convergence_test(x, t, + equations::CompressibleEulerEquations3D) + c = 2 + A = 0.1 + L = 2 + f = 1 / L + ω = 2 * pi * f + ini = c + A * sin(ω * (x[1] + x[2] + x[3] - t)) + + rho = ini + rho_v1 = ini + rho_v2 = ini + rho_v3 = ini + rho_e = ini^2 + + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e) end """ @@ -102,33 +105,33 @@ end Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref). """ -@inline function source_terms_convergence_test(u, x, t, equations::CompressibleEulerEquations3D) - # Same settings as in `initial_condition` - c = 2 - A = 0.1 - L = 2 - f = 1/L - ω = 2 * pi * f - γ = equations.gamma - - x1, x2, x3 = x - si, co = sincos(ω * (x1 + x2 + x3 - t)) - rho = c + A * si - rho_x = ω * A * co - # Note that d/dt rho = -d/dx rho = -d/dy rho = - d/dz rho. 
- - tmp = (2 * rho - 1.5) * (γ - 1) - - du1 = 2 * rho_x - du2 = rho_x * (2 + tmp) - du3 = du2 - du4 = du2 - du5 = rho_x * (4 * rho + 3 * tmp) - - return SVector(du1, du2, du3, du4, du5) +@inline function source_terms_convergence_test(u, x, t, + equations::CompressibleEulerEquations3D) + # Same settings as in `initial_condition` + c = 2 + A = 0.1 + L = 2 + f = 1 / L + ω = 2 * pi * f + γ = equations.gamma + + x1, x2, x3 = x + si, co = sincos(ω * (x1 + x2 + x3 - t)) + rho = c + A * si + rho_x = ω * A * co + # Note that d/dt rho = -d/dx rho = -d/dy rho = - d/dz rho. + + tmp = (2 * rho - 1.5) * (γ - 1) + + du1 = 2 * rho_x + du2 = rho_x * (2 + tmp) + du3 = du2 + du4 = du2 + du5 = rho_x * (4 * rho + 3 * tmp) + + return SVector(du1, du2, du3, du4, du5) end - """ initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerEquations3D) @@ -137,28 +140,28 @@ A weak blast wave taken from A provably entropy stable subcell shock capturing approach for high order split form DG [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -function initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerEquations3D) - # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - # Set up spherical coordinates - inicenter = (0, 0, 0) - x_norm = x[1] - inicenter[1] - y_norm = x[2] - inicenter[2] - z_norm = x[3] - inicenter[3] - r = sqrt(x_norm^2 + y_norm^2 + z_norm^2) - phi = atan(y_norm, x_norm) - theta = iszero(r) ? 0.0 : acos(z_norm / r) - - # Calculate primitive variables - rho = r > 0.5 ? 1.0 : 1.1691 - v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) * sin(theta) - v2 = r > 0.5 ? 0.0 : 0.1882 * sin(phi) * sin(theta) - v3 = r > 0.5 ? 0.0 : 0.1882 * cos(theta) - p = r > 0.5 ? 1.0 : 1.245 - - return prim2cons(SVector(rho, v1, v2, v3, p), equations) +function initial_condition_weak_blast_wave(x, t, + equations::CompressibleEulerEquations3D) + # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3) + # Set up spherical coordinates + inicenter = (0, 0, 0) + x_norm = x[1] - inicenter[1] + y_norm = x[2] - inicenter[2] + z_norm = x[3] - inicenter[3] + r = sqrt(x_norm^2 + y_norm^2 + z_norm^2) + phi = atan(y_norm, x_norm) + theta = iszero(r) ? 0.0 : acos(z_norm / r) + + # Calculate primitive variables + rho = r > 0.5 ? 1.0 : 1.1691 + v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) * sin(theta) + v2 = r > 0.5 ? 0.0 : 0.1882 * sin(phi) * sin(theta) + v3 = r > 0.5 ? 0.0 : 0.1882 * cos(theta) + p = r > 0.5 ? 1.0 : 1.245 + + return prim2cons(SVector(rho, v1, v2, v3, p), equations) end - """ initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::CompressibleEulerEquations3D) @@ -169,23 +172,24 @@ Setup used for convergence tests of the Euler equations with self-gravity used i in combination with [`source_terms_eoc_test_coupled_euler_gravity`](@ref) or [`source_terms_eoc_test_euler`](@ref). """ -function initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::CompressibleEulerEquations3D) - # OBS! this assumes that γ = 2 other manufactured source terms are incorrect - if equations.gamma != 2.0 - error("adiabatic constant must be 2 for the coupling convergence test") - end - c = 2.0 - A = 0.1 - ini = c + A * sin(pi * (x[1] + x[2] + x[3] - t)) - G = 1.0 # gravitational constant - - rho = ini - v1 = 1.0 - v2 = 1.0 - v3 = 1.0 - p = ini^2 * G * 2 / (3 * pi) # "3" is the number of spatial dimensions - - return prim2cons(SVector(rho, v1, v2, v3, p), equations) +function initial_condition_eoc_test_coupled_euler_gravity(x, t, + equations::CompressibleEulerEquations3D) + # OBS! 
this assumes that γ = 2 other manufactured source terms are incorrect + if equations.gamma != 2.0 + error("adiabatic constant must be 2 for the coupling convergence test") + end + c = 2.0 + A = 0.1 + ini = c + A * sin(pi * (x[1] + x[2] + x[3] - t)) + G = 1.0 # gravitational constant + + rho = ini + v1 = 1.0 + v2 = 1.0 + v3 = 1.0 + p = ini^2 * G * 2 / (3 * pi) # "3" is the number of spatial dimensions + + return prim2cons(SVector(rho, v1, v2, v3, p), equations) end """ @@ -197,27 +201,28 @@ Setup used for convergence tests of the Euler equations with self-gravity used i [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) in combination with [`initial_condition_eoc_test_coupled_euler_gravity`](@ref). """ -@inline function source_terms_eoc_test_coupled_euler_gravity(u, x, t, equations::CompressibleEulerEquations3D) - # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` - c = 2.0 - A = 0.1 - G = 1.0 # gravitational constant, must match coupling solver - C_grav = -4 * G / (3 * pi) # "3" is the number of spatial dimensions # 2D: -2.0*G/pi - - x1, x2, x3 = x - # TODO: sincospi - si, co = sincos(pi * (x1 + x2 + x3 - t)) - rhox = A * pi * co - rho = c + A * si - - # In "2 * rhox", the "2" is "number of spatial dimensions minus one" - du1 = 2 * rhox - du2 = 2 * rhox - du3 = 2 * rhox - du4 = 2 * rhox - du5 = 2 * rhox * (3/2 - C_grav*rho) # "3" in "3/2" is the number of spatial dimensions - - return SVector(du1, du2, du3, du4, du5) +@inline function source_terms_eoc_test_coupled_euler_gravity(u, x, t, + equations::CompressibleEulerEquations3D) + # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` + c = 2.0 + A = 0.1 + G = 1.0 # gravitational constant, must match coupling solver + C_grav = -4 * G / (3 * pi) # "3" is the number of spatial dimensions # 2D: -2.0*G/pi + + x1, x2, x3 = x + # TODO: sincospi + si, co = sincos(pi * (x1 + x2 + x3 - t)) + rhox = A * pi * co + rho = c + A * si + + # In "2 * rhox", the "2" is "number of spatial dimensions minus one" + du1 = 2 * rhox + du2 = 2 * rhox + du3 = 2 * rhox + du4 = 2 * rhox + du5 = 2 * rhox * (3 / 2 - C_grav * rho) # "3" in "3/2" is the number of spatial dimensions + + return SVector(du1, du2, du3, du4, du5) end """ @@ -235,28 +240,27 @@ in combination with [`initial_condition_eoc_test_coupled_euler_gravity`](@ref). [`source_terms_eoc_test_coupled_euler_gravity`](@ref) instead. 
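A sketch of how such manufactured source terms are attached to a
semidiscretization; `mesh` and `solver` are assumed to be set up elsewhere:

```julia
semi = SemidiscretizationHyperbolic(mesh, equations,
                                    initial_condition_eoc_test_coupled_euler_gravity,
                                    solver;
                                    source_terms = source_terms_eoc_test_euler)
```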
""" function source_terms_eoc_test_euler(u, x, t, equations::CompressibleEulerEquations3D) - # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` - c = 2.0 - A = 0.1 - G = 1.0 - C_grav = -4 * G / (3 * pi) # "3" is the number of spatial dimensions - - x1, x2, x3 = x - # TODO: sincospi - si, co = sincos(pi * (x1 + x2 + x3 - t)) - rhox = A * pi * co - rho = c + A * si - - du1 = rhox * 2 - du2 = rhox * (2 - C_grav * rho) - du3 = rhox * (2 - C_grav * rho) - du4 = rhox * (2 - C_grav * rho) - du5 = rhox * (3 - 5 * C_grav * rho) - - return SVector(du1, du2, du3, du4, du5) + # Same settings as in `initial_condition_eoc_test_coupled_euler_gravity` + c = 2.0 + A = 0.1 + G = 1.0 + C_grav = -4 * G / (3 * pi) # "3" is the number of spatial dimensions + + x1, x2, x3 = x + # TODO: sincospi + si, co = sincos(pi * (x1 + x2 + x3 - t)) + rhox = A * pi * co + rho = c + A * si + + du1 = rhox * 2 + du2 = rhox * (2 - C_grav * rho) + du3 = rhox * (2 - C_grav * rho) + du4 = rhox * (2 - C_grav * rho) + du5 = rhox * (3 - 5 * C_grav * rho) + + return SVector(du1, du2, du3, du4, du5) end - """ boundary_condition_slip_wall(u_inner, normal_direction, x, t, surface_flux_function, equations::CompressibleEulerEquations3D) @@ -281,46 +285,50 @@ Details about the 1D pressure Riemann solution can be found in Section 6.3.3 of x, t, surface_flux_function, equations::CompressibleEulerEquations3D) + norm_ = norm(normal_direction) + # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later + normal = normal_direction / norm_ + + # Some vector that can't be identical to normal_vector (unless normal_vector == 0) + tangent1 = SVector(normal_direction[2], normal_direction[3], -normal_direction[1]) + # Orthogonal projection + tangent1 -= dot(normal, tangent1) * normal + tangent1 = normalize(tangent1) + + # Third orthogonal vector + tangent2 = normalize(cross(normal_direction, tangent1)) + + # rotate the internal solution state + u_local = rotate_to_x(u_inner, normal, tangent1, tangent2, equations) + + # compute the primitive variables + rho_local, v_normal, v_tangent1, v_tangent2, p_local = cons2prim(u_local, equations) + + # Get the solution of the pressure Riemann problem + # See Section 6.3.3 of + # Eleuterio F. 
Toro (2009) + # Riemann Solvers and Numerical Methods for Fluid Dynamics: A Practical Introduction + # [DOI: 10.1007/b79761](https://doi.org/10.1007/b79761) + if v_normal <= 0.0 + sound_speed = sqrt(equations.gamma * p_local / rho_local) # local sound speed + p_star = p_local * + (1 + 0.5 * (equations.gamma - 1) * v_normal / sound_speed)^(2 * + equations.gamma * + equations.inv_gamma_minus_one) + else # v_normal > 0.0 + A = 2 / ((equations.gamma + 1) * rho_local) + B = p_local * (equations.gamma - 1) / (equations.gamma + 1) + p_star = p_local + + 0.5 * v_normal / A * + (v_normal + sqrt(v_normal^2 + 4 * A * (p_local + B))) + end - norm_ = norm(normal_direction) - # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later - normal = normal_direction / norm_ - - # Some vector that can't be identical to normal_vector (unless normal_vector == 0) - tangent1 = SVector(normal_direction[2], normal_direction[3], -normal_direction[1]) - # Orthogonal projection - tangent1 -= dot(normal, tangent1) * normal - tangent1 = normalize(tangent1) - - # Third orthogonal vector - tangent2 = normalize(cross(normal_direction, tangent1)) - - # rotate the internal solution state - u_local = rotate_to_x(u_inner, normal, tangent1, tangent2, equations) - - # compute the primitive variables - rho_local, v_normal, v_tangent1, v_tangent2, p_local = cons2prim(u_local, equations) - - # Get the solution of the pressure Riemann problem - # See Section 6.3.3 of - # Eleuterio F. Toro (2009) - # Riemann Solvers and Numerical Methods for Fluid Dynamics: A Practical Introduction - # [DOI: 10.1007/b79761](https://doi.org/10.1007/b79761) - if v_normal <= 0.0 - sound_speed = sqrt(equations.gamma * p_local / rho_local) # local sound speed - p_star = p_local * (1 + 0.5 * (equations.gamma - 1) * v_normal / sound_speed)^(2 * equations.gamma * equations.inv_gamma_minus_one) - else # v_normal > 0.0 - A = 2 / ((equations.gamma + 1) * rho_local) - B = p_local * (equations.gamma - 1) / (equations.gamma + 1) - p_star = p_local + 0.5 * v_normal / A * (v_normal + sqrt(v_normal^2 + 4 * A * (p_local + B))) - end - - # For the slip wall we directly set the flux as the normal velocity is zero - return SVector(zero(eltype(u_inner)), - p_star * normal[1], - p_star * normal[2], - p_star * normal[3], - zero(eltype(u_inner))) * norm_ + # For the slip wall we directly set the flux as the normal velocity is zero + return SVector(zero(eltype(u_inner)), + p_star * normal[1], + p_star * normal[2], + p_star * normal[3], + zero(eltype(u_inner))) * norm_ end """ @@ -333,18 +341,18 @@ Should be used together with [`TreeMesh`](@ref). 
direction, x, t, surface_flux_function, equations::CompressibleEulerEquations3D) - # get the appropriate normal vector from the orientation - if orientation == 1 - normal_direction = SVector(1.0, 0.0, 0.0) - elseif orientation == 2 - normal_direction = SVector(0.0, 1.0, 0.0) - else # orientation == 3 - normal_direction = SVector(0.0, 0.0, 1.0) - end - - # compute and return the flux using `boundary_condition_slip_wall` routine above - return boundary_condition_slip_wall(u_inner, normal_direction, direction, - x, t, surface_flux_function, equations) + # get the appropriate normal vector from the orientation + if orientation == 1 + normal_direction = SVector(1.0, 0.0, 0.0) + elseif orientation == 2 + normal_direction = SVector(0.0, 1.0, 0.0) + else # orientation == 3 + normal_direction = SVector(0.0, 0.0, 1.0) + end + + # compute and return the flux using `boundary_condition_slip_wall` routine above + return boundary_condition_slip_wall(u_inner, normal_direction, direction, + x, t, surface_flux_function, equations) end """ @@ -357,63 +365,66 @@ Should be used together with [`StructuredMesh`](@ref). direction, x, t, surface_flux_function, equations::CompressibleEulerEquations3D) - # flip sign of normal to make it outward pointing, then flip the sign of the normal flux back - # to be inward pointing on the -x, -y, and -z sides due to the orientation convention used by StructuredMesh - if isodd(direction) - boundary_flux = -boundary_condition_slip_wall(u_inner, -normal_direction, - x, t, surface_flux_function, equations) - else - boundary_flux = boundary_condition_slip_wall(u_inner, normal_direction, - x, t, surface_flux_function, equations) - end - - return boundary_flux + # flip sign of normal to make it outward pointing, then flip the sign of the normal flux back + # to be inward pointing on the -x, -y, and -z sides due to the orientation convention used by StructuredMesh + if isodd(direction) + boundary_flux = -boundary_condition_slip_wall(u_inner, -normal_direction, + x, t, surface_flux_function, + equations) + else + boundary_flux = boundary_condition_slip_wall(u_inner, normal_direction, + x, t, surface_flux_function, + equations) + end + + return boundary_flux end # Calculate 1D flux for a single point @inline function flux(u, orientation::Integer, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - if orientation == 1 - f1 = rho_v1 - f2 = rho_v1 * v1 + p - f3 = rho_v1 * v2 - f4 = rho_v1 * v3 - f5 = (rho_e + p) * v1 - elseif orientation == 2 - f1 = rho_v2 - f2 = rho_v2 * v1 - f3 = rho_v2 * v2 + p - f4 = rho_v2 * v3 - f5 = (rho_e + p) * v2 - else - f1 = rho_v3 - f2 = rho_v3 * v1 - f3 = rho_v3 * v2 - f4 = rho_v3 * v3 + p - f5 = (rho_e + p) * v3 - end - return SVector(f1, f2, f3, f4, f5) + rho, rho_v1, rho_v2, rho_v3, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + if orientation == 1 + f1 = rho_v1 + f2 = rho_v1 * v1 + p + f3 = rho_v1 * v2 + f4 = rho_v1 * v3 + f5 = (rho_e + p) * v1 + elseif orientation == 2 + f1 = rho_v2 + f2 = rho_v2 * v1 + f3 = rho_v2 * v2 + p + f4 = rho_v2 * v3 + f5 = (rho_e + p) * v2 + else + f1 = rho_v3 + f2 = rho_v3 * v1 + f3 = rho_v3 * v2 + f4 = rho_v3 * v3 + p + f5 = (rho_e + p) * v3 + end + return SVector(f1, f2, f3, f4, f5) end -@inline function flux(u, 
normal::AbstractVector, equations::CompressibleEulerEquations3D) - rho_e = last(u) - rho, v1, v2, v3, p = cons2prim(u, equations) - - v_normal = v1 * normal[1] + v2 * normal[2] + v3 * normal[3] - rho_v_normal = rho * v_normal - f1 = rho_v_normal - f2 = rho_v_normal * v1 + p * normal[1] - f3 = rho_v_normal * v2 + p * normal[2] - f4 = rho_v_normal * v3 + p * normal[3] - f5 = (rho_e + p) * v_normal - return SVector(f1, f2, f3, f4, f5) +@inline function flux(u, normal::AbstractVector, + equations::CompressibleEulerEquations3D) + rho_e = last(u) + rho, v1, v2, v3, p = cons2prim(u, equations) + + v_normal = v1 * normal[1] + v2 * normal[2] + v3 * normal[3] + rho_v_normal = rho * v_normal + f1 = rho_v_normal + f2 = rho_v_normal * v1 + p * normal[1] + f3 = rho_v_normal * v2 + p * normal[2] + f4 = rho_v_normal * v3 + p * normal[3] + f5 = (rho_e + p) * v_normal + return SVector(f1, f2, f3, f4, f5) end - """ flux_shima_etal(u_ll, u_rr, orientation_or_normal_direction, equations::CompressibleEulerEquations3D) @@ -430,73 +441,77 @@ The modification is in the energy flux to guarantee pressure equilibrium and was compressible flows [DOI: 10.1016/j.jcp.2020.110060](https://doi.org/10.1016/j.jcp.2020.110060) """ -@inline function flux_shima_etal(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - # Average each factor of products in flux - rho_avg = 1/2 * (rho_ll + rho_rr) - v1_avg = 1/2 * ( v1_ll + v1_rr) - v2_avg = 1/2 * ( v2_ll + v2_rr) - v3_avg = 1/2 * ( v3_ll + v3_rr) - p_avg = 1/2 * ( p_ll + p_rr) - kin_avg = 1/2 * (v1_ll*v1_rr + v2_ll*v2_rr + v3_ll*v3_rr) - - # Calculate fluxes depending on orientation - if orientation == 1 - pv1_avg = 1/2 * (p_ll*v1_rr + p_rr*v1_ll) - f1 = rho_avg * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg - f5 = p_avg*v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg - elseif orientation == 2 - pv2_avg = 1/2 * (p_ll*v2_rr + p_rr*v2_ll) - f1 = rho_avg * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = f1 * v3_avg - f5 = p_avg*v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg - else - pv3_avg = 1/2 * (p_ll*v3_rr + p_rr*v3_ll) - f1 = rho_avg * v3_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg + p_avg - f5 = p_avg*v3_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv3_avg - end - - return SVector(f1, f2, f3, f4, f5) -end +@inline function flux_shima_etal(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + # Average each factor of products in flux + rho_avg = 1 / 2 * (rho_ll + rho_rr) + v1_avg = 1 / 2 * (v1_ll + v1_rr) + v2_avg = 1 / 2 * (v2_ll + v2_rr) + v3_avg = 1 / 2 * (v3_ll + v3_rr) + p_avg = 1 / 2 * (p_ll + p_rr) + kin_avg = 1 / 2 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on orientation + if orientation == 1 + pv1_avg = 1 / 2 * (p_ll * v1_rr + p_rr * v1_ll) + f1 = rho_avg * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + f5 = p_avg * v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg + elseif orientation == 2 + pv2_avg = 1 / 2 * (p_ll * v2_rr + p_rr * v2_ll) + f1 = rho_avg * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = f1 * 
v3_avg + f5 = p_avg * v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg + else + pv3_avg = 1 / 2 * (p_ll * v3_rr + p_rr * v3_ll) + f1 = rho_avg * v3_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + p_avg + f5 = p_avg * v3_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv3_avg + end -@inline function flux_shima_etal(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations3D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v3_ll * normal_direction[3] - v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + v3_rr * normal_direction[3] - - # Average each factor of products in flux - rho_avg = 1/2 * (rho_ll + rho_rr) - v1_avg = 1/2 * ( v1_ll + v1_rr) - v2_avg = 1/2 * ( v2_ll + v2_rr) - v3_avg = 1/2 * ( v3_ll + v3_rr) - v_dot_n_avg = 1/2 * (v_dot_n_ll + v_dot_n_rr) - p_avg = 1/2 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr + v3_ll*v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - f4 = f1 * v3_avg + p_avg * normal_direction[3] - f5 = ( f1 * velocity_square_avg + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - return SVector(f1, f2, f3, f4, f5) + return SVector(f1, f2, f3, f4, f5) end +@inline function flux_shima_etal(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations3D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3] + v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + v3_rr * normal_direction[3] + + # Average each factor of products in flux + rho_avg = 1 / 2 * (rho_ll + rho_rr) + v1_avg = 1 / 2 * (v1_ll + v1_rr) + v2_avg = 1 / 2 * (v2_ll + v2_rr) + v3_avg = 1 / 2 * (v3_ll + v3_rr) + v_dot_n_avg = 1 / 2 * (v_dot_n_ll + v_dot_n_rr) + p_avg = 1 / 2 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction[1] + f3 = f1 * v2_avg + p_avg * normal_direction[2] + f4 = f1 * v3_avg + p_avg * normal_direction[3] + f5 = (f1 * velocity_square_avg + + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + return SVector(f1, f2, f3, f4, f5) +end """ flux_kennedy_gruber(u_ll, u_rr, orientation_or_normal_direction, @@ -508,79 +523,83 @@ Kinetic energy preserving two-point flux by Navier-Stokes equations for a compressible fluid [DOI: 10.1016/j.jcp.2007.09.020](https://doi.org/10.1016/j.jcp.2007.09.020) """ -@inline function flux_kennedy_gruber(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - # Unpack left and right state - rho_e_ll = last(u_ll) - rho_e_rr = last(u_rr) - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - # Average each factor of products in flux - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - 
v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - e_avg = 0.5 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = rho_avg * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg - f5 = (rho_avg * e_avg + p_avg) * v1_avg - elseif orientation == 2 - f1 = rho_avg * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = f1 * v3_avg - f5 = (rho_avg * e_avg + p_avg) * v2_avg - else - f1 = rho_avg * v3_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg + p_avg - f5 = (rho_avg * e_avg + p_avg) * v3_avg - end - - return SVector(f1, f2, f3, f4, f5) -end +@inline function flux_kennedy_gruber(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + # Unpack left and right state + rho_e_ll = last(u_ll) + rho_e_rr = last(u_rr) + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + # Average each factor of products in flux + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + e_avg = 0.5 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = rho_avg * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + f5 = (rho_avg * e_avg + p_avg) * v1_avg + elseif orientation == 2 + f1 = rho_avg * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = f1 * v3_avg + f5 = (rho_avg * e_avg + p_avg) * v2_avg + else + f1 = rho_avg * v3_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + p_avg + f5 = (rho_avg * e_avg + p_avg) * v3_avg + end -@inline function flux_kennedy_gruber(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations3D) - # Unpack left and right state - rho_e_ll = last(u_ll) - rho_e_rr = last(u_rr) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v3_rr = rho_v3_rr / rho_rr - - # Average each factor of products in flux - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - v_dot_n_avg = v1_avg * normal_direction[1] + v2_avg * normal_direction[2] + v3_avg * normal_direction[3] - p_avg = 0.5 * ((equations.gamma - 1) * (rho_e_ll - 0.5 * rho_ll * (v1_ll^2 + v2_ll^2 + v3_ll^2)) + - (equations.gamma - 1) * (rho_e_rr - 0.5 * rho_rr * (v1_rr^2 + v2_rr^2 + v3_rr^2))) - e_avg = 0.5 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - f4 = f1 * v3_avg + p_avg * normal_direction[3] - f5 = f1 * e_avg + p_avg * v_dot_n_avg - - return SVector(f1, f2, f3, f4, f5) + return SVector(f1, f2, f3, f4, f5) end +@inline function flux_kennedy_gruber(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations3D) + # Unpack left and right state + rho_e_ll = last(u_ll) + rho_e_rr = last(u_rr) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / 
rho_ll + v3_ll = rho_v3_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + + # Average each factor of products in flux + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + v_dot_n_avg = v1_avg * normal_direction[1] + v2_avg * normal_direction[2] + + v3_avg * normal_direction[3] + p_avg = 0.5 * ((equations.gamma - 1) * + (rho_e_ll - 0.5 * rho_ll * (v1_ll^2 + v2_ll^2 + v3_ll^2)) + + (equations.gamma - 1) * + (rho_e_rr - 0.5 * rho_rr * (v1_rr^2 + v2_rr^2 + v3_rr^2))) + e_avg = 0.5 * (rho_e_ll / rho_ll + rho_e_rr / rho_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction[1] + f3 = f1 * v2_avg + p_avg * normal_direction[2] + f4 = f1 * v3_avg + p_avg * normal_direction[3] + f5 = f1 * e_avg + p_avg * v_dot_n_avg + + return SVector(f1, f2, f3, f4, f5) +end """ flux_chandrashekar(u_ll, u_rr, orientation, equations::CompressibleEulerEquations3D) @@ -591,51 +610,54 @@ Entropy conserving two-point flux by for Compressible Euler and Navier-Stokes Equations [DOI: 10.4208/cicp.170712.010313a](https://doi.org/10.4208/cicp.170712.010313a) """ -@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - beta_ll = 0.5 * rho_ll / p_ll - beta_rr = 0.5 * rho_rr / p_rr - specific_kin_ll = 0.5 * (v1_ll^2 + v2_ll^2 + v3_ll^2) - specific_kin_rr = 0.5 * (v1_rr^2 + v2_rr^2 + v3_rr^2) - - # Compute the necessary mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - rho_mean = ln_mean(rho_ll, rho_rr) - beta_mean = ln_mean(beta_ll, beta_rr) - beta_avg = 0.5 * (beta_ll + beta_rr) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_mean = 0.5 * rho_avg / beta_avg - velocity_square_avg = specific_kin_ll + specific_kin_rr - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_mean - f3 = f1 * v2_avg - f4 = f1 * v3_avg - f5 = f1 * 0.5*(1/(equations.gamma-1)/beta_mean - velocity_square_avg)+ f2*v1_avg + f3*v2_avg + f4*v3_avg - elseif orientation == 2 - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_mean - f4 = f1 * v3_avg - f5 = f1 * 0.5*(1/(equations.gamma-1)/beta_mean - velocity_square_avg)+ f2*v1_avg + f3*v2_avg + f4*v3_avg - else - f1 = rho_mean * v3_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg + p_mean - f5 = f1 * 0.5*(1/(equations.gamma-1)/beta_mean - velocity_square_avg)+ f2*v1_avg + f3*v2_avg + f4*v3_avg - end - - return SVector(f1, f2, f3, f4, f5) -end +@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + beta_ll = 0.5 * rho_ll / p_ll + beta_rr = 0.5 * rho_rr / p_rr + specific_kin_ll = 0.5 * (v1_ll^2 + v2_ll^2 + v3_ll^2) + specific_kin_rr = 0.5 * (v1_rr^2 + v2_rr^2 + v3_rr^2) + + # Compute the necessary mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + rho_mean = ln_mean(rho_ll, rho_rr) + beta_mean = ln_mean(beta_ll, beta_rr) + beta_avg = 0.5 * (beta_ll + beta_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + 
v3_avg = 0.5 * (v3_ll + v3_rr) + p_mean = 0.5 * rho_avg / beta_avg + velocity_square_avg = specific_kin_ll + specific_kin_rr + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_mean + f3 = f1 * v2_avg + f4 = f1 * v3_avg + f5 = f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - velocity_square_avg) + + f2 * v1_avg + f3 * v2_avg + f4 * v3_avg + elseif orientation == 2 + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_mean + f4 = f1 * v3_avg + f5 = f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - velocity_square_avg) + + f2 * v1_avg + f3 * v2_avg + f4 * v3_avg + else + f1 = rho_mean * v3_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + p_mean + f5 = f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - velocity_square_avg) + + f2 * v1_avg + f3 * v2_avg + f4 * v3_avg + end + return SVector(f1, f2, f3, f4, f5) +end """ flux_ranocha(u_ll, u_rr, orientation_or_normal_direction, @@ -652,79 +674,89 @@ See also the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - # Compute the necessary mean values - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr + v3_ll*v3_rr) - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg - f5 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v1_rr + p_rr*v1_ll) - elseif orientation == 2 - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = f1 * v3_avg - f5 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v2_rr + p_rr*v2_ll) - else # orientation == 3 - f1 = rho_mean * v3_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg + p_avg - f5 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v3_rr + p_rr*v3_ll) - end - - return SVector(f1, f2, f3, f4, f5) -end +@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + # Compute the necessary mean values + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 
0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + f5 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + elseif orientation == 2 + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = f1 * v3_avg + f5 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v2_rr + p_rr * v2_ll) + else # orientation == 3 + f1 = rho_mean * v3_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + p_avg + f5 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v3_rr + p_rr * v3_ll) + end -@inline function flux_ranocha(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations3D) - # Unpack left and right state - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v3_ll * normal_direction[3] - v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + v3_rr * normal_direction[3] - - # Compute the necessary mean values - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr + v3_ll*v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - f4 = f1 * v3_avg + p_avg * normal_direction[3] - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - return SVector(f1, f2, f3, f4, f5) + return SVector(f1, f2, f3, f4, f5) end +@inline function flux_ranocha(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations3D) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3] + v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + v3_rr * normal_direction[3] + + # Compute the necessary mean values + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = f1 * v1_avg + p_avg * normal_direction[1] + f3 = 
f1 * v2_avg + p_avg * normal_direction[2] + f4 = f1 * v3_avg + p_avg * normal_direction[3] + f5 = (f1 * (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + return SVector(f1, f2, f3, f4, f5) +end """ splitting_steger_warming(u, orientation::Integer, @@ -752,146 +784,153 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. """ @inline function splitting_steger_warming(u, orientation::Integer, equations::CompressibleEulerEquations3D) - fm = splitting_steger_warming(u, Val{:minus}(), orientation, equations) - fp = splitting_steger_warming(u, Val{:plus}(), orientation, equations) - return fm, fp + fm = splitting_steger_warming(u, Val{:minus}(), orientation, equations) + fp = splitting_steger_warming(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_steger_warming(u, ::Val{:plus}, orientation::Integer, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - a = sqrt(equations.gamma * p / rho) - - if orientation == 1 - lambda1 = v1 - lambda2 = v1 + a - lambda3 = v1 - a - - lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) - lambda2_p = positive_part(lambda2) - lambda3_p = positive_part(lambda3) - - alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p - - rho_2gamma = 0.5 * rho / equations.gamma - f1p = rho_2gamma * alpha_p - f2p = rho_2gamma * (alpha_p * v1 + a * (lambda2_p - lambda3_p)) - f3p = rho_2gamma * alpha_p * v2 - f4p = rho_2gamma * alpha_p * v3 - f5p = rho_2gamma * (alpha_p * 0.5 * (v1^2 + v2^2 + v3^2) + a * v1 * (lambda2_p - lambda3_p) - + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) - elseif orientation == 2 - lambda1 = v2 - lambda2 = v2 + a - lambda3 = v2 - a - - lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) - lambda2_p = positive_part(lambda2) - lambda3_p = positive_part(lambda3) - - alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p - - rho_2gamma = 0.5 * rho / equations.gamma - f1p = rho_2gamma * alpha_p - f2p = rho_2gamma * alpha_p * v1 - f3p = rho_2gamma * (alpha_p * v2 + a * (lambda2_p - lambda3_p)) - f4p = rho_2gamma * alpha_p * v3 - f5p = rho_2gamma * (alpha_p * 0.5 * (v1^2 + v2^2 + v3^2) + a * v2 * (lambda2_p - lambda3_p) - + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) - else # orientation == 3 - lambda1 = v3 - lambda2 = v3 + a - lambda3 = v3 - a - - lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) - lambda2_p = positive_part(lambda2) - lambda3_p = positive_part(lambda3) - - alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p - - rho_2gamma = 0.5 * rho / equations.gamma - f1p = rho_2gamma * alpha_p - f2p = rho_2gamma * alpha_p * v1 - f3p = rho_2gamma * alpha_p * v2 - f4p = rho_2gamma * (alpha_p * v3 + a * (lambda2_p - lambda3_p)) - f5p = rho_2gamma * (alpha_p * 0.5 * (v1^2 + v2^2 + v3^2) + a * v3 * (lambda2_p - lambda3_p) - + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) - end - return SVector(f1p, f2p, f3p, f4p, f5p) + rho, rho_v1, rho_v2, rho_v3, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + a = 
sqrt(equations.gamma * p / rho) + + if orientation == 1 + lambda1 = v1 + lambda2 = v1 + a + lambda3 = v1 - a + + lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) + lambda2_p = positive_part(lambda2) + lambda3_p = positive_part(lambda3) + + alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p + + rho_2gamma = 0.5 * rho / equations.gamma + f1p = rho_2gamma * alpha_p + f2p = rho_2gamma * (alpha_p * v1 + a * (lambda2_p - lambda3_p)) + f3p = rho_2gamma * alpha_p * v2 + f4p = rho_2gamma * alpha_p * v3 + f5p = rho_2gamma * + (alpha_p * 0.5 * (v1^2 + v2^2 + v3^2) + a * v1 * (lambda2_p - lambda3_p) + + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) + elseif orientation == 2 + lambda1 = v2 + lambda2 = v2 + a + lambda3 = v2 - a + + lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) + lambda2_p = positive_part(lambda2) + lambda3_p = positive_part(lambda3) + + alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p + + rho_2gamma = 0.5 * rho / equations.gamma + f1p = rho_2gamma * alpha_p + f2p = rho_2gamma * alpha_p * v1 + f3p = rho_2gamma * (alpha_p * v2 + a * (lambda2_p - lambda3_p)) + f4p = rho_2gamma * alpha_p * v3 + f5p = rho_2gamma * + (alpha_p * 0.5 * (v1^2 + v2^2 + v3^2) + a * v2 * (lambda2_p - lambda3_p) + + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) + else # orientation == 3 + lambda1 = v3 + lambda2 = v3 + a + lambda3 = v3 - a + + lambda1_p = positive_part(lambda1) # Same as (lambda_i + abs(lambda_i)) / 2, but faster :) + lambda2_p = positive_part(lambda2) + lambda3_p = positive_part(lambda3) + + alpha_p = 2 * (equations.gamma - 1) * lambda1_p + lambda2_p + lambda3_p + + rho_2gamma = 0.5 * rho / equations.gamma + f1p = rho_2gamma * alpha_p + f2p = rho_2gamma * alpha_p * v1 + f3p = rho_2gamma * alpha_p * v2 + f4p = rho_2gamma * (alpha_p * v3 + a * (lambda2_p - lambda3_p)) + f5p = rho_2gamma * + (alpha_p * 0.5 * (v1^2 + v2^2 + v3^2) + a * v3 * (lambda2_p - lambda3_p) + + a^2 * (lambda2_p + lambda3_p) * equations.inv_gamma_minus_one) + end + return SVector(f1p, f2p, f3p, f4p, f5p) end @inline function splitting_steger_warming(u, ::Val{:minus}, orientation::Integer, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - a = sqrt(equations.gamma * p / rho) - - if orientation == 1 - lambda1 = v1 - lambda2 = v1 + a - lambda3 = v1 - a - - lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) - lambda2_m = negative_part(lambda2) - lambda3_m = negative_part(lambda3) - - alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m - - rho_2gamma = 0.5 * rho / equations.gamma - f1m = rho_2gamma * alpha_m - f2m = rho_2gamma * (alpha_m * v1 + a * (lambda2_m - lambda3_m)) - f3m = rho_2gamma * alpha_m * v2 - f4m = rho_2gamma * alpha_m * v3 - f5m = rho_2gamma * (alpha_m * 0.5 * (v1^2 + v2^2 + v3^2) + a * v1 * (lambda2_m - lambda3_m) - + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) - elseif orientation == 2 - lambda1 = v2 - lambda2 = v2 + a - lambda3 = v2 - a - - lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) - lambda2_m = negative_part(lambda2) - lambda3_m = negative_part(lambda3) - - alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m - - rho_2gamma = 0.5 * 
rho / equations.gamma - f1m = rho_2gamma * alpha_m - f2m = rho_2gamma * alpha_m * v1 - f3m = rho_2gamma * (alpha_m * v2 + a * (lambda2_m - lambda3_m)) - f4m = rho_2gamma * alpha_m * v3 - f5m = rho_2gamma * (alpha_m * 0.5 * (v1^2 + v2^2 + v3^2) + a * v2 * (lambda2_m - lambda3_m) - + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) - else # orientation == 3 - lambda1 = v3 - lambda2 = v3 + a - lambda3 = v3 - a - - lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) - lambda2_m = negative_part(lambda2) - lambda3_m = negative_part(lambda3) - - alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m - - rho_2gamma = 0.5 * rho / equations.gamma - f1m = rho_2gamma * alpha_m - f2m = rho_2gamma * alpha_m * v1 - f3m = rho_2gamma * alpha_m * v2 - f4m = rho_2gamma * (alpha_m * v3 + a * (lambda2_m - lambda3_m)) - f5m = rho_2gamma * (alpha_m * 0.5 * (v1^2 + v2^2 + v3^2) + a * v3 * (lambda2_m - lambda3_m) - + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) - end - return SVector(f1m, f2m, f3m, f4m, f5m) + rho, rho_v1, rho_v2, rho_v3, rho_e = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + a = sqrt(equations.gamma * p / rho) + + if orientation == 1 + lambda1 = v1 + lambda2 = v1 + a + lambda3 = v1 - a + + lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) + lambda2_m = negative_part(lambda2) + lambda3_m = negative_part(lambda3) + + alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m + + rho_2gamma = 0.5 * rho / equations.gamma + f1m = rho_2gamma * alpha_m + f2m = rho_2gamma * (alpha_m * v1 + a * (lambda2_m - lambda3_m)) + f3m = rho_2gamma * alpha_m * v2 + f4m = rho_2gamma * alpha_m * v3 + f5m = rho_2gamma * + (alpha_m * 0.5 * (v1^2 + v2^2 + v3^2) + a * v1 * (lambda2_m - lambda3_m) + + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) + elseif orientation == 2 + lambda1 = v2 + lambda2 = v2 + a + lambda3 = v2 - a + + lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) + lambda2_m = negative_part(lambda2) + lambda3_m = negative_part(lambda3) + + alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m + + rho_2gamma = 0.5 * rho / equations.gamma + f1m = rho_2gamma * alpha_m + f2m = rho_2gamma * alpha_m * v1 + f3m = rho_2gamma * (alpha_m * v2 + a * (lambda2_m - lambda3_m)) + f4m = rho_2gamma * alpha_m * v3 + f5m = rho_2gamma * + (alpha_m * 0.5 * (v1^2 + v2^2 + v3^2) + a * v2 * (lambda2_m - lambda3_m) + + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) + else # orientation == 3 + lambda1 = v3 + lambda2 = v3 + a + lambda3 = v3 - a + + lambda1_m = negative_part(lambda1) # Same as (lambda_i - abs(lambda_i)) / 2, but faster :) + lambda2_m = negative_part(lambda2) + lambda3_m = negative_part(lambda3) + + alpha_m = 2 * (equations.gamma - 1) * lambda1_m + lambda2_m + lambda3_m + + rho_2gamma = 0.5 * rho / equations.gamma + f1m = rho_2gamma * alpha_m + f2m = rho_2gamma * alpha_m * v1 + f3m = rho_2gamma * alpha_m * v2 + f4m = rho_2gamma * (alpha_m * v3 + a * (lambda2_m - lambda3_m)) + f5m = rho_2gamma * + (alpha_m * 0.5 * (v1^2 + v2^2 + v3^2) + a * v3 * (lambda2_m - lambda3_m) + + a^2 * (lambda2_m + lambda3_m) * equations.inv_gamma_minus_one) + end + return SVector(f1m, f2m, f3m, f4m, f5m) end - """ FluxLMARS(c)(u_ll, u_rr, orientation_or_normal_direction, 
equations::CompressibleEulerEquations3D) @@ -906,197 +945,204 @@ References: [DOI: 10.1175/MWR-D-12-00129.1](https://doi.org/10.1175/mwr-d-12-00129.1) """ struct FluxLMARS{SpeedOfSound} - # Estimate for the speed of sound - speed_of_sound::SpeedOfSound + # Estimate for the speed of sound + speed_of_sound::SpeedOfSound end -@inline function (flux_lmars::FluxLMARS)(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - c = flux_lmars.speed_of_sound - - # Unpack left and right state - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - if orientation == 1 - v_ll = v1_ll - v_rr = v1_rr - elseif orientation == 2 - v_ll = v2_ll - v_rr = v2_rr - else # orientation == 3 - v_ll = v3_ll - v_rr = v3_rr - end - - rho = 0.5 * (rho_ll + rho_rr) - p = 0.5 * (p_ll + p_rr) - 0.5 * c * rho * (v_rr - v_ll) - v = 0.5 * (v_ll + v_rr) - 1 / (2 * c * rho) * (p_rr - p_ll) - - if v >= 0 - f1, f2, f3, f4, f5 = v * u_ll - else - f1, f2, f3, f4, f5 = v * u_rr - end - - if orientation == 1 - f2 += p - elseif orientation == 2 - f3 += p - else # orientation == 3 - f4 += p - end - f5 += p * v - - return SVector(f1, f2, f3, f4, f5) +@inline function (flux_lmars::FluxLMARS)(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + c = flux_lmars.speed_of_sound + + # Unpack left and right state + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + if orientation == 1 + v_ll = v1_ll + v_rr = v1_rr + elseif orientation == 2 + v_ll = v2_ll + v_rr = v2_rr + else # orientation == 3 + v_ll = v3_ll + v_rr = v3_rr + end + + rho = 0.5 * (rho_ll + rho_rr) + p = 0.5 * (p_ll + p_rr) - 0.5 * c * rho * (v_rr - v_ll) + v = 0.5 * (v_ll + v_rr) - 1 / (2 * c * rho) * (p_rr - p_ll) + + if v >= 0 + f1, f2, f3, f4, f5 = v * u_ll + else + f1, f2, f3, f4, f5 = v * u_rr + end + + if orientation == 1 + f2 += p + elseif orientation == 2 + f3 += p + else # orientation == 3 + f4 += p + end + f5 += p * v + + return SVector(f1, f2, f3, f4, f5) end -@inline function (flux_lmars::FluxLMARS)(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations3D) - c = flux_lmars.speed_of_sound +@inline function (flux_lmars::FluxLMARS)(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations3D) + c = flux_lmars.speed_of_sound - # Unpack left and right state - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + # Unpack left and right state + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - v_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v3_ll * normal_direction[3] - v_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + v3_rr * normal_direction[3] + v_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3] + v_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + v3_rr * normal_direction[3] - # Note that this is the same as computing v_ll and v_rr with a normalized normal vector - # and then multiplying v by `norm_` again, but this version is slightly faster. - norm_ = norm(normal_direction) + # Note that this is the same as computing v_ll and v_rr with a normalized normal vector + # and then multiplying v by `norm_` again, but this version is slightly faster. 
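+    # Concretely, `v_ll` and `v_rr` above are velocities along the unnormalized
+    # normal direction, i.e. they carry a factor of `norm_`. Hence the pressure
+    # update below divides the velocity jump (v_rr - v_ll) by `norm_`, while the
+    # velocity update multiplies the pressure jump (p_rr - p_ll) by `norm_`.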
+ norm_ = norm(normal_direction) - rho = 0.5 * (rho_ll + rho_rr) - p = 0.5 * (p_ll + p_rr) - 0.5 * c * rho * (v_rr - v_ll) / norm_ - v = 0.5 * (v_ll + v_rr) - 1 / (2 * c * rho) * (p_rr - p_ll) * norm_ + rho = 0.5 * (rho_ll + rho_rr) + p = 0.5 * (p_ll + p_rr) - 0.5 * c * rho * (v_rr - v_ll) / norm_ + v = 0.5 * (v_ll + v_rr) - 1 / (2 * c * rho) * (p_rr - p_ll) * norm_ - if v >= 0 - f1, f2, f3, f4, f5 = v * u_ll - else - f1, f2, f3, f4, f5 = v * u_rr - end + if v >= 0 + f1, f2, f3, f4, f5 = v * u_ll + else + f1, f2, f3, f4, f5 = v * u_rr + end - f2 += p * normal_direction[1] - f3 += p * normal_direction[2] - f4 += p * normal_direction[3] - f5 += p * v + f2 += p * normal_direction[1] + f3 += p * normal_direction[2] + f4 += p * normal_direction[3] + f5 += p * v - return SVector(f1, f2, f3, f4, f5) + return SVector(f1, f2, f3, f4, f5) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation as the # maximum velocity magnitude plus the maximum speed of sound -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - # Get the velocity value in the appropriate direction - if orientation == 1 - v_ll = v1_ll - v_rr = v1_rr - elseif orientation == 2 - v_ll = v2_ll - v_rr = v2_rr - else # orientation == 3 - v_ll = v3_ll - v_rr = v3_rr - end - # Calculate sound speeds - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - c_rr = sqrt(equations.gamma * p_rr / rho_rr) - - λ_max = max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) -end +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + # Get the velocity value in the appropriate direction + if orientation == 1 + v_ll = v1_ll + v_rr = v1_rr + elseif orientation == 2 + v_ll = v2_ll + v_rr = v2_rr + else # orientation == 3 + v_ll = v3_ll + v_rr = v3_rr + end + # Calculate sound speeds + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations3D) - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - # Calculate normal velocities and sound speed - # left - v_ll = ( v1_ll * normal_direction[1] - + v2_ll * normal_direction[2] - + v3_ll * normal_direction[3] ) - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - # right - v_rr = ( v1_rr * normal_direction[1] - + v2_rr * normal_direction[2] - + v3_rr * normal_direction[3] ) - c_rr = sqrt(equations.gamma * p_rr / rho_rr) - - return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) * norm(normal_direction) + λ_max = max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) end +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations3D) + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + # Calculate normal velocities and sound speed + # left + v_ll = (v1_ll * normal_direction[1] + + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3]) + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + # right + v_rr = (v1_rr * normal_direction[1] + + v2_rr * normal_direction[2] + + 
v3_rr * normal_direction[3]) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) * norm(normal_direction) +end # Calculate minimum and maximum wave speeds for HLL-type fluxes -@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - if orientation == 1 # x-direction - λ_min = v1_ll - sqrt(equations.gamma * p_ll / rho_ll) - λ_max = v1_rr + sqrt(equations.gamma * p_rr / rho_rr) - elseif orientation == 2 # y-direction - λ_min = v2_ll - sqrt(equations.gamma * p_ll / rho_ll) - λ_max = v2_rr + sqrt(equations.gamma * p_rr / rho_rr) - else # z-direction - λ_min = v3_ll - sqrt(equations.gamma * p_ll / rho_ll) - λ_max = v3_rr + sqrt(equations.gamma * p_rr / rho_rr) - end - - return λ_min, λ_max +@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + if orientation == 1 # x-direction + λ_min = v1_ll - sqrt(equations.gamma * p_ll / rho_ll) + λ_max = v1_rr + sqrt(equations.gamma * p_rr / rho_rr) + elseif orientation == 2 # y-direction + λ_min = v2_ll - sqrt(equations.gamma * p_ll / rho_ll) + λ_max = v2_rr + sqrt(equations.gamma * p_rr / rho_rr) + else # z-direction + λ_min = v3_ll - sqrt(equations.gamma * p_ll / rho_ll) + λ_max = v3_rr + sqrt(equations.gamma * p_rr / rho_rr) + end + + return λ_min, λ_max end @inline function min_max_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::CompressibleEulerEquations3D) - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - v_normal_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v3_ll * normal_direction[3] - v_normal_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + v3_rr * normal_direction[3] + v_normal_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3] + v_normal_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + v3_rr * normal_direction[3] - norm_ = norm(normal_direction) - # The v_normals are already scaled by the norm - λ_min = v_normal_ll - sqrt(equations.gamma * p_ll / rho_ll) * norm_ - λ_max = v_normal_rr + sqrt(equations.gamma * p_rr / rho_rr) * norm_ + norm_ = norm(normal_direction) + # The v_normals are already scaled by the norm + λ_min = v_normal_ll - sqrt(equations.gamma * p_ll / rho_ll) * norm_ + λ_max = v_normal_rr + sqrt(equations.gamma * p_rr / rho_rr) * norm_ - return λ_min, λ_max + return λ_min, λ_max end - # Rotate normal vector to x-axis; normal, tangent1 and tangent2 need to be orthonormal # Called inside `FluxRotated` in `numerical_fluxes.jl` so the directions # has been normalized prior to this rotation of the state vector -@inline function rotate_to_x(u, normal_vector, tangent1, tangent2, equations::CompressibleEulerEquations3D) - # Multiply with [ 1 0 0 0 0; - # 0 ― normal_vector ― 0; - # 0 ― tangent1 ― 0; - # 0 ― tangent2 ― 0; - # 0 0 0 0 1 ] - return SVector(u[1], - normal_vector[1] * u[2] + normal_vector[2] * u[3] + normal_vector[3] * u[4], - tangent1[1] * u[2] + tangent1[2] * u[3] + tangent1[3] 
* u[4], - tangent2[1] * u[2] + tangent2[2] * u[3] + tangent2[3] * u[4], - u[5]) +@inline function rotate_to_x(u, normal_vector, tangent1, tangent2, + equations::CompressibleEulerEquations3D) + # Multiply with [ 1 0 0 0 0; + # 0 ― normal_vector ― 0; + # 0 ― tangent1 ― 0; + # 0 ― tangent2 ― 0; + # 0 0 0 0 1 ] + return SVector(u[1], + normal_vector[1] * u[2] + normal_vector[2] * u[3] + + normal_vector[3] * u[4], + tangent1[1] * u[2] + tangent1[2] * u[3] + tangent1[3] * u[4], + tangent2[1] * u[2] + tangent2[2] * u[3] + tangent2[3] * u[4], + u[5]) end - # Rotate x-axis to normal vector; normal, tangent1 and tangent2 need to be orthonormal # Called inside `FluxRotated` in `numerical_fluxes.jl` so the directions # has been normalized prior to this back-rotation of the state vector -@inline function rotate_from_x(u, normal_vector, tangent1, tangent2, equations::CompressibleEulerEquations3D) - # Multiply with [ 1 0 0 0 0; - # 0 | | | 0; - # 0 normal_vector tangent1 tangent2 0; - # 0 | | | 0; - # 0 0 0 0 1 ] - return SVector(u[1], - normal_vector[1] * u[2] + tangent1[1] * u[3] + tangent2[1] * u[4], - normal_vector[2] * u[2] + tangent1[2] * u[3] + tangent2[2] * u[4], - normal_vector[3] * u[2] + tangent1[3] * u[3] + tangent2[3] * u[4], - u[5]) +@inline function rotate_from_x(u, normal_vector, tangent1, tangent2, + equations::CompressibleEulerEquations3D) + # Multiply with [ 1 0 0 0 0; + # 0 | | | 0; + # 0 normal_vector tangent1 tangent2 0; + # 0 | | | 0; + # 0 0 0 0 1 ] + return SVector(u[1], + normal_vector[1] * u[2] + tangent1[1] * u[3] + tangent2[1] * u[4], + normal_vector[2] * u[2] + tangent1[2] * u[3] + tangent2[2] * u[4], + normal_vector[3] * u[2] + tangent1[3] * u[3] + tangent2[3] * u[4], + u[5]) end - """ flux_hllc(u_ll, u_rr, orientation, equations::CompressibleEulerEquations3D) @@ -1104,124 +1150,129 @@ Computes the HLLC flux (HLL with Contact) for compressible Euler equations devel [Lecture slides](http://www.prague-sum.com/download/2012/Toro_2-HLLC-RiemannSolver.pdf) Signal speeds: [DOI: 10.1137/S1064827593260140](https://doi.org/10.1137/S1064827593260140) """ -function flux_hllc(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - # Calculate primitive variables and speed of sound - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - e_ll = rho_e_ll / rho_ll - p_ll = (equations.gamma - 1) * (rho_e_ll - 1/2 * rho_ll * (v1_ll^2 + v2_ll^2 + v3_ll^2)) - c_ll = sqrt(equations.gamma*p_ll/rho_ll) - - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v3_rr = rho_v3_rr / rho_rr - e_rr = rho_e_rr / rho_rr - p_rr = (equations.gamma - 1) * (rho_e_rr - 1/2 * rho_rr * (v1_rr^2 + v2_rr^2 + v3_rr^2)) - c_rr = sqrt(equations.gamma*p_rr/rho_rr) - - # Obtain left and right fluxes - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) - - - # Compute Roe averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - sum_sqrt_rho = sqrt_rho_ll + sqrt_rho_rr - if orientation == 1 # x-direction - vel_L = v1_ll - vel_R = v1_rr - ekin_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr)^2 + (sqrt_rho_ll * v3_ll + sqrt_rho_rr * v3_rr)^2 - elseif orientation == 2 # y-direction - vel_L = v2_ll - vel_R = v2_rr - ekin_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr)^2 + (sqrt_rho_ll * v3_ll + sqrt_rho_rr * v3_rr)^2 - else # z-direction - vel_L = v3_ll - vel_R = v3_rr - ekin_roe = (sqrt_rho_ll * 
v1_ll + sqrt_rho_rr * v1_rr)^2 + (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr)^2 - end - vel_roe = (sqrt_rho_ll * vel_L + sqrt_rho_rr * vel_R) / sum_sqrt_rho - ekin_roe = 0.5 * (vel_roe^2 + ekin_roe / sum_sqrt_rho^2) - H_ll = (rho_e_ll + p_ll) / rho_ll - H_rr = (rho_e_rr + p_rr) / rho_rr - H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) / sum_sqrt_rho - c_roe = sqrt((equations.gamma - 1) * (H_roe - ekin_roe)) - Ssl = min(vel_L - c_ll, vel_roe - c_roe) - Ssr = max(vel_R + c_rr, vel_roe + c_roe) - sMu_L = Ssl - vel_L - sMu_R = Ssr - vel_R - - if Ssl >= 0.0 - f1 = f_ll[1] - f2 = f_ll[2] - f3 = f_ll[3] - f4 = f_ll[4] - f5 = f_ll[5] - elseif Ssr <= 0.0 - f1 = f_rr[1] - f2 = f_rr[2] - f3 = f_rr[3] - f4 = f_rr[4] - f5 = f_rr[5] - else - SStar = (p_rr - p_ll + rho_ll*vel_L*sMu_L - rho_rr*vel_R*sMu_R) / (rho_ll*sMu_L - rho_rr*sMu_R) - if Ssl <= 0.0 <= SStar - densStar = rho_ll*sMu_L / (Ssl-SStar) - enerStar = e_ll + (SStar - vel_L) * (SStar + p_ll / (rho_ll * sMu_L)) - UStar1 = densStar - UStar5 = densStar*enerStar - if orientation == 1 # x-direction - UStar2 = densStar*SStar - UStar3 = densStar*v2_ll - UStar4 = densStar*v3_ll - elseif orientation == 2 # y-direction - UStar2 = densStar*v1_ll - UStar3 = densStar*SStar - UStar4 = densStar*v3_ll - else # z-direction - UStar2 = densStar*v1_ll - UStar3 = densStar*v2_ll - UStar4 = densStar*SStar - end - f1 = f_ll[1]+Ssl*(UStar1 - rho_ll) - f2 = f_ll[2]+Ssl*(UStar2 - rho_v1_ll) - f3 = f_ll[3]+Ssl*(UStar3 - rho_v2_ll) - f4 = f_ll[4]+Ssl*(UStar4 - rho_v3_ll) - f5 = f_ll[5]+Ssl*(UStar5 - rho_e_ll) +function flux_hllc(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + # Calculate primitive variables and speed of sound + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + e_ll = rho_e_ll / rho_ll + p_ll = (equations.gamma - 1) * + (rho_e_ll - 1 / 2 * rho_ll * (v1_ll^2 + v2_ll^2 + v3_ll^2)) + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + e_rr = rho_e_rr / rho_rr + p_rr = (equations.gamma - 1) * + (rho_e_rr - 1 / 2 * rho_rr * (v1_rr^2 + v2_rr^2 + v3_rr^2)) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + # Obtain left and right fluxes + f_ll = flux(u_ll, orientation, equations) + f_rr = flux(u_rr, orientation, equations) + + # Compute Roe averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + sum_sqrt_rho = sqrt_rho_ll + sqrt_rho_rr + if orientation == 1 # x-direction + vel_L = v1_ll + vel_R = v1_rr + ekin_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr)^2 + + (sqrt_rho_ll * v3_ll + sqrt_rho_rr * v3_rr)^2 + elseif orientation == 2 # y-direction + vel_L = v2_ll + vel_R = v2_rr + ekin_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr)^2 + + (sqrt_rho_ll * v3_ll + sqrt_rho_rr * v3_rr)^2 + else # z-direction + vel_L = v3_ll + vel_R = v3_rr + ekin_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr)^2 + + (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr)^2 + end + vel_roe = (sqrt_rho_ll * vel_L + sqrt_rho_rr * vel_R) / sum_sqrt_rho + ekin_roe = 0.5 * (vel_roe^2 + ekin_roe / sum_sqrt_rho^2) + H_ll = (rho_e_ll + p_ll) / rho_ll + H_rr = (rho_e_rr + p_rr) / rho_rr + H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) / sum_sqrt_rho + c_roe = sqrt((equations.gamma - 1) * (H_roe - ekin_roe)) + Ssl = min(vel_L - c_ll, vel_roe - c_roe) + Ssr = max(vel_R + c_rr, vel_roe + c_roe) + sMu_L = Ssl - vel_L + 
sMu_R = Ssr - vel_R + + if Ssl >= 0.0 + f1 = f_ll[1] + f2 = f_ll[2] + f3 = f_ll[3] + f4 = f_ll[4] + f5 = f_ll[5] + elseif Ssr <= 0.0 + f1 = f_rr[1] + f2 = f_rr[2] + f3 = f_rr[3] + f4 = f_rr[4] + f5 = f_rr[5] else - densStar = rho_rr*sMu_R / (Ssr-SStar) - enerStar = e_rr + (SStar - vel_R) * (SStar + p_rr / (rho_rr * sMu_R)) - UStar1 = densStar - UStar5 = densStar*enerStar - if orientation == 1 # x-direction - UStar2 = densStar*SStar - UStar3 = densStar*v2_rr - UStar4 = densStar*v3_rr - elseif orientation == 2 # y-direction - UStar2 = densStar*v1_rr - UStar3 = densStar*SStar - UStar4 = densStar*v3_rr - else # z-direction - UStar2 = densStar*v1_rr - UStar3 = densStar*v2_rr - UStar4 = densStar*SStar - end - f1 = f_rr[1]+Ssr*(UStar1 - rho_rr) - f2 = f_rr[2]+Ssr*(UStar2 - rho_v1_rr) - f3 = f_rr[3]+Ssr*(UStar3 - rho_v2_rr) - f4 = f_rr[4]+Ssr*(UStar4 - rho_v3_rr) - f5 = f_rr[5]+Ssr*(UStar5 - rho_e_rr) + SStar = (p_rr - p_ll + rho_ll * vel_L * sMu_L - rho_rr * vel_R * sMu_R) / + (rho_ll * sMu_L - rho_rr * sMu_R) + if Ssl <= 0.0 <= SStar + densStar = rho_ll * sMu_L / (Ssl - SStar) + enerStar = e_ll + (SStar - vel_L) * (SStar + p_ll / (rho_ll * sMu_L)) + UStar1 = densStar + UStar5 = densStar * enerStar + if orientation == 1 # x-direction + UStar2 = densStar * SStar + UStar3 = densStar * v2_ll + UStar4 = densStar * v3_ll + elseif orientation == 2 # y-direction + UStar2 = densStar * v1_ll + UStar3 = densStar * SStar + UStar4 = densStar * v3_ll + else # z-direction + UStar2 = densStar * v1_ll + UStar3 = densStar * v2_ll + UStar4 = densStar * SStar + end + f1 = f_ll[1] + Ssl * (UStar1 - rho_ll) + f2 = f_ll[2] + Ssl * (UStar2 - rho_v1_ll) + f3 = f_ll[3] + Ssl * (UStar3 - rho_v2_ll) + f4 = f_ll[4] + Ssl * (UStar4 - rho_v3_ll) + f5 = f_ll[5] + Ssl * (UStar5 - rho_e_ll) + else + densStar = rho_rr * sMu_R / (Ssr - SStar) + enerStar = e_rr + (SStar - vel_R) * (SStar + p_rr / (rho_rr * sMu_R)) + UStar1 = densStar + UStar5 = densStar * enerStar + if orientation == 1 # x-direction + UStar2 = densStar * SStar + UStar3 = densStar * v2_rr + UStar4 = densStar * v3_rr + elseif orientation == 2 # y-direction + UStar2 = densStar * v1_rr + UStar3 = densStar * SStar + UStar4 = densStar * v3_rr + else # z-direction + UStar2 = densStar * v1_rr + UStar3 = densStar * v2_rr + UStar4 = densStar * SStar + end + f1 = f_rr[1] + Ssr * (UStar1 - rho_rr) + f2 = f_rr[2] + Ssr * (UStar2 - rho_v1_rr) + f3 = f_rr[3] + Ssr * (UStar3 - rho_v2_rr) + f4 = f_rr[4] + Ssr * (UStar4 - rho_v3_rr) + f5 = f_rr[5] + Ssr * (UStar5 - rho_e_rr) + end end - end - return SVector(f1, f2, f3, f4, f5) + return SVector(f1, f2, f3, f4, f5) end - """ flux_hlle(u_ll, u_rr, orientation, equations::CompressibleEulerEquations3D) @@ -1237,221 +1288,220 @@ of the numerical flux. On Godunov-type methods near low densities. 
[DOI: 10.1016/0021-9991(91)90211-3](https://doi.org/10.1016/0021-9991(91)90211-3) """ -function flux_hlle(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) - # Calculate primitive variables, enthalpy and speed of sound - rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) - - # `u_ll[5]` is total energy `rho_e_ll` on the left - H_ll = (u_ll[5] + p_ll) / rho_ll - c_ll = sqrt(equations.gamma * p_ll / rho_ll) - - # `u_rr[5]` is total energy `rho_e_rr` on the right - H_rr = (u_rr[5] + p_rr) / rho_rr - c_rr = sqrt(equations.gamma * p_rr / rho_rr) - - # Compute Roe averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sum_sqrt_rho = inv(sqrt_rho_ll + sqrt_rho_rr) - - v1_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr) * inv_sum_sqrt_rho - v2_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr) * inv_sum_sqrt_rho - v3_roe = (sqrt_rho_ll * v3_ll + sqrt_rho_rr * v3_rr) * inv_sum_sqrt_rho - v_roe_mag = v1_roe^2 + v2_roe^2 + v3_roe^2 - - H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) * inv_sum_sqrt_rho - c_roe = sqrt((equations.gamma - 1) * (H_roe - 0.5 * v_roe_mag)) - - # Compute convenience constant for positivity preservation, see - # https://doi.org/10.1016/0021-9991(91)90211-3 - beta = sqrt(0.5 * (equations.gamma - 1) / equations.gamma) - - # Estimate the edges of the Riemann fan (with positivity conservation) - if orientation == 1 # x-direction - SsL = min(v1_roe - c_roe, v1_ll - beta * c_ll, zero(v1_roe)) - SsR = max(v1_roe + c_roe, v1_rr + beta * c_rr, zero(v1_roe)) - elseif orientation == 2 # y-direction - SsL = min(v2_roe - c_roe, v2_ll - beta * c_ll, zero(v2_roe)) - SsR = max(v2_roe + c_roe, v2_rr + beta * c_rr, zero(v2_roe)) - else # z-direction - SsL = min(v3_roe - c_roe, v3_ll - beta * c_ll, zero(v3_roe)) - SsR = max(v3_roe + c_roe, v3_rr + beta * c_rr, zero(v3_roe)) - end - - if SsL >= 0.0 && SsR > 0.0 - # Positive supersonic speed - f_ll = flux(u_ll, orientation, equations) - - f1 = f_ll[1] - f2 = f_ll[2] - f3 = f_ll[3] - f4 = f_ll[4] - f5 = f_ll[5] - elseif SsR <= 0.0 && SsL < 0.0 - # Negative supersonic speed - f_rr = flux(u_rr, orientation, equations) - - f1 = f_rr[1] - f2 = f_rr[2] - f3 = f_rr[3] - f4 = f_rr[4] - f5 = f_rr[5] - else - # Subsonic case - # Compute left and right fluxes - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) +function flux_hlle(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + # Calculate primitive variables, enthalpy and speed of sound + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + # `u_ll[5]` is total energy `rho_e_ll` on the left + H_ll = (u_ll[5] + p_ll) / rho_ll + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + + # `u_rr[5]` is total energy `rho_e_rr` on the right + H_rr = (u_rr[5] + p_rr) / rho_rr + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + # Compute Roe averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + inv_sum_sqrt_rho = inv(sqrt_rho_ll + sqrt_rho_rr) + + v1_roe = (sqrt_rho_ll * v1_ll + sqrt_rho_rr * v1_rr) * inv_sum_sqrt_rho + v2_roe = (sqrt_rho_ll * v2_ll + sqrt_rho_rr * v2_rr) * inv_sum_sqrt_rho + v3_roe = (sqrt_rho_ll * v3_ll + sqrt_rho_rr * v3_rr) * inv_sum_sqrt_rho + v_roe_mag = v1_roe^2 + v2_roe^2 + v3_roe^2 + + H_roe = (sqrt_rho_ll * H_ll + sqrt_rho_rr * H_rr) * inv_sum_sqrt_rho + c_roe = sqrt((equations.gamma - 1) * (H_roe - 0.5 * v_roe_mag)) + + 
# Compute convenience constant for positivity preservation, see + # https://doi.org/10.1016/0021-9991(91)90211-3 + beta = sqrt(0.5 * (equations.gamma - 1) / equations.gamma) + + # Estimate the edges of the Riemann fan (with positivity conservation) + if orientation == 1 # x-direction + SsL = min(v1_roe - c_roe, v1_ll - beta * c_ll, zero(v1_roe)) + SsR = max(v1_roe + c_roe, v1_rr + beta * c_rr, zero(v1_roe)) + elseif orientation == 2 # y-direction + SsL = min(v2_roe - c_roe, v2_ll - beta * c_ll, zero(v2_roe)) + SsR = max(v2_roe + c_roe, v2_rr + beta * c_rr, zero(v2_roe)) + else # z-direction + SsL = min(v3_roe - c_roe, v3_ll - beta * c_ll, zero(v3_roe)) + SsR = max(v3_roe + c_roe, v3_rr + beta * c_rr, zero(v3_roe)) + end - f1 = (SsR * f_ll[1] - SsL * f_rr[1] + SsL * SsR * (u_rr[1] - u_ll[1])) / (SsR - SsL) - f2 = (SsR * f_ll[2] - SsL * f_rr[2] + SsL * SsR * (u_rr[2] - u_ll[2])) / (SsR - SsL) - f3 = (SsR * f_ll[3] - SsL * f_rr[3] + SsL * SsR * (u_rr[3] - u_ll[3])) / (SsR - SsL) - f4 = (SsR * f_ll[4] - SsL * f_rr[4] + SsL * SsR * (u_rr[4] - u_ll[4])) / (SsR - SsL) - f5 = (SsR * f_ll[5] - SsL * f_rr[5] + SsL * SsR * (u_rr[5] - u_ll[5])) / (SsR - SsL) - end + if SsL >= 0.0 && SsR > 0.0 + # Positive supersonic speed + f_ll = flux(u_ll, orientation, equations) + + f1 = f_ll[1] + f2 = f_ll[2] + f3 = f_ll[3] + f4 = f_ll[4] + f5 = f_ll[5] + elseif SsR <= 0.0 && SsL < 0.0 + # Negative supersonic speed + f_rr = flux(u_rr, orientation, equations) + + f1 = f_rr[1] + f2 = f_rr[2] + f3 = f_rr[3] + f4 = f_rr[4] + f5 = f_rr[5] + else + # Subsonic case + # Compute left and right fluxes + f_ll = flux(u_ll, orientation, equations) + f_rr = flux(u_rr, orientation, equations) + + f1 = (SsR * f_ll[1] - SsL * f_rr[1] + SsL * SsR * (u_rr[1] - u_ll[1])) / + (SsR - SsL) + f2 = (SsR * f_ll[2] - SsL * f_rr[2] + SsL * SsR * (u_rr[2] - u_ll[2])) / + (SsR - SsL) + f3 = (SsR * f_ll[3] - SsL * f_rr[3] + SsL * SsR * (u_rr[3] - u_ll[3])) / + (SsR - SsL) + f4 = (SsR * f_ll[4] - SsL * f_rr[4] + SsL * SsR * (u_rr[4] - u_ll[4])) / + (SsR - SsL) + f5 = (SsR * f_ll[5] - SsL * f_rr[5] + SsL * SsR * (u_rr[5] - u_ll[5])) / + (SsR - SsL) + end - return SVector(f1, f2, f3, f4, f5) + return SVector(f1, f2, f3, f4, f5) end - @inline function max_abs_speeds(u, equations::CompressibleEulerEquations3D) - rho, v1, v2, v3, p = cons2prim(u, equations) - c = sqrt(equations.gamma * p / rho) + rho, v1, v2, v3, p = cons2prim(u, equations) + c = sqrt(equations.gamma * p / rho) - return abs(v1) + c, abs(v2) + c, abs(v3) + c + return abs(v1) + c, abs(v2) + c, abs(v3) + c end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u + rho, rho_v1, rho_v2, rho_v3, rho_e = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - return SVector(rho, v1, v2, v3, p) + return SVector(rho, v1, v2, v3, p) end # Convert conservative variables to entropy @inline function cons2entropy(u, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_square = v1^2 + v2^2 + v3^2 - p = (equations.gamma - 1) * (rho_e - 0.5 * rho * v_square) - s = log(p) - equations.gamma*log(rho) - rho_p = rho / p - - w1 = (equations.gamma - s) * 
equations.inv_gamma_minus_one - 0.5 * rho_p * v_square
-  w2 = rho_p * v1
-  w3 = rho_p * v2
-  w4 = rho_p * v3
-  w5 = -rho_p
-
-  return SVector(w1, w2, w3, w4, w5)
+    rho, rho_v1, rho_v2, rho_v3, rho_e = u
+
+    v1 = rho_v1 / rho
+    v2 = rho_v2 / rho
+    v3 = rho_v3 / rho
+    v_square = v1^2 + v2^2 + v3^2
+    p = (equations.gamma - 1) * (rho_e - 0.5 * rho * v_square)
+    s = log(p) - equations.gamma * log(rho)
+    rho_p = rho / p
+
+    w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square
+    w2 = rho_p * v1
+    w3 = rho_p * v2
+    w4 = rho_p * v3
+    w5 = -rho_p
+
+    return SVector(w1, w2, w3, w4, w5)
 end

 @inline function entropy2cons(w, equations::CompressibleEulerEquations3D)
-  # See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD
-  # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1)
-  @unpack gamma = equations
-
-  # convert to entropy `-rho * s` used by Hughes, France, Mallet (1986)
-  # instead of `-rho * s / (gamma - 1)`
-  V1, V2, V3, V4, V5 = w .* (gamma-1)
-
-  # s = specific entropy, eq. (53)
-  V_square = V2^2 + V3^2 + V4^2
-  s = gamma - V1 + V_square/(2*V5)
-
-  # eq. (52)
-  rho_iota = ((gamma-1) / (-V5)^gamma)^(equations.inv_gamma_minus_one)*exp(-s * equations.inv_gamma_minus_one)
-
-  # eq. (51)
-  rho = -rho_iota * V5
-  rho_v1 = rho_iota * V2
-  rho_v2 = rho_iota * V3
-  rho_v3 = rho_iota * V4
-  rho_e = rho_iota*(1-V_square/(2*V5))
-  return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e)
+    # See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD
+    # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1)
+    @unpack gamma = equations
+
+    # convert to entropy `-rho * s` used by Hughes, Franca, Mallet (1986)
+    # instead of `-rho * s / (gamma - 1)`
+    V1, V2, V3, V4, V5 = w .* (gamma - 1)
+
+    # s = specific entropy, eq. (53)
+    V_square = V2^2 + V3^2 + V4^2
+    s = gamma - V1 + V_square / (2 * V5)
+
+    # eq. (52)
+    rho_iota = ((gamma - 1) / (-V5)^gamma)^(equations.inv_gamma_minus_one) *
+               exp(-s * equations.inv_gamma_minus_one)
+
+    # eq.
(51) + rho = -rho_iota * V5 + rho_v1 = rho_iota * V2 + rho_v2 = rho_iota * V3 + rho_v3 = rho_iota * V4 + rho_e = rho_iota * (1 - V_square / (2 * V5)) + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::CompressibleEulerEquations3D) - rho, v1, v2, v3, p = prim - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_v3 = rho * v3 - rho_e = p * equations.inv_gamma_minus_one + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e) + rho, v1, v2, v3, p = prim + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_v3 = rho * v3 + rho_e = p * equations.inv_gamma_minus_one + + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e) end - @inline function density(u, equations::CompressibleEulerEquations3D) - rho = u[1] - return rho + rho = u[1] + return rho end - @inline function pressure(u, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho) - return p + rho, rho_v1, rho_v2, rho_v3, rho_e = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho) + return p end - @inline function density_pressure(u, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u - rho_times_p = (equations.gamma - 1) * (rho * rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2)) - return rho_times_p + rho, rho_v1, rho_v2, rho_v3, rho_e = u + rho_times_p = (equations.gamma - 1) * + (rho * rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2)) + return rho_times_p end - # Calculate thermodynamic entropy for a conservative state `u` @inline function entropy_thermodynamic(u, equations::CompressibleEulerEquations3D) - rho, _ = u - p = pressure(u, equations) + rho, _ = u + p = pressure(u, equations) - # Thermodynamic entropy - s = log(p) - equations.gamma * log(rho) + # Thermodynamic entropy + s = log(p) - equations.gamma * log(rho) - return s + return s end - # Calculate mathematical entropy for a conservative state `cons` @inline function entropy_math(cons, equations::CompressibleEulerEquations3D) - S = -entropy_thermodynamic(cons, equations) * cons[1] * equations.inv_gamma_minus_one - # Mathematical entropy + S = -entropy_thermodynamic(cons, equations) * cons[1] * + equations.inv_gamma_minus_one + # Mathematical entropy - return S + return S end - # Default entropy is the mathematical entropy -@inline entropy(cons, equations::CompressibleEulerEquations3D) = entropy_math(cons, equations) - +@inline function entropy(cons, equations::CompressibleEulerEquations3D) + entropy_math(cons, equations) +end # Calculate total energy for a conservative state `cons` @inline energy_total(cons, ::CompressibleEulerEquations3D) = cons[5] - # Calculate kinetic energy for a conservative state `cons` @inline function energy_kinetic(u, equations::CompressibleEulerEquations3D) - rho, rho_v1, rho_v2, rho_v3, _ = u - return 0.5 * (rho_v1^2 + rho_v2^2 +rho_v3^2) / rho + rho, rho_v1, rho_v2, rho_v3, _ = u + return 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho end - # Calculate internal energy for a conservative state `cons` @inline function energy_internal(cons, equations::CompressibleEulerEquations3D) - return energy_total(cons, equations) - energy_kinetic(cons, equations) + return energy_total(cons, equations) - energy_kinetic(cons, equations) end - - end # @muladd diff --git a/src/equations/compressible_euler_multicomponent_1d.jl 
b/src/equations/compressible_euler_multicomponent_1d.jl index c5a3579ab3e..4a50d60471a 100644 --- a/src/equations/compressible_euler_multicomponent_1d.jl +++ b/src/equations/compressible_euler_multicomponent_1d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" CompressibleEulerMulticomponentEquations1D(; gammas, gas_constants) @@ -47,59 +47,74 @@ In case of more than one component, the specific heat ratios `gammas` and the ga The remaining variables like the specific heats at constant volume 'cv' or the specific heats at constant pressure 'cp' are then calculated considering a calorically perfect gas. """ -struct CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, RealT<:Real} <: AbstractCompressibleEulerMulticomponentEquations{1, NVARS, NCOMP} - gammas ::SVector{NCOMP, RealT} - gas_constants ::SVector{NCOMP, RealT} - cv ::SVector{NCOMP, RealT} - cp ::SVector{NCOMP, RealT} - - function CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, RealT}(gammas ::SVector{NCOMP, RealT}, - gas_constants ::SVector{NCOMP, RealT}) where {NVARS, NCOMP, RealT<:Real} - - NCOMP >= 1 || throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) - - cv = gas_constants ./ (gammas .- 1) - cp = gas_constants + gas_constants ./ (gammas .- 1) - - new(gammas, gas_constants, cv, cp) - end +struct CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, RealT <: Real} <: + AbstractCompressibleEulerMulticomponentEquations{1, NVARS, NCOMP} + gammas::SVector{NCOMP, RealT} + gas_constants::SVector{NCOMP, RealT} + cv::SVector{NCOMP, RealT} + cp::SVector{NCOMP, RealT} + + function CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, RealT}(gammas::SVector{ + NCOMP, + RealT + }, + gas_constants::SVector{ + NCOMP, + RealT + }) where { + NVARS, + NCOMP, + RealT <: + Real + } + NCOMP >= 1 || + throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) + + cv = gas_constants ./ (gammas .- 1) + cp = gas_constants + gas_constants ./ (gammas .- 1) + + new(gammas, gas_constants, cv, cp) + end end - function CompressibleEulerMulticomponentEquations1D(; gammas, gas_constants) + _gammas = promote(gammas...) + _gas_constants = promote(gas_constants...) + RealT = promote_type(eltype(_gammas), eltype(_gas_constants), + typeof(gas_constants[1] / (gammas[1] - 1))) - _gammas = promote(gammas...) - _gas_constants = promote(gas_constants...) - RealT = promote_type(eltype(_gammas), eltype(_gas_constants), typeof(gas_constants[1] / (gammas[1] - 1))) + NVARS = length(_gammas) + 2 + NCOMP = length(_gammas) - NVARS = length(_gammas) + 2 - NCOMP = length(_gammas) + __gammas = SVector(map(RealT, _gammas)) + __gas_constants = SVector(map(RealT, _gas_constants)) - __gammas = SVector(map(RealT, _gammas)) - __gas_constants = SVector(map(RealT, _gas_constants)) - - return CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, RealT}(__gammas, __gas_constants) + return CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, RealT}(__gammas, + __gas_constants) end - -@inline Base.real(::CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, RealT}) where {NVARS, NCOMP, RealT} = RealT - - -function varnames(::typeof(cons2cons), equations::CompressibleEulerMulticomponentEquations1D) - - cons = ("rho_v1", "rho_e") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (cons..., rhos...) 
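# [Editor's sketch, not part of the original patch] Minimal usage of the keyword
# constructor documented above; the `gammas` and `gas_constants` values below are
# illustrative placeholders only, not values prescribed by this PR.
using Trixi
equations = CompressibleEulerMulticomponentEquations1D(gammas = (1.4, 1.648),
                                                       gas_constants = (0.287, 1.578))
# With two components this gives NVARS = NCOMP + 2 = 4 conservative variables, and
# varnames(cons2cons, equations) returns ("rho_v1", "rho_e", "rho1", "rho2").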
+@inline function Base.real(::CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, + RealT}) where { + NVARS, + NCOMP, + RealT + } + RealT end - -function varnames(::typeof(cons2prim), equations::CompressibleEulerMulticomponentEquations1D) - - prim = ("v1", "p") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (prim..., rhos...) +function varnames(::typeof(cons2cons), + equations::CompressibleEulerMulticomponentEquations1D) + cons = ("rho_v1", "rho_e") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (cons..., rhos...) end +function varnames(::typeof(cons2prim), + equations::CompressibleEulerMulticomponentEquations1D) + prim = ("v1", "p") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (prim..., rhos...) +end # Set initial conditions at physical location `x` for time `t` @@ -110,27 +125,32 @@ A smooth initial condition used for convergence tests in combination with [`source_terms_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -function initial_condition_convergence_test(x, t, equations::CompressibleEulerMulticomponentEquations1D) - c = 2 - A = 0.1 - L = 2 - f = 1/L - omega = 2 * pi * f - ini = c + A * sin(omega * (x[1] - t)) +function initial_condition_convergence_test(x, t, + equations::CompressibleEulerMulticomponentEquations1D) + c = 2 + A = 0.1 + L = 2 + f = 1 / L + omega = 2 * pi * f + ini = c + A * sin(omega * (x[1] - t)) - v1 = 1.0 + v1 = 1.0 - rho = ini + rho = ini - # Here we compute an arbitrary number of different rhos. (one rho is double the next rho while the sum of all rhos is 1) - prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i-1) * (1-2)/(1-2^ncomponents(equations)) * rho for i in eachcomponent(equations)) + # Here we compute an arbitrary number of different rhos. (one rho is double the next rho while the sum of all rhos is 1) + prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i - 1) * (1 - 2) / + (1 - + 2^ncomponents(equations)) * + rho + for i in eachcomponent(equations)) - prim1 = rho * v1 - prim2 = rho^2 + prim1 = rho * v1 + prim2 = rho^2 - prim_other = SVector{2, real(equations)}(prim1, prim2) + prim_other = SVector{2, real(equations)}(prim1, prim2) - return vcat(prim_other, prim_rho) + return vcat(prim_other, prim_rho) end """ @@ -140,32 +160,33 @@ Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -@inline function source_terms_convergence_test(u, x, t, equations::CompressibleEulerMulticomponentEquations1D) - # Same settings as in `initial_condition` - c = 2 - A = 0.1 - L = 2 - f = 1/L - omega = 2 * pi * f +@inline function source_terms_convergence_test(u, x, t, + equations::CompressibleEulerMulticomponentEquations1D) + # Same settings as in `initial_condition` + c = 2 + A = 0.1 + L = 2 + f = 1 / L + omega = 2 * pi * f - gamma = totalgamma(u, equations) + gamma = totalgamma(u, equations) - x1, = x - si, co = sincos((t - x1)*omega) - tmp = (-((4 * si * A - 4c) + 1) * (gamma - 1) * co * A * omega) / 2 + x1, = x + si, co = sincos((t - x1) * omega) + tmp = (-((4 * si * A - 4c) + 1) * (gamma - 1) * co * A * omega) / 2 - # Here we compute an arbitrary number of different rhos. 
(one rho is double the next rho while the sum of all rhos is 1)
-  du_rho = SVector{ncomponents(equations), real(equations)}(0.0 for i in eachcomponent(equations))
+    # Here we compute an arbitrary number of different rhos. (one rho is double the next rho while the sum of all rhos is 1)
+    du_rho = SVector{ncomponents(equations), real(equations)}(0.0
+                                                              for i in eachcomponent(equations))

-  du1 = tmp
-  du2 = tmp
+    du1 = tmp
+    du2 = tmp

-  du_other = SVector{2, real(equations)}(du1, du2)
+    du_other = SVector{2, real(equations)}(du1, du2)

-  return vcat(du_other, du_rho)
+    return vcat(du_other, du_rho)
 end

-
 """
     initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerMulticomponentEquations1D)

 A weak blast wave adapted to the multicomponent equations, taken from
 - Hennemann, Gassner (2020)
   A provably entropy stable subcell shock capturing approach for high order split form DG
   [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044)
 """
-function initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerMulticomponentEquations1D)
-  # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3)
-  inicenter = SVector(0.0)
-  x_norm = x[1] - inicenter[1]
-  r = abs(x_norm)
-  cos_phi = x_norm > 0 ? one(x_norm) : -one(x_norm)
-
-  prim_rho = SVector{ncomponents(equations), real(equations)}(r > 0.5 ? 2^(i-1) * (1-2)/(1-2^ncomponents(equations))*1.0 : 2^(i-1) * (1-2)/(1-2^ncomponents(equations))*1.1691 for i in eachcomponent(equations))
-
-  v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi
-  p = r > 0.5 ? 1.0 : 1.245
-
-  prim_other = SVector{2, real(equations)}(v1, p)
-
-  return prim2cons(vcat(prim_other, prim_rho), equations)
+function initial_condition_weak_blast_wave(x, t,
+                                           equations::CompressibleEulerMulticomponentEquations1D)
+    # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3)
+    inicenter = SVector(0.0)
+    x_norm = x[1] - inicenter[1]
+    r = abs(x_norm)
+    cos_phi = x_norm > 0 ? one(x_norm) : -one(x_norm)
+
+    prim_rho = SVector{ncomponents(equations), real(equations)}(r > 0.5 ?
+                                                                2^(i - 1) * (1 - 2) /
+                                                                (1 -
+                                                                 2^ncomponents(equations)) *
+                                                                1.0 :
+                                                                2^(i - 1) * (1 - 2) /
+                                                                (1 -
+                                                                 2^ncomponents(equations)) *
+                                                                1.1691
+                                                                for i in eachcomponent(equations))
+
+    v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi
+    p = r > 0.5 ? 
1.0 : 1.245 + + prim_other = SVector{2, real(equations)}(v1, p) + + return prim2cons(vcat(prim_other, prim_rho), equations) end - # Calculate 1D flux for a single point -@inline function flux(u, orientation::Integer, equations::CompressibleEulerMulticomponentEquations1D) - rho_v1, rho_e = u +@inline function flux(u, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations1D) + rho_v1, rho_e = u - rho = density(u, equations) + rho = density(u, equations) - v1 = rho_v1/rho - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5 * rho * v1^2) + v1 = rho_v1 / rho + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 0.5 * rho * v1^2) - f_rho = densities(u, v1, equations) - f1 = rho_v1 * v1 + p - f2 = (rho_e + p) * v1 + f_rho = densities(u, v1, equations) + f1 = rho_v1 * v1 + p + f2 = (rho_e + p) * v1 - f_other = SVector{2, real(equations)}(f1, f2) + f_other = SVector{2, real(equations)}(f1, f2) - return vcat(f_other, f_rho) + return vcat(f_other, f_rho) end - """ flux_chandrashekar(u_ll, u_rr, orientation, equations::CompressibleEulerMulticomponentEquations1D) @@ -220,61 +250,66 @@ Entropy conserving two-point flux by "Formulation of Entropy-Stable schemes for the multicomponent compressible Euler equations"" arXiv:1904.00972v3 [math.NA] 4 Feb 2020 """ -@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerMulticomponentEquations1D) - # Unpack left and right state - @unpack gammas, gas_constants, cv = equations - rho_v1_ll, rho_e_ll = u_ll - rho_v1_rr, rho_e_rr = u_rr - rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+2], u_rr[i+2]) for i in eachcomponent(equations)) - rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+2] + u_rr[i+2]) for i in eachcomponent(equations)) - - # Iterating over all partial densities - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - gamma_ll = totalgamma(u_ll, equations) - gamma_rr = totalgamma(u_rr, equations) - - # extract velocities - v1_ll = rho_v1_ll/rho_ll - v1_rr = rho_v1_rr/rho_rr - v1_avg = 0.5 * (v1_ll + v1_rr) - v1_square = 0.5 * (v1_ll^2 + v1_rr^2) - v_sum = v1_avg - - enth = zero(v_sum) - help1_ll = zero(v1_ll) - help1_rr = zero(v1_rr) - - for i in eachcomponent(equations) - enth += rhok_avg[i] * gas_constants[i] - help1_ll += u_ll[i+2] * cv[i] - help1_rr += u_rr[i+2] * cv[i] - end - - T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2)) / help1_ll - T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2)) / help1_rr - T = 0.5 * (1.0/T_ll + 1.0/T_rr) - T_log = ln_mean(1.0/T_ll, 1.0/T_rr) - - # Calculate fluxes depending on orientation - help1 = zero(T_ll) - help2 = zero(T_rr) - - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) - for i in eachcomponent(equations) - help1 += f_rho[i] * cv[i] - help2 += f_rho[i] - end - f1 = (help2) * v1_avg + enth/T - f2 = (help1)/T_log - 0.5 * (v1_square) * (help2) + v1_avg * f1 - - f_other = SVector{2, real(equations)}(f1, f2) - - return vcat(f_other, f_rho) +@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations1D) + # Unpack left and right state + @unpack gammas, gas_constants, cv = equations + rho_v1_ll, rho_e_ll = u_ll + rho_v1_rr, rho_e_rr = u_rr + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 2], + u_rr[i + 2]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * 
(u_ll[i + 2] + + u_rr[i + 2]) + for i in eachcomponent(equations)) + + # Iterating over all partial densities + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + gamma_ll = totalgamma(u_ll, equations) + gamma_rr = totalgamma(u_rr, equations) + + # extract velocities + v1_ll = rho_v1_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v1_avg = 0.5 * (v1_ll + v1_rr) + v1_square = 0.5 * (v1_ll^2 + v1_rr^2) + v_sum = v1_avg + + enth = zero(v_sum) + help1_ll = zero(v1_ll) + help1_rr = zero(v1_rr) + + for i in eachcomponent(equations) + enth += rhok_avg[i] * gas_constants[i] + help1_ll += u_ll[i + 2] * cv[i] + help1_rr += u_rr[i + 2] * cv[i] + end + + T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2)) / help1_ll + T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2)) / help1_rr + T = 0.5 * (1.0 / T_ll + 1.0 / T_rr) + T_log = ln_mean(1.0 / T_ll, 1.0 / T_rr) + + # Calculate fluxes depending on orientation + help1 = zero(T_ll) + help2 = zero(T_rr) + + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + help1 += f_rho[i] * cv[i] + help2 += f_rho[i] + end + f1 = (help2) * v1_avg + enth / T + f2 = (help1) / T_log - 0.5 * (v1_square) * (help2) + v1_avg * f1 + + f_other = SVector{2, real(equations)}(f1, f2) + + return vcat(f_other, f_rho) end - """ flux_ranocha(u_ll, u_rr, orientation_or_normal_direction, equations::CompressibleEulerMulticomponentEquations1D) @@ -290,170 +325,180 @@ See also the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerMulticomponentEquations1D) - # Unpack left and right state - @unpack gammas, gas_constants, cv = equations - rho_v1_ll, rho_e_ll = u_ll - rho_v1_rr, rho_e_rr = u_rr - rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+2], u_rr[i+2]) for i in eachcomponent(equations)) - rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+2] + u_rr[i+2]) for i in eachcomponent(equations)) - - # Iterating over all partial densities - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - # Calculating gamma - gamma = totalgamma(0.5*(u_ll+u_rr), equations) - inv_gamma_minus_one = 1/(gamma-1) - - # extract velocities - v1_ll = rho_v1_ll / rho_ll - v1_rr = rho_v1_rr / rho_rr - v1_avg = 0.5 * (v1_ll + v1_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr) - - # density flux - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) - - # helpful variables - f_rho_sum = zero(v1_ll) - help1_ll = zero(v1_ll) - help1_rr = zero(v1_rr) - enth_ll = zero(v1_ll) - enth_rr = zero(v1_rr) - for i in eachcomponent(equations) - enth_ll += u_ll[i+2] * gas_constants[i] - enth_rr += u_rr[i+2] * gas_constants[i] - f_rho_sum += f_rho[i] - help1_ll += u_ll[i+2] * cv[i] - help1_rr += u_rr[i+2] * cv[i] - end - - # temperature and pressure - T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2)) / help1_ll - T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2)) / help1_rr - p_ll = T_ll * enth_ll - p_rr = T_rr * enth_rr - p_avg = 0.5 * (p_ll + p_rr) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - - # momentum and energy flux - f1 = f_rho_sum * v1_avg + p_avg - f2 = f_rho_sum * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one) + 0.5 * (p_ll*v1_rr + p_rr*v1_ll) - f_other = SVector{2, 
real(equations)}(f1, f2) - - return vcat(f_other, f_rho) +@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations1D) + # Unpack left and right state + @unpack gammas, gas_constants, cv = equations + rho_v1_ll, rho_e_ll = u_ll + rho_v1_rr, rho_e_rr = u_rr + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 2], + u_rr[i + 2]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i + 2] + + u_rr[i + 2]) + for i in eachcomponent(equations)) + + # Iterating over all partial densities + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + # Calculating gamma + gamma = totalgamma(0.5 * (u_ll + u_rr), equations) + inv_gamma_minus_one = 1 / (gamma - 1) + + # extract velocities + v1_ll = rho_v1_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v1_avg = 0.5 * (v1_ll + v1_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr) + + # density flux + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + for i in eachcomponent(equations)) + + # helpful variables + f_rho_sum = zero(v1_ll) + help1_ll = zero(v1_ll) + help1_rr = zero(v1_rr) + enth_ll = zero(v1_ll) + enth_rr = zero(v1_rr) + for i in eachcomponent(equations) + enth_ll += u_ll[i + 2] * gas_constants[i] + enth_rr += u_rr[i + 2] * gas_constants[i] + f_rho_sum += f_rho[i] + help1_ll += u_ll[i + 2] * cv[i] + help1_rr += u_rr[i + 2] * cv[i] + end + + # temperature and pressure + T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2)) / help1_ll + T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2)) / help1_rr + p_ll = T_ll * enth_ll + p_rr = T_rr * enth_rr + p_avg = 0.5 * (p_ll + p_rr) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + + # momentum and energy flux + f1 = f_rho_sum * v1_avg + p_avg + f2 = f_rho_sum * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one) + + 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + f_other = SVector{2, real(equations)}(f1, f2) + + return vcat(f_other, f_rho) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerMulticomponentEquations1D) - rho_v1_ll, rho_e_ll = u_ll - rho_v1_rr, rho_e_rr = u_rr - - # Calculate primitive variables and speed of sound - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - gamma_ll = totalgamma(u_ll, equations) - gamma_rr = totalgamma(u_rr, equations) - - v_ll = rho_v1_ll / rho_ll - v_rr = rho_v1_rr / rho_rr - - p_ll = (gamma_ll - 1) * (rho_e_ll - 1/2 * rho_ll * v_ll^2) - p_rr = (gamma_rr - 1) * (rho_e_rr - 1/2 * rho_rr * v_rr^2) - c_ll = sqrt(gamma_ll * p_ll / rho_ll) - c_rr = sqrt(gamma_rr * p_rr / rho_rr) - - λ_max = max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations1D) + rho_v1_ll, rho_e_ll = u_ll + rho_v1_rr, rho_e_rr = u_rr + + # Calculate primitive variables and speed of sound + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + gamma_ll = totalgamma(u_ll, equations) + gamma_rr = totalgamma(u_rr, equations) + + v_ll = rho_v1_ll / rho_ll + v_rr = rho_v1_rr / rho_rr + + p_ll = (gamma_ll - 1) * (rho_e_ll - 1 / 2 * rho_ll * v_ll^2) + p_rr = (gamma_rr - 1) * (rho_e_rr - 1 / 2 * rho_rr * v_rr^2) + c_ll = sqrt(gamma_ll * p_ll / rho_ll) + c_rr = sqrt(gamma_rr * p_rr / rho_rr) + + λ_max = max(abs(v_ll), abs(v_rr)) + 
max(c_ll, c_rr) end +@inline function max_abs_speeds(u, + equations::CompressibleEulerMulticomponentEquations1D) + rho_v1, rho_e = u -@inline function max_abs_speeds(u, equations::CompressibleEulerMulticomponentEquations1D) - rho_v1, rho_e = u + rho = density(u, equations) + v1 = rho_v1 / rho - rho = density(u, equations) - v1 = rho_v1 / rho + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 1 / 2 * rho * (v1^2)) + c = sqrt(gamma * p / rho) - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 1/2 * rho * (v1^2)) - c = sqrt(gamma * p / rho) - - return (abs(v1) + c, ) + return (abs(v1) + c,) end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::CompressibleEulerMulticomponentEquations1D) - rho_v1, rho_e = u + rho_v1, rho_e = u - prim_rho = SVector{ncomponents(equations), real(equations)}(u[i+2] for i in eachcomponent(equations)) + prim_rho = SVector{ncomponents(equations), real(equations)}(u[i + 2] + for i in eachcomponent(equations)) - rho = density(u, equations) - v1 = rho_v1 / rho - gamma = totalgamma(u, equations) + rho = density(u, equations) + v1 = rho_v1 / rho + gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5 * rho * (v1^2)) - prim_other = SVector{2, real(equations)}(v1, p) + p = (gamma - 1) * (rho_e - 0.5 * rho * (v1^2)) + prim_other = SVector{2, real(equations)}(v1, p) - return vcat(prim_other, prim_rho) + return vcat(prim_other, prim_rho) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::CompressibleEulerMulticomponentEquations1D) - @unpack cv, gammas = equations - v1, p = prim + @unpack cv, gammas = equations + v1, p = prim - RealT = eltype(prim) + RealT = eltype(prim) - cons_rho = SVector{ncomponents(equations), RealT}(prim[i+2] for i in eachcomponent(equations)) - rho = density(prim, equations) - gamma = totalgamma(prim, equations) + cons_rho = SVector{ncomponents(equations), RealT}(prim[i + 2] + for i in eachcomponent(equations)) + rho = density(prim, equations) + gamma = totalgamma(prim, equations) - rho_v1 = rho * v1 + rho_v1 = rho * v1 - rho_e = p/(gamma-1) + 0.5 * (rho_v1 * v1) + rho_e = p / (gamma - 1) + 0.5 * (rho_v1 * v1) - cons_other = SVector{2, RealT}(rho_v1, rho_e) + cons_other = SVector{2, RealT}(rho_v1, rho_e) - return vcat(cons_other, cons_rho) + return vcat(cons_other, cons_rho) end - # Convert conservative variables to entropy @inline function cons2entropy(u, equations::CompressibleEulerMulticomponentEquations1D) - @unpack cv, gammas, gas_constants = equations - rho_v1, rho_e = u - - rho = density(u, equations) - - help1 = zero(rho) - gas_constant = zero(rho) - for i in eachcomponent(equations) - help1 += u[i+2] * cv[i] - gas_constant += gas_constants[i] * (u[i+2]/rho) - end - - v1 = rho_v1 / rho - v_square = v1^2 - gamma = totalgamma(u, equations) - - p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) - s = log(p) - gamma * log(rho) - log(gas_constant) - rho_p = rho / p - T = (rho_e - 0.5 * rho * v_square) / (help1) - entrop_rho = SVector{ncomponents(equations), real(equations)}( gas_constant * ((gamma - s)/(gamma - 1.0) - (0.5 * v_square * rho_p)) for i in eachcomponent(equations)) - - w1 = gas_constant * v1 * rho_p - w2 = gas_constant * (-1.0 * rho_p) - - entrop_other = SVector{2, real(equations)}(w1, w2) - - return vcat(entrop_other, entrop_rho) + @unpack cv, gammas, gas_constants = equations + rho_v1, rho_e = u + + rho = density(u, equations) + + help1 = zero(rho) + gas_constant = zero(rho) + for i in eachcomponent(equations) + help1 
+= u[i + 2] * cv[i] + gas_constant += gas_constants[i] * (u[i + 2] / rho) + end + + v1 = rho_v1 / rho + v_square = v1^2 + gamma = totalgamma(u, equations) + + p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) + s = log(p) - gamma * log(rho) - log(gas_constant) + rho_p = rho / p + T = (rho_e - 0.5 * rho * v_square) / (help1) + entrop_rho = SVector{ncomponents(equations), real(equations)}(gas_constant * + ((gamma - s) / + (gamma - 1.0) - + (0.5 * v_square * + rho_p)) + for i in eachcomponent(equations)) + + w1 = gas_constant * v1 * rho_p + w2 = gas_constant * (-1.0 * rho_p) + + entrop_other = SVector{2, real(equations)}(w1, w2) + + return vcat(entrop_other, entrop_rho) end - """ totalgamma(u, equations::CompressibleEulerMulticomponentEquations1D) @@ -461,47 +506,42 @@ Function that calculates the total gamma out of all partial gammas using the partial density fractions as well as the partial specific heats at constant volume. """ @inline function totalgamma(u, equations::CompressibleEulerMulticomponentEquations1D) - @unpack cv, gammas = equations + @unpack cv, gammas = equations - help1 = zero(u[1]) - help2 = zero(u[1]) + help1 = zero(u[1]) + help2 = zero(u[1]) - for i in eachcomponent(equations) - help1 += u[i+2] * cv[i] * gammas[i] - help2 += u[i+2] * cv[i] - end + for i in eachcomponent(equations) + help1 += u[i + 2] * cv[i] * gammas[i] + help2 += u[i + 2] * cv[i] + end - return help1/help2 + return help1 / help2 end - @inline function pressure(u, equations::CompressibleEulerMulticomponentEquations1D) - rho_v1, rho_e = u + rho_v1, rho_e = u - rho = density(u, equations) - gamma = totalgamma(u, equations) + rho = density(u, equations) + gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5 * (rho_v1^2)/rho) + p = (gamma - 1) * (rho_e - 0.5 * (rho_v1^2) / rho) - return p + return p end - @inline function density(u, equations::CompressibleEulerMulticomponentEquations1D) - rho = zero(u[1]) - - for i in eachcomponent(equations) - rho += u[i+2] - end + rho = zero(u[1]) - return rho - end - - - @inline function densities(u, v, equations::CompressibleEulerMulticomponentEquations1D) - - return SVector{ncomponents(equations), real(equations)}(u[i+2]*v for i in eachcomponent(equations)) - end + for i in eachcomponent(equations) + rho += u[i + 2] + end + return rho +end +@inline function densities(u, v, equations::CompressibleEulerMulticomponentEquations1D) + return SVector{ncomponents(equations), real(equations)}(u[i + 2] * v + for i in eachcomponent(equations)) +end end # @muladd diff --git a/src/equations/compressible_euler_multicomponent_2d.jl b/src/equations/compressible_euler_multicomponent_2d.jl index bb91cfbcb4e..5a015777cb1 100644 --- a/src/equations/compressible_euler_multicomponent_2d.jl +++ b/src/equations/compressible_euler_multicomponent_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" CompressibleEulerMulticomponentEquations2D(; gammas, gas_constants) @@ -51,59 +51,74 @@ In case of more than one component, the specific heat ratios `gammas` and the ga The remaining variables like the specific heats at constant volume 'cv' or the specific heats at constant pressure 'cp' are then calculated considering a calorically perfect gas. 
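For example (an editor's illustrative sketch, not part of the original patch;
the numerical values below are placeholders only):

    using Trixi
    equations = CompressibleEulerMulticomponentEquations2D(gammas = (1.4, 1.648),
                                                           gas_constants = (0.287, 1.578))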
""" -struct CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, RealT<:Real} <: AbstractCompressibleEulerMulticomponentEquations{2, NVARS, NCOMP} - gammas ::SVector{NCOMP, RealT} - gas_constants ::SVector{NCOMP, RealT} - cv ::SVector{NCOMP, RealT} - cp ::SVector{NCOMP, RealT} - - function CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, RealT}(gammas ::SVector{NCOMP, RealT}, - gas_constants::SVector{NCOMP, RealT}) where {NVARS, NCOMP, RealT<:Real} - - NCOMP >= 1 || throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) - - cv = gas_constants ./ (gammas .- 1) - cp = gas_constants + gas_constants ./ (gammas .- 1) - - new(gammas, gas_constants,cv, cp) - end +struct CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, RealT <: Real} <: + AbstractCompressibleEulerMulticomponentEquations{2, NVARS, NCOMP} + gammas::SVector{NCOMP, RealT} + gas_constants::SVector{NCOMP, RealT} + cv::SVector{NCOMP, RealT} + cp::SVector{NCOMP, RealT} + + function CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, RealT}(gammas::SVector{ + NCOMP, + RealT + }, + gas_constants::SVector{ + NCOMP, + RealT + }) where { + NVARS, + NCOMP, + RealT <: + Real + } + NCOMP >= 1 || + throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) + + cv = gas_constants ./ (gammas .- 1) + cp = gas_constants + gas_constants ./ (gammas .- 1) + + new(gammas, gas_constants, cv, cp) + end end - function CompressibleEulerMulticomponentEquations2D(; gammas, gas_constants) + _gammas = promote(gammas...) + _gas_constants = promote(gas_constants...) + RealT = promote_type(eltype(_gammas), eltype(_gas_constants), + typeof(gas_constants[1] / (gammas[1] - 1))) - _gammas = promote(gammas...) - _gas_constants = promote(gas_constants...) - RealT = promote_type(eltype(_gammas), eltype(_gas_constants), typeof(gas_constants[1] / (gammas[1] - 1))) - - NVARS = length(_gammas) + 3 - NCOMP = length(_gammas) + NVARS = length(_gammas) + 3 + NCOMP = length(_gammas) - __gammas = SVector(map(RealT, _gammas)) - __gas_constants = SVector(map(RealT, _gas_constants)) + __gammas = SVector(map(RealT, _gammas)) + __gas_constants = SVector(map(RealT, _gas_constants)) - return CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, RealT}(__gammas, __gas_constants) + return CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, RealT}(__gammas, + __gas_constants) end - -@inline Base.real(::CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, RealT}) where {NVARS, NCOMP, RealT} = RealT - - -function varnames(::typeof(cons2cons), equations::CompressibleEulerMulticomponentEquations2D) - - cons = ("rho_v1", "rho_v2", "rho_e") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (cons..., rhos...) +@inline function Base.real(::CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, + RealT}) where { + NVARS, + NCOMP, + RealT + } + RealT end - -function varnames(::typeof(cons2prim), equations::CompressibleEulerMulticomponentEquations2D) - - prim = ("v1", "v2", "p") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (prim..., rhos...) +function varnames(::typeof(cons2cons), + equations::CompressibleEulerMulticomponentEquations2D) + cons = ("rho_v1", "rho_v2", "rho_e") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (cons..., rhos...) 
end +function varnames(::typeof(cons2prim), + equations::CompressibleEulerMulticomponentEquations2D) + prim = ("v1", "v2", "p") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (prim..., rhos...) +end # Set initial conditions at physical location `x` for time `t` @@ -114,29 +129,34 @@ A smooth initial condition used for convergence tests in combination with [`source_terms_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -function initial_condition_convergence_test(x, t, equations::CompressibleEulerMulticomponentEquations2D) - c = 2 - A = 0.1 - L = 2 - f = 1/L - omega = 2 * pi * f - ini = c + A * sin(omega * (x[1] + x[2] - t)) - - v1 = 1.0 - v2 = 1.0 - - rho = ini - - # Here we compute an arbitrary number of different rhos. (one rho is double the next rho while the sum of all rhos is 1) - prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i-1) * (1-2)/(1-2^ncomponents(equations)) * rho for i in eachcomponent(equations)) - - prim1 = rho * v1 - prim2 = rho * v2 - prim3 = rho^2 - - prim_other = SVector{3, real(equations)}(prim1, prim2, prim3) - - return vcat(prim_other, prim_rho) +function initial_condition_convergence_test(x, t, + equations::CompressibleEulerMulticomponentEquations2D) + c = 2 + A = 0.1 + L = 2 + f = 1 / L + omega = 2 * pi * f + ini = c + A * sin(omega * (x[1] + x[2] - t)) + + v1 = 1.0 + v2 = 1.0 + + rho = ini + + # Here we compute an arbitrary number of different rhos. (one rho is double the next rho while the sum of all rhos is 1) + prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i - 1) * (1 - 2) / + (1 - + 2^ncomponents(equations)) * + rho + for i in eachcomponent(equations)) + + prim1 = rho * v1 + prim2 = rho * v2 + prim3 = rho^2 + + prim_other = SVector{3, real(equations)}(prim1, prim2, prim3) + + return vcat(prim_other, prim_rho) end """ @@ -146,38 +166,42 @@ Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -@inline function source_terms_convergence_test(u, x, t, equations::CompressibleEulerMulticomponentEquations2D) - # Same settings as in `initial_condition` - c = 2 - A = 0.1 - L = 2 - f = 1/L - omega = 2 * pi * f - - gamma = totalgamma(u, equations) - - x1, x2 = x - si, co = sincos((x1 + x2 - t)*omega) - tmp1 = co * A * omega - tmp2 = si * A - tmp3 = gamma - 1 - tmp4 = (2*c - 1)*tmp3 - tmp5 = (2*tmp2*gamma - 2*tmp2 + tmp4 + 1)*tmp1 - tmp6 = tmp2 + c - - # Here we compute an arbitrary number of different rhos. 
(one rho is double the next rho while the sum of all rhos is 1)
-  du_rho = SVector{ncomponents(equations), real(equations)}(2^(i-1) * (1-2)/(1-2^ncomponents(equations)) * tmp1 for i in eachcomponent(equations))
-
-  du1 = tmp5
-  du2 = tmp5
-  du3 = 2*((tmp6 - 1.0)*tmp3 + tmp6*gamma)*tmp1
-
-  du_other = SVector{3, real(equations)}(du1, du2, du3)
-
-  return vcat(du_other, du_rho)
+@inline function source_terms_convergence_test(u, x, t,
+                                               equations::CompressibleEulerMulticomponentEquations2D)
+    # Same settings as in `initial_condition`
+    c = 2
+    A = 0.1
+    L = 2
+    f = 1 / L
+    omega = 2 * pi * f
+
+    gamma = totalgamma(u, equations)
+
+    x1, x2 = x
+    si, co = sincos((x1 + x2 - t) * omega)
+    tmp1 = co * A * omega
+    tmp2 = si * A
+    tmp3 = gamma - 1
+    tmp4 = (2 * c - 1) * tmp3
+    tmp5 = (2 * tmp2 * gamma - 2 * tmp2 + tmp4 + 1) * tmp1
+    tmp6 = tmp2 + c
+
+    # Here we compute an arbitrary number of different rhos. (one rho is double the next rho while the sum of all rhos is 1)
+    du_rho = SVector{ncomponents(equations), real(equations)}(2^(i - 1) * (1 - 2) /
+                                                              (1 -
+                                                               2^ncomponents(equations)) *
+                                                              tmp1
+                                                              for i in eachcomponent(equations))
+
+    du1 = tmp5
+    du2 = tmp5
+    du3 = 2 * ((tmp6 - 1.0) * tmp3 + tmp6 * gamma) * tmp1
+
+    du_other = SVector{3, real(equations)}(du1, du2, du3)
+
+    return vcat(du_other, du_rho)
 end

-
 """
     initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerMulticomponentEquations2D)

 A weak blast wave adapted to the multicomponent equations, taken from
 - Hennemann, Gassner (2020)
   A provably entropy stable subcell shock capturing approach for high order split form DG
   [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044)
 """
-function initial_condition_weak_blast_wave(x, t, equations::CompressibleEulerMulticomponentEquations2D)
-  # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3)
-  # Set up polar coordinates
-  inicenter = SVector(0.0, 0.0)
-  x_norm = x[1] - inicenter[1]
-  y_norm = x[2] - inicenter[2]
-  r = sqrt(x_norm^2 + y_norm^2)
-  phi = atan(y_norm, x_norm)
-  sin_phi, cos_phi = sincos(phi)
-
-  prim_rho = SVector{ncomponents(equations), real(equations)}(r > 0.5 ? 2^(i-1) * (1-2)/(1-2^ncomponents(equations))*1.0 : 2^(i-1) * (1-2)/(1-2^ncomponents(equations))*1.1691 for i in eachcomponent(equations))
-
-  v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi
-  v2 = r > 0.5 ? 0.0 : 0.1882 * sin_phi
-  p = r > 0.5 ? 1.0 : 1.245
-
-  prim_other = SVector{3, real(equations)}(v1, v2, p)
-
-  return prim2cons(vcat(prim_other, prim_rho),equations)
+function initial_condition_weak_blast_wave(x, t,
+                                           equations::CompressibleEulerMulticomponentEquations2D)
+    # From Hennemann & Gassner JCP paper 2020 (Sec. 6.3)
+    # Set up polar coordinates
+    inicenter = SVector(0.0, 0.0)
+    x_norm = x[1] - inicenter[1]
+    y_norm = x[2] - inicenter[2]
+    r = sqrt(x_norm^2 + y_norm^2)
+    phi = atan(y_norm, x_norm)
+    sin_phi, cos_phi = sincos(phi)
+
+    prim_rho = SVector{ncomponents(equations), real(equations)}(r > 0.5 ?
+                                                                2^(i - 1) * (1 - 2) /
+                                                                (1 -
+                                                                 2^ncomponents(equations)) *
+                                                                1.0 :
+                                                                2^(i - 1) * (1 - 2) /
+                                                                (1 -
+                                                                 2^ncomponents(equations)) *
+                                                                1.1691
+                                                                for i in eachcomponent(equations))
+
+    v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi
+    v2 = r > 0.5 ? 0.0 : 0.1882 * sin_phi
+    p = r > 0.5 ? 
1.0 : 1.245 + + prim_other = SVector{3, real(equations)}(v1, v2, p) + + return prim2cons(vcat(prim_other, prim_rho), equations) end - # Calculate 1D flux for a single point -@inline function flux(u, orientation::Integer, equations::CompressibleEulerMulticomponentEquations2D) - rho_v1, rho_v2, rho_e = u - - rho = density(u, equations) - - v1 = rho_v1/rho - v2 = rho_v2/rho - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5 * rho * (v1^2 + v2^2)) - - if orientation == 1 - f_rho = densities(u, v1, equations) - f1 = rho_v1 * v1 + p - f2 = rho_v2 * v1 - f3 = (rho_e + p) * v1 - else - f_rho = densities(u, v2, equations) - f1 = rho_v1 * v2 - f2 = rho_v2 * v2 + p - f3 = (rho_e + p) * v2 - end - - f_other = SVector{3, real(equations)}(f1, f2, f3) - - return vcat(f_other, f_rho) -end +@inline function flux(u, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations2D) + rho_v1, rho_v2, rho_e = u + + rho = density(u, equations) + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 0.5 * rho * (v1^2 + v2^2)) + + if orientation == 1 + f_rho = densities(u, v1, equations) + f1 = rho_v1 * v1 + p + f2 = rho_v2 * v1 + f3 = (rho_e + p) * v1 + else + f_rho = densities(u, v2, equations) + f1 = rho_v1 * v2 + f2 = rho_v2 * v2 + p + f3 = (rho_e + p) * v2 + end + + f_other = SVector{3, real(equations)}(f1, f2, f3) + return vcat(f_other, f_rho) +end """ flux_chandrashekar(u_ll, u_rr, orientation, equations::CompressibleEulerMulticomponentEquations2D) @@ -245,72 +278,80 @@ Adaption of the entropy conserving two-point flux by "Formulation of Entropy-Stable schemes for the multicomponent compressible Euler equations"" arXiv:1904.00972v3 [math.NA] 4 Feb 2020 """ -@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerMulticomponentEquations2D) - # Unpack left and right state - @unpack gammas, gas_constants, cv = equations - rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll - rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr - rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+3], u_rr[i+3]) for i in eachcomponent(equations)) - rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+3] + u_rr[i+3]) for i in eachcomponent(equations)) - - # Iterating over all partial densities - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - # extract velocities - v1_ll = rho_v1_ll/rho_ll - v2_ll = rho_v2_ll/rho_ll - v1_rr = rho_v1_rr/rho_rr - v2_rr = rho_v2_rr/rho_rr - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v1_square = 0.5 * (v1_ll^2 + v1_rr^2) - v2_square = 0.5 * (v2_ll^2 + v2_rr^2) - v_sum = v1_avg + v2_avg - - enth = zero(v_sum) - help1_ll = zero(v1_ll) - help1_rr = zero(v1_rr) - - for i in eachcomponent(equations) - enth += rhok_avg[i] * gas_constants[i] - help1_ll += u_ll[i+3] * cv[i] - help1_rr += u_rr[i+3] * cv[i] - end - - T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2 + v2_ll^2)) / help1_ll - T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2 + v2_rr^2)) / help1_rr - T = 0.5 * (1.0/T_ll + 1.0/T_rr) - T_log = ln_mean(1.0/T_ll, 1.0/T_rr) - - # Calculate fluxes depending on orientation - help1 = zero(T_ll) - help2 = zero(T_rr) - if orientation == 1 - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) +@inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations2D) + # Unpack left and right state + @unpack gammas, 
gas_constants, cv = equations + rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll + rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 3], + u_rr[i + 3]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i + 3] + + u_rr[i + 3]) + for i in eachcomponent(equations)) + + # Iterating over all partial densities + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + # extract velocities + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v1_square = 0.5 * (v1_ll^2 + v1_rr^2) + v2_square = 0.5 * (v2_ll^2 + v2_rr^2) + v_sum = v1_avg + v2_avg + + enth = zero(v_sum) + help1_ll = zero(v1_ll) + help1_rr = zero(v1_rr) + for i in eachcomponent(equations) - help1 += f_rho[i] * cv[i] - help2 += f_rho[i] + enth += rhok_avg[i] * gas_constants[i] + help1_ll += u_ll[i + 3] * cv[i] + help1_rr += u_rr[i + 3] * cv[i] end - f1 = (help2) * v1_avg + enth/T - f2 = (help2) * v2_avg - f3 = (help1)/T_log - 0.5 * (v1_square + v2_square) * (help2) + v1_avg * f1 + v2_avg * f2 - else - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v2_avg for i in eachcomponent(equations)) - for i in eachcomponent(equations) - help1 += f_rho[i] * cv[i] - help2 += f_rho[i] + + T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2 + v2_ll^2)) / help1_ll + T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2 + v2_rr^2)) / help1_rr + T = 0.5 * (1.0 / T_ll + 1.0 / T_rr) + T_log = ln_mean(1.0 / T_ll, 1.0 / T_rr) + + # Calculate fluxes depending on orientation + help1 = zero(T_ll) + help2 = zero(T_rr) + if orientation == 1 + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + help1 += f_rho[i] * cv[i] + help2 += f_rho[i] + end + f1 = (help2) * v1_avg + enth / T + f2 = (help2) * v2_avg + f3 = (help1) / T_log - 0.5 * (v1_square + v2_square) * (help2) + v1_avg * f1 + + v2_avg * f2 + else + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v2_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + help1 += f_rho[i] * cv[i] + help2 += f_rho[i] + end + f1 = (help2) * v1_avg + f2 = (help2) * v2_avg + enth / T + f3 = (help1) / T_log - 0.5 * (v1_square + v2_square) * (help2) + v1_avg * f1 + + v2_avg * f2 end - f1 = (help2) * v1_avg - f2 = (help2) * v2_avg + enth/T - f3 = (help1)/T_log - 0.5 * (v1_square + v2_square) * (help2) + v1_avg * f1 + v2_avg * f2 - end - f_other = SVector{3, real(equations)}(f1, f2, f3) + f_other = SVector{3, real(equations)}(f1, f2, f3) - return vcat(f_other, f_rho) + return vcat(f_other, f_rho) end - """ flux_ranocha(u_ll, u_rr, orientation_or_normal_direction, equations::CompressibleEulerMulticomponentEquations2D) @@ -326,194 +367,206 @@ See also the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerMulticomponentEquations2D) - # Unpack left and right state - @unpack gammas, gas_constants, cv = equations - rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll - rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr - rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+3], u_rr[i+3]) for i in eachcomponent(equations)) - rhok_avg = 
SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+3] + u_rr[i+3]) for i in eachcomponent(equations)) - - # Iterating over all partial densities - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - # Calculating gamma - gamma = totalgamma(0.5*(u_ll+u_rr), equations) - inv_gamma_minus_one = 1/(gamma-1) - - # extract velocities - v1_ll = rho_v1_ll / rho_ll - v1_rr = rho_v1_rr / rho_rr - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_ll = rho_v2_ll / rho_ll - v2_rr = rho_v2_rr / rho_rr - v2_avg = 0.5 * (v2_ll + v2_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) - - # helpful variables - help1_ll = zero(v1_ll) - help1_rr = zero(v1_rr) - enth_ll = zero(v1_ll) - enth_rr = zero(v1_rr) - for i in eachcomponent(equations) - enth_ll += u_ll[i+3] * gas_constants[i] - enth_rr += u_rr[i+3] * gas_constants[i] - help1_ll += u_ll[i+3] * cv[i] - help1_rr += u_rr[i+3] * cv[i] - end - - # temperature and pressure - T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2 + v2_ll^2)) / help1_ll - T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2 + v2_rr^2)) / help1_rr - p_ll = T_ll * enth_ll - p_rr = T_rr * enth_rr - p_avg = 0.5 * (p_ll + p_rr) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - - f_rho_sum = zero(T_rr) - if orientation == 1 - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) +@inline function flux_ranocha(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations2D) + # Unpack left and right state + @unpack gammas, gas_constants, cv = equations + rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll + rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 3], + u_rr[i + 3]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i + 3] + + u_rr[i + 3]) + for i in eachcomponent(equations)) + + # Iterating over all partial densities + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + # Calculating gamma + gamma = totalgamma(0.5 * (u_ll + u_rr), equations) + inv_gamma_minus_one = 1 / (gamma - 1) + + # extract velocities + v1_ll = rho_v1_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_ll = rho_v2_ll / rho_ll + v2_rr = rho_v2_rr / rho_rr + v2_avg = 0.5 * (v2_ll + v2_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # helpful variables + help1_ll = zero(v1_ll) + help1_rr = zero(v1_rr) + enth_ll = zero(v1_ll) + enth_rr = zero(v1_rr) for i in eachcomponent(equations) - f_rho_sum += f_rho[i] + enth_ll += u_ll[i + 3] * gas_constants[i] + enth_rr += u_rr[i + 3] * gas_constants[i] + help1_ll += u_ll[i + 3] * cv[i] + help1_rr += u_rr[i + 3] * cv[i] end - f1 = f_rho_sum * v1_avg + p_avg - f2 = f_rho_sum * v2_avg - f3 = f_rho_sum * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one) + 0.5 * (p_ll*v1_rr + p_rr*v1_ll) - else - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v2_avg for i in eachcomponent(equations)) - for i in eachcomponent(equations) - f_rho_sum += f_rho[i] + + # temperature and pressure + T_ll = (rho_e_ll - 0.5 * rho_ll * (v1_ll^2 + v2_ll^2)) / help1_ll + T_rr = (rho_e_rr - 0.5 * rho_rr * (v1_rr^2 + v2_rr^2)) / help1_rr + p_ll = T_ll * enth_ll + p_rr = T_rr * enth_rr + p_avg = 0.5 * (p_ll + p_rr) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + + f_rho_sum = zero(T_rr) + if orientation == 1 + f_rho = 
SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + f_rho_sum += f_rho[i] + end + f1 = f_rho_sum * v1_avg + p_avg + f2 = f_rho_sum * v2_avg + f3 = f_rho_sum * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one) + + 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + else + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v2_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + f_rho_sum += f_rho[i] + end + f1 = f_rho_sum * v1_avg + f2 = f_rho_sum * v2_avg + p_avg + f3 = f_rho_sum * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one) + + 0.5 * (p_ll * v2_rr + p_rr * v2_ll) end - f1 = f_rho_sum * v1_avg - f2 = f_rho_sum * v2_avg + p_avg - f3 = f_rho_sum * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one) + 0.5 * (p_ll*v2_rr + p_rr*v2_ll) - end - # momentum and energy flux - f_other = SVector{3, real(equations)}(f1, f2, f3) + # momentum and energy flux + f_other = SVector{3, real(equations)}(f1, f2, f3) - return vcat(f_other, f_rho) + return vcat(f_other, f_rho) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerMulticomponentEquations2D) - rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll - rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr - - # Get the density and gas gamma - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - gamma_ll = totalgamma(u_ll, equations) - gamma_rr = totalgamma(u_rr, equations) - - # Get the velocities based on direction - if orientation == 1 - v_ll = rho_v1_ll / rho_ll - v_rr = rho_v1_rr / rho_rr - else # orientation == 2 - v_ll = rho_v2_ll / rho_ll - v_rr = rho_v2_rr / rho_rr - end - - # Compute the sound speeds on the left and right - p_ll = (gamma_ll - 1) * (rho_e_ll - 1/2 * (rho_v1_ll^2 + rho_v2_ll^2) / rho_ll) - c_ll = sqrt(gamma_ll * p_ll / rho_ll) - p_rr = (gamma_rr - 1) * (rho_e_rr - 1/2 * (rho_v1_rr^2 + rho_v2_rr^2) / rho_rr) - c_rr = sqrt(gamma_rr * p_rr / rho_rr) - - λ_max = max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) -end +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerMulticomponentEquations2D) + rho_v1_ll, rho_v2_ll, rho_e_ll = u_ll + rho_v1_rr, rho_v2_rr, rho_e_rr = u_rr + + # Get the density and gas gamma + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + gamma_ll = totalgamma(u_ll, equations) + gamma_rr = totalgamma(u_rr, equations) + + # Get the velocities based on direction + if orientation == 1 + v_ll = rho_v1_ll / rho_ll + v_rr = rho_v1_rr / rho_rr + else # orientation == 2 + v_ll = rho_v2_ll / rho_ll + v_rr = rho_v2_rr / rho_rr + end + # Compute the sound speeds on the left and right + p_ll = (gamma_ll - 1) * (rho_e_ll - 1 / 2 * (rho_v1_ll^2 + rho_v2_ll^2) / rho_ll) + c_ll = sqrt(gamma_ll * p_ll / rho_ll) + p_rr = (gamma_rr - 1) * (rho_e_rr - 1 / 2 * (rho_v1_rr^2 + rho_v2_rr^2) / rho_rr) + c_rr = sqrt(gamma_rr * p_rr / rho_rr) -@inline function max_abs_speeds(u, equations::CompressibleEulerMulticomponentEquations2D) - rho_v1, rho_v2, rho_e = u + λ_max = max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) +end - rho = density(u, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho +@inline function max_abs_speeds(u, + equations::CompressibleEulerMulticomponentEquations2D) + rho_v1, rho_v2, rho_e = u - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 1/2 * rho * (v1^2 + v2^2)) - c = 
sqrt(gamma * p / rho) + rho = density(u, equations) + v1 = rho_v1 / rho + v2 = rho_v2 / rho - return (abs(v1) + c, abs(v2) + c, ) -end + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 1 / 2 * rho * (v1^2 + v2^2)) + c = sqrt(gamma * p / rho) + return (abs(v1) + c, abs(v2) + c) +end # Convert conservative variables to primitive @inline function cons2prim(u, equations::CompressibleEulerMulticomponentEquations2D) - rho_v1, rho_v2, rho_e = u + rho_v1, rho_v2, rho_e = u - prim_rho = SVector{ncomponents(equations), real(equations)}(u[i+3] for i in eachcomponent(equations)) + prim_rho = SVector{ncomponents(equations), real(equations)}(u[i + 3] + for i in eachcomponent(equations)) - rho = density(u, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5 * rho * (v1^2 + v2^2)) - prim_other = SVector{3, real(equations)}(v1, v2, p) + rho = density(u, equations) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 0.5 * rho * (v1^2 + v2^2)) + prim_other = SVector{3, real(equations)}(v1, v2, p) - return vcat(prim_other, prim_rho) + return vcat(prim_other, prim_rho) end - # Convert conservative variables to entropy @inline function cons2entropy(u, equations::CompressibleEulerMulticomponentEquations2D) - @unpack cv, gammas, gas_constants = equations - rho_v1, rho_v2, rho_e = u - - rho = density(u, equations) - - # Multicomponent stuff - help1 = zero(rho) - gas_constant = zero(rho) - for i in eachcomponent(equations) - help1 += u[i+3] * cv[i] - gas_constant += gas_constants[i] * (u[i+3]/rho) - end - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v_square = v1^2 + v2^2 - gamma = totalgamma(u, equations) - - p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) - s = log(p) - gamma * log(rho) - log(gas_constant) - rho_p = rho / p - T = (rho_e - 0.5 * rho * v_square) / (help1) - entrop_rho = SVector{ncomponents(equations), real(equations)}( gas_constant * ((gamma - s)/(gamma - 1.0) - (0.5 * v_square * rho_p)) for i in eachcomponent(equations)) + @unpack cv, gammas, gas_constants = equations + rho_v1, rho_v2, rho_e = u - w1 = gas_constant * v1 * rho_p - w2 = gas_constant * v2 * rho_p - w3 = gas_constant * rho_p * (-1) + rho = density(u, equations) - entrop_other = SVector{3, real(equations)}(w1, w2, w3) + # Multicomponent stuff + help1 = zero(rho) + gas_constant = zero(rho) + for i in eachcomponent(equations) + help1 += u[i + 3] * cv[i] + gas_constant += gas_constants[i] * (u[i + 3] / rho) + end - return vcat(entrop_other, entrop_rho) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v_square = v1^2 + v2^2 + gamma = totalgamma(u, equations) + + p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) + s = log(p) - gamma * log(rho) - log(gas_constant) + rho_p = rho / p + T = (rho_e - 0.5 * rho * v_square) / (help1) + entrop_rho = SVector{ncomponents(equations), real(equations)}(gas_constant * + ((gamma - s) / + (gamma - 1.0) - + (0.5 * v_square * + rho_p)) + for i in eachcomponent(equations)) + + w1 = gas_constant * v1 * rho_p + w2 = gas_constant * v2 * rho_p + w3 = gas_constant * rho_p * (-1) + + entrop_other = SVector{3, real(equations)}(w1, w2, w3) + + return vcat(entrop_other, entrop_rho) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::CompressibleEulerMulticomponentEquations2D) - @unpack cv, gammas = equations - v1, v2, p = prim + @unpack cv, gammas = equations + v1, v2, p = prim - cons_rho = SVector{ncomponents(equations), real(equations)}(prim[i+3] for i 
in eachcomponent(equations)) - rho = density(prim, equations) - gamma = totalgamma(prim, equations) + cons_rho = SVector{ncomponents(equations), real(equations)}(prim[i + 3] + for i in eachcomponent(equations)) + rho = density(prim, equations) + gamma = totalgamma(prim, equations) - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_e = p/(gamma-1) + 0.5 * (rho_v1 * v1 + rho_v2 * v2) + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_e = p / (gamma - 1) + 0.5 * (rho_v1 * v1 + rho_v2 * v2) - cons_other = SVector{3, real(equations)}(rho_v1, rho_v2, rho_e) + cons_other = SVector{3, real(equations)}(rho_v1, rho_v2, rho_e) - return vcat(cons_other, cons_rho) + return vcat(cons_other, cons_rho) end - """ totalgamma(u, equations::CompressibleEulerMulticomponentEquations2D) @@ -521,45 +574,42 @@ Function that calculates the total gamma out of all partial gammas using the partial density fractions as well as the partial specific heats at constant volume. """ @inline function totalgamma(u, equations::CompressibleEulerMulticomponentEquations2D) - @unpack cv, gammas = equations + @unpack cv, gammas = equations - help1 = zero(u[1]) - help2 = zero(u[1]) + help1 = zero(u[1]) + help2 = zero(u[1]) - for i in eachcomponent(equations) - help1 += u[i+3] * cv[i] * gammas[i] - help2 += u[i+3] * cv[i] - end + for i in eachcomponent(equations) + help1 += u[i + 3] * cv[i] * gammas[i] + help2 += u[i + 3] * cv[i] + end - return help1/help2 + return help1 / help2 end +@inline function density_pressure(u, + equations::CompressibleEulerMulticomponentEquations2D) + rho_v1, rho_v2, rho_e = u -@inline function density_pressure(u, equations::CompressibleEulerMulticomponentEquations2D) - rho_v1, rho_v2, rho_e = u + rho = density(u, equations) + gamma = totalgamma(u, equations) + rho_times_p = (gamma - 1) * (rho * rho_e - 0.5 * (rho_v1^2 + rho_v2^2)) - rho = density(u, equations) - gamma = totalgamma(u, equations) - rho_times_p = (gamma - 1) * (rho * rho_e - 0.5 * (rho_v1^2 + rho_v2^2)) - - return rho_times_p + return rho_times_p end - @inline function density(u, equations::CompressibleEulerMulticomponentEquations2D) - rho = zero(u[1]) + rho = zero(u[1]) - for i in eachcomponent(equations) - rho += u[i+3] - end - - return rho - end - - @inline function densities(u, v, equations::CompressibleEulerMulticomponentEquations2D) - - return SVector{ncomponents(equations), real(equations)}(u[i+3]*v for i in eachcomponent(equations)) - end + for i in eachcomponent(equations) + rho += u[i + 3] + end + return rho +end +@inline function densities(u, v, equations::CompressibleEulerMulticomponentEquations2D) + return SVector{ncomponents(equations), real(equations)}(u[i + 3] * v + for i in eachcomponent(equations)) +end end # @muladd diff --git a/src/equations/compressible_navier_stokes_2d.jl b/src/equations/compressible_navier_stokes_2d.jl index 895fd2f2ae3..33badba15d9 100644 --- a/src/equations/compressible_navier_stokes_2d.jl +++ b/src/equations/compressible_navier_stokes_2d.jl @@ -76,19 +76,21 @@ w_2 = \frac{\rho v_1}{p},\, w_3 = \frac{\rho v_2}{p},\, w_4 = -\frac{\rho}{p} #!!! warning "Experimental code" # This code is experimental and may be changed or removed in any future release. 
""" -struct CompressibleNavierStokesDiffusion2D{GradientVariables, RealT <: Real, E <: AbstractCompressibleEulerEquations{2}} <: AbstractCompressibleNavierStokesDiffusion{2, 4} - # TODO: parabolic - # 1) For now save gamma and inv(gamma-1) again, but could potentially reuse them from the Euler equations - # 2) Add NGRADS as a type parameter here and in AbstractEquationsParabolic, add `ngradients(...)` accessor function - gamma::RealT # ratio of specific heats - inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications - - mu::RealT # viscosity - Pr::RealT # Prandtl number - kappa::RealT # thermal diffusivity for Fick's law - - equations_hyperbolic::E # CompressibleEulerEquations2D - gradient_variables::GradientVariables # GradientVariablesPrimitive or GradientVariablesEntropy +struct CompressibleNavierStokesDiffusion2D{GradientVariables, RealT <: Real, + E <: AbstractCompressibleEulerEquations{2}} <: + AbstractCompressibleNavierStokesDiffusion{2, 4} + # TODO: parabolic + # 1) For now save gamma and inv(gamma-1) again, but could potentially reuse them from the Euler equations + # 2) Add NGRADS as a type parameter here and in AbstractEquationsParabolic, add `ngradients(...)` accessor function + gamma::RealT # ratio of specific heats + inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications + + mu::RealT # viscosity + Pr::RealT # Prandtl number + kappa::RealT # thermal diffusivity for Fick's law + + equations_hyperbolic::E # CompressibleEulerEquations2D + gradient_variables::GradientVariables # GradientVariablesPrimitive or GradientVariablesEntropy end """ @@ -113,18 +115,19 @@ struct GradientVariablesEntropy end function CompressibleNavierStokesDiffusion2D(equations::CompressibleEulerEquations2D; mu, Prandtl, gradient_variables = GradientVariablesPrimitive()) - gamma = equations.gamma - inv_gamma_minus_one = equations.inv_gamma_minus_one - μ, Pr = promote(mu, Prandtl) - - # Under the assumption of constant Prandtl number the thermal conductivity - # constant is kappa = gamma μ / ((gamma-1) Pr). - # Important note! Factor of μ is accounted for later in `flux`. - kappa = gamma * inv_gamma_minus_one / Pr - - CompressibleNavierStokesDiffusion2D{typeof(gradient_variables), typeof(gamma), typeof(equations)}(gamma, inv_gamma_minus_one, - μ, Pr, kappa, - equations, gradient_variables) + gamma = equations.gamma + inv_gamma_minus_one = equations.inv_gamma_minus_one + μ, Pr = promote(mu, Prandtl) + + # Under the assumption of constant Prandtl number the thermal conductivity + # constant is kappa = gamma μ / ((gamma-1) Pr). + # Important note! Factor of μ is accounted for later in `flux`. 
+ kappa = gamma * inv_gamma_minus_one / Pr + + CompressibleNavierStokesDiffusion2D{typeof(gradient_variables), typeof(gamma), + typeof(equations)}(gamma, inv_gamma_minus_one, + μ, Pr, kappa, + equations, gradient_variables) end # TODO: parabolic @@ -132,148 +135,169 @@ end # varnames(::typeof(cons2prim) , ::CompressibleNavierStokesDiffusion2D) = ("v1", "v2", "T") # varnames(::typeof(cons2entropy), ::CompressibleNavierStokesDiffusion2D) = ("w2", "w3", "w4") -varnames(variable_mapping, equations_parabolic::CompressibleNavierStokesDiffusion2D) = - varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +function varnames(variable_mapping, + equations_parabolic::CompressibleNavierStokesDiffusion2D) + varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +end # we specialize this function to compute gradients of primitive variables instead of # conservative variables. -gradient_variable_transformation(::CompressibleNavierStokesDiffusion2D{GradientVariablesPrimitive}) = cons2prim -gradient_variable_transformation(::CompressibleNavierStokesDiffusion2D{GradientVariablesEntropy}) = cons2entropy - +function gradient_variable_transformation(::CompressibleNavierStokesDiffusion2D{ + GradientVariablesPrimitive + }) + cons2prim +end +function gradient_variable_transformation(::CompressibleNavierStokesDiffusion2D{ + GradientVariablesEntropy + }) + cons2entropy +end # Explicit formulas for the diffusive Navier-Stokes fluxes are available, e.g., in Section 2 # of the paper by Rueda-Ramírez, Hennemann, Hindenlang, Winters, and Gassner # "An Entropy Stable Nodal Discontinuous Galerkin Method for the resistive # MHD Equations. Part II: Subcell Finite Volume Shock Capturing" # where one sets the magnetic field components equal to 0. -function flux(u, gradients, orientation::Integer, equations::CompressibleNavierStokesDiffusion2D) - # Here, `u` is assumed to be the "transformed" variables specified by `gradient_variable_transformation`. - rho, v1, v2, _ = convert_transformed_to_primitive(u, equations) - # Here `gradients` is assumed to contain the gradients of the primitive variables (rho, v1, v2, T) - # either computed directly or reverse engineered from the gradient of the entropy variables - # by way of the `convert_gradient_variables` function. - _, dv1dx, dv2dx, dTdx = convert_derivative_to_primitive(u, gradients[1], equations) - _, dv1dy, dv2dy, dTdy = convert_derivative_to_primitive(u, gradients[2], equations) - - # Components of viscous stress tensor - - # (4/3 * (v1)_x - 2/3 * (v2)_y) - tau_11 = 4.0 / 3.0 * dv1dx - 2.0 / 3.0 * dv2dy - # ((v1)_y + (v2)_x) - # stress tensor is symmetric - tau_12 = dv1dy + dv2dx # = tau_21 - # (4/3 * (v2)_y - 2/3 * (v1)_x) - tau_22 = 4.0 / 3.0 * dv2dy - 2.0 / 3.0 * dv1dx - - # Fick's law q = -kappa * grad(T) = -kappa * grad(p / (R rho)) - # with thermal diffusivity constant kappa = gamma μ R / ((gamma-1) Pr) - # Note, the gas constant cancels under this formulation, so it is not present - # in the implementation - q1 = equations.kappa * dTdx - q2 = equations.kappa * dTdy - - # Constant dynamic viscosity is copied to a variable for readability. 
- # Offers flexibility for dynamic viscosity via Sutherland's law where it depends - # on temperature and reference values, Ts and Tref such that mu(T) - mu = equations.mu - - if orientation == 1 - # viscous flux components in the x-direction - f1 = zero(rho) - f2 = tau_11 * mu - f3 = tau_12 * mu - f4 = ( v1 * tau_11 + v2 * tau_12 + q1 ) * mu - - return SVector(f1, f2, f3, f4) - else # if orientation == 2 - # viscous flux components in the y-direction - # Note, symmetry is exploited for tau_12 = tau_21 - g1 = zero(rho) - g2 = tau_12 * mu # tau_21 * mu - g3 = tau_22 * mu - g4 = ( v1 * tau_12 + v2 * tau_22 + q2 ) * mu - - return SVector(g1, g2, g3, g4) - end +function flux(u, gradients, orientation::Integer, + equations::CompressibleNavierStokesDiffusion2D) + # Here, `u` is assumed to be the "transformed" variables specified by `gradient_variable_transformation`. + rho, v1, v2, _ = convert_transformed_to_primitive(u, equations) + # Here `gradients` is assumed to contain the gradients of the primitive variables (rho, v1, v2, T) + # either computed directly or reverse engineered from the gradient of the entropy variables + # by way of the `convert_gradient_variables` function. + _, dv1dx, dv2dx, dTdx = convert_derivative_to_primitive(u, gradients[1], equations) + _, dv1dy, dv2dy, dTdy = convert_derivative_to_primitive(u, gradients[2], equations) + + # Components of viscous stress tensor + + # (4/3 * (v1)_x - 2/3 * (v2)_y) + tau_11 = 4.0 / 3.0 * dv1dx - 2.0 / 3.0 * dv2dy + # ((v1)_y + (v2)_x) + # stress tensor is symmetric + tau_12 = dv1dy + dv2dx # = tau_21 + # (4/3 * (v2)_y - 2/3 * (v1)_x) + tau_22 = 4.0 / 3.0 * dv2dy - 2.0 / 3.0 * dv1dx + + # Fick's law q = -kappa * grad(T) = -kappa * grad(p / (R rho)) + # with thermal diffusivity constant kappa = gamma μ R / ((gamma-1) Pr) + # Note, the gas constant cancels under this formulation, so it is not present + # in the implementation + q1 = equations.kappa * dTdx + q2 = equations.kappa * dTdy + + # Constant dynamic viscosity is copied to a variable for readability. + # Offers flexibility for dynamic viscosity via Sutherland's law where it depends + # on temperature and reference values, Ts and Tref such that mu(T) + mu = equations.mu + + if orientation == 1 + # viscous flux components in the x-direction + f1 = zero(rho) + f2 = tau_11 * mu + f3 = tau_12 * mu + f4 = (v1 * tau_11 + v2 * tau_12 + q1) * mu + + return SVector(f1, f2, f3, f4) + else # if orientation == 2 + # viscous flux components in the y-direction + # Note, symmetry is exploited for tau_12 = tau_21 + g1 = zero(rho) + g2 = tau_12 * mu # tau_21 * mu + g3 = tau_22 * mu + g4 = (v1 * tau_12 + v2 * tau_22 + q2) * mu + + return SVector(g1, g2, g3, g4) + end end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::CompressibleNavierStokesDiffusion2D) - rho, rho_v1, rho_v2, _ = u + rho, rho_v1, rho_v2, _ = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - T = temperature(u, equations) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + T = temperature(u, equations) - return SVector(rho, v1, v2, T) + return SVector(rho, v1, v2, T) end # Convert conservative variables to entropy # TODO: parabolic. We can improve efficiency by not computing w_1, which involves logarithms # This can be done by specializing `cons2entropy` and `entropy2cons` to `CompressibleNavierStokesDiffusion2D`, # but this may be confusing to new users. 
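The 2D stress components and Fick's-law heat flux in the hunk above can be exercised in isolation. A minimal sketch with made-up gradient values (all names hypothetical, `kappa` as computed in the constructor):

```julia
# Sketch of the 2D viscous terms shown above; gradients are taken as given numbers.
function viscous_terms_2d(dv1dx, dv1dy, dv2dx, dv2dy, dTdx, dTdy, kappa)
    tau_11 = 4 / 3 * dv1dx - 2 / 3 * dv2dy
    tau_12 = dv1dy + dv2dx          # symmetric: tau_21 == tau_12
    tau_22 = 4 / 3 * dv2dy - 2 / 3 * dv1dx
    q1 = kappa * dTdx               # Fick's law; the gas constant cancels
    q2 = kappa * dTdy
    return (; tau_11, tau_12, tau_22, q1, q2)
end

viscous_terms_2d(0.1, 0.0, 0.0, -0.1, 0.2, 0.0, 4.861)
```

Multiplying each entry by `mu` afterwards reproduces the flux components assembled in the `orientation` branches.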
-cons2entropy(u, equations::CompressibleNavierStokesDiffusion2D) = cons2entropy(u, equations.equations_hyperbolic) -entropy2cons(w, equations::CompressibleNavierStokesDiffusion2D) = entropy2cons(w, equations.equations_hyperbolic) +function cons2entropy(u, equations::CompressibleNavierStokesDiffusion2D) + cons2entropy(u, equations.equations_hyperbolic) +end +function entropy2cons(w, equations::CompressibleNavierStokesDiffusion2D) + entropy2cons(w, equations.equations_hyperbolic) +end # the `flux` function takes in transformed variables `u` which depend on the type of the gradient variables. # For CNS, it is simplest to formulate the viscous terms in primitive variables, so we transform the transformed # variables into primitive variables. -@inline function convert_transformed_to_primitive(u_transformed, equations::CompressibleNavierStokesDiffusion2D{GradientVariablesPrimitive}) - return u_transformed +@inline function convert_transformed_to_primitive(u_transformed, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesPrimitive + }) + return u_transformed end # TODO: parabolic. Make this more efficient! -@inline function convert_transformed_to_primitive(u_transformed, equations::CompressibleNavierStokesDiffusion2D{GradientVariablesEntropy}) - # note: this uses CompressibleNavierStokesDiffusion2D versions of cons2prim and entropy2cons - return cons2prim(entropy2cons(u_transformed, equations), equations) +@inline function convert_transformed_to_primitive(u_transformed, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesEntropy + }) + # note: this uses CompressibleNavierStokesDiffusion2D versions of cons2prim and entropy2cons + return cons2prim(entropy2cons(u_transformed, equations), equations) end - # Takes the solution values `u` and gradient of the entropy variables (w_2, w_3, w_4) and # reverse engineers the gradients to be terms of the primitive variables (v1, v2, T). # Helpful because then the diffusive fluxes have the same form as on paper. # Note, the first component of `gradient_entropy_vars` contains gradient(rho) which is unused. # TODO: parabolic; entropy stable viscous terms -@inline function convert_derivative_to_primitive(u, gradient, ::CompressibleNavierStokesDiffusion2D{GradientVariablesPrimitive}) - return gradient +@inline function convert_derivative_to_primitive(u, gradient, + ::CompressibleNavierStokesDiffusion2D{ + GradientVariablesPrimitive + }) + return gradient end # the first argument is always the "transformed" variables. @inline function convert_derivative_to_primitive(w, gradient_entropy_vars, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesEntropy}) - - # TODO: parabolic. This is inefficient to pass in transformed variables but then transform them back. - # We can fix this if we directly compute v1, v2, T from the entropy variables - u = entropy2cons(w, equations) # calls a "modified" entropy2cons defined for CompressibleNavierStokesDiffusion2D - rho, rho_v1, rho_v2, _ = u - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - T = temperature(u, equations) - - return SVector(gradient_entropy_vars[1], - T * (gradient_entropy_vars[2] + v1 * gradient_entropy_vars[4]), # grad(u) = T*(grad(w_2)+v1*grad(w_4)) - T * (gradient_entropy_vars[3] + v2 * gradient_entropy_vars[4]), # grad(v) = T*(grad(w_3)+v2*grad(w_4)) - T * T * gradient_entropy_vars[4] # grad(T) = T^2*grad(w_4)) - ) + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesEntropy + }) + + # TODO: parabolic. 
This is inefficient to pass in transformed variables but then transform them back. + # We can fix this if we directly compute v1, v2, T from the entropy variables + u = entropy2cons(w, equations) # calls a "modified" entropy2cons defined for CompressibleNavierStokesDiffusion2D + rho, rho_v1, rho_v2, _ = u + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + T = temperature(u, equations) + + return SVector(gradient_entropy_vars[1], + T * (gradient_entropy_vars[2] + v1 * gradient_entropy_vars[4]), # grad(u) = T*(grad(w_2)+v1*grad(w_4)) + T * (gradient_entropy_vars[3] + v2 * gradient_entropy_vars[4]), # grad(v) = T*(grad(w_3)+v2*grad(w_4)) + T * T * gradient_entropy_vars[4]) end - # This routine is required because `prim2cons` is called in `initial_condition`, which # is called with `equations::CompressibleEulerEquations2D`. This means it is inconsistent # with `cons2prim(..., ::CompressibleNavierStokesDiffusion2D)` as defined above. # TODO: parabolic. Is there a way to clean this up? -@inline prim2cons(u, equations::CompressibleNavierStokesDiffusion2D) = +@inline function prim2cons(u, equations::CompressibleNavierStokesDiffusion2D) prim2cons(u, equations.equations_hyperbolic) - +end @inline function temperature(u, equations::CompressibleNavierStokesDiffusion2D) - rho, rho_v1, rho_v2, rho_e = u + rho, rho_v1, rho_v2, rho_e = u - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2) / rho) - T = p / rho - return T + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2) / rho) + T = p / rho + return T end # TODO: can we generalize this to MHD? @@ -289,8 +313,8 @@ to be boundary condition types such as the `NoSlip` velocity boundary condition This is an experimental feature and may change in future releases. """ struct BoundaryConditionNavierStokesWall{V, H} - boundary_condition_velocity::V - boundary_condition_heat_flux::H + boundary_condition_velocity::V + boundary_condition_heat_flux::H end """ @@ -302,7 +326,7 @@ and should return a `SVector{NDIMS}` whose entries are the velocity vector at a point `x` and time `t`. """ struct NoSlip{F} - boundary_value_function::F # value of the velocity vector on the boundary + boundary_value_function::F # value of the velocity vector on the boundary end """ @@ -314,7 +338,7 @@ The field `boundary_value_function` should be a function with signature temperature at point `x` and time `t`. """ struct Isothermal{F} - boundary_value_function::F # value of the temperature on the boundary + boundary_value_function::F # value of the temperature on the boundary end """ @@ -326,40 +350,69 @@ The field `boundary_value_normal_flux_function` should be a function with signat normal heat flux at point `x` and time `t`. 
""" struct Adiabatic{F} - boundary_value_normal_flux_function::F # scaled heat flux 1/T * kappa * dT/dn + boundary_value_normal_flux_function::F # scaled heat flux 1/T * kappa * dT/dn end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesPrimitive}) - v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - return SVector(u_inner[1], v1, v2, u_inner[4]) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesPrimitive + }) + v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, + equations) + return SVector(u_inner[1], v1, v2, u_inner[4]) end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesPrimitive}) - # rho, v1, v2, _ = u_inner - normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, t, equations) - v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - _, tau_1n, tau_2n, _ = flux_inner # extract fluxes for 2nd and 3rd equations - normal_energy_flux = v1 * tau_1n + v2 * tau_2n + normal_heat_flux - return SVector(flux_inner[1], flux_inner[2], flux_inner[3], normal_energy_flux) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesPrimitive + }) + # rho, v1, v2, _ = u_inner + normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, + t, + equations) + v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, + equations) + _, tau_1n, tau_2n, _ = flux_inner # extract fluxes for 2nd and 3rd equations + normal_energy_flux = v1 * tau_1n + v2 * tau_2n + normal_heat_flux + return SVector(flux_inner[1], flux_inner[2], flux_inner[3], normal_energy_flux) end - -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesPrimitive}) - v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, equations) - return SVector(u_inner[1], v1, v2, T) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesPrimitive + }) + v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, + equations) + T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, + equations) + return SVector(u_inner[1], v1, v2, T) end -@inline function 
(boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesPrimitive}) - return flux_inner +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesPrimitive + }) + return flux_inner end # specialized BC impositions for GradientVariablesEntropy. @@ -370,38 +423,69 @@ end # Taken from "Entropy stable modal discontinuous Galerkin schemes and wall boundary conditions # for the compressible Navier-Stokes equations" by Chan, Lin, Warburton 2022. # DOI: 10.1016/j.jcp.2021.110723 -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesEntropy}) - v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - negative_rho_inv_p = w_inner[4] # w_4 = -rho / p - return SVector(w_inner[1], -v1 * negative_rho_inv_p, -v2 * negative_rho_inv_p, negative_rho_inv_p) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesEntropy + }) + v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, + equations) + negative_rho_inv_p = w_inner[4] # w_4 = -rho / p + return SVector(w_inner[1], -v1 * negative_rho_inv_p, -v2 * negative_rho_inv_p, + negative_rho_inv_p) end # this is actually identical to the specialization for GradientVariablesPrimitive, but included for completeness. 
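The entropy-variable boundary imposition above relies on the identity w2 = rho * v1 / p = -v1 * w4 with w4 = -rho / p (and analogously for w3). A quick numerical check with illustrative values:

```julia
# Standalone check of the identity used in the Gradient BC above (assumed numbers).
rho, v1, p = 1.2, 0.3, 101.0
w4 = -rho / p                # the "negative_rho_inv_p" above
w2_direct  = rho * v1 / p
w2_from_w4 = -v1 * w4
w2_direct ≈ w2_from_w4       # true
```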
-@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesEntropy}) - normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, t, equations) - v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - _, tau_1n, tau_2n, _ = flux_inner # extract fluxes for 2nd and 3rd equations - normal_energy_flux = v1 * tau_1n + v2 * tau_2n + normal_heat_flux - return SVector(flux_inner[1], flux_inner[2], flux_inner[3], normal_energy_flux) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesEntropy + }) + normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, + t, + equations) + v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, + equations) + _, tau_1n, tau_2n, _ = flux_inner # extract fluxes for 2nd and 3rd equations + normal_energy_flux = v1 * tau_1n + v2 * tau_2n + normal_heat_flux + return SVector(flux_inner[1], flux_inner[2], flux_inner[3], normal_energy_flux) end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesEntropy}) - v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, equations) - - # the entropy variables w2 = rho * v1 / p = v1 / T = -v1 * w4. Similarly for w3 - w4 = -1 / T - return SVector(w_inner[1], -v1 * w4, -v2 * w4, w4) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesEntropy + }) + v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, + equations) + T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, + equations) + + # the entropy variables w2 = rho * v1 / p = v1 / T = -v1 * w4. 
Similarly for w3 + w4 = -1 / T + return SVector(w_inner[1], -v1 * w4, -v2 * w4, w4) end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion2D{GradientVariablesEntropy}) - return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4]) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion2D{ + GradientVariablesEntropy + }) + return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4]) end diff --git a/src/equations/compressible_navier_stokes_3d.jl b/src/equations/compressible_navier_stokes_3d.jl index 5a968261503..8930489295d 100644 --- a/src/equations/compressible_navier_stokes_3d.jl +++ b/src/equations/compressible_navier_stokes_3d.jl @@ -76,37 +76,40 @@ w_2 = \frac{\rho v_1}{p},\, w_3 = \frac{\rho v_2}{p},\, w_4 = \frac{\rho v_3}{p} #!!! warning "Experimental code" # This code is experimental and may be changed or removed in any future release. """ -struct CompressibleNavierStokesDiffusion3D{GradientVariables, RealT <: Real, E <: AbstractCompressibleEulerEquations{3}} <: AbstractCompressibleNavierStokesDiffusion{3, 5} - # TODO: parabolic - # 1) For now save gamma and inv(gamma-1) again, but could potentially reuse them from the Euler equations - # 2) Add NGRADS as a type parameter here and in AbstractEquationsParabolic, add `ngradients(...)` accessor function - gamma::RealT # ratio of specific heats - inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications - - mu::RealT # viscosity - Pr::RealT # Prandtl number - kappa::RealT # thermal diffusivity for Fick's law - - equations_hyperbolic::E # CompressibleEulerEquations3D - gradient_variables::GradientVariables # GradientVariablesPrimitive or GradientVariablesEntropy +struct CompressibleNavierStokesDiffusion3D{GradientVariables, RealT <: Real, + E <: AbstractCompressibleEulerEquations{3}} <: + AbstractCompressibleNavierStokesDiffusion{3, 5} + # TODO: parabolic + # 1) For now save gamma and inv(gamma-1) again, but could potentially reuse them from the Euler equations + # 2) Add NGRADS as a type parameter here and in AbstractEquationsParabolic, add `ngradients(...)` accessor function + gamma::RealT # ratio of specific heats + inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications + + mu::RealT # viscosity + Pr::RealT # Prandtl number + kappa::RealT # thermal diffusivity for Fick's law + + equations_hyperbolic::E # CompressibleEulerEquations3D + gradient_variables::GradientVariables # GradientVariablesPrimitive or GradientVariablesEntropy end # default to primitive gradient variables function CompressibleNavierStokesDiffusion3D(equations::CompressibleEulerEquations3D; mu, Prandtl, gradient_variables = GradientVariablesPrimitive()) - gamma = equations.gamma - inv_gamma_minus_one = equations.inv_gamma_minus_one - μ, Pr = promote(mu, Prandtl) - - # Under the assumption of constant Prandtl number the thermal conductivity - # constant is kappa = gamma μ / ((gamma-1) Pr). - # Important note! Factor of μ is accounted for later in `flux`. 
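The constructor pattern visible here, promoting the scalar inputs and then pinning the type parameters via `typeof`, can be sketched generically (dummy type, not the package's own):

```julia
# Generic sketch of the constructor idiom used above.
struct Viscosity{RealT <: Real}
    mu::RealT
    Pr::RealT
end

function Viscosity(; mu, Prandtl)
    μ, Pr = promote(mu, Prandtl)   # e.g. Int and Float64 -> Float64
    return Viscosity{typeof(μ)}(μ, Pr)
end

Viscosity(mu = 1, Prandtl = 0.72)  # Viscosity{Float64}(1.0, 0.72)
```

This keeps the struct concretely typed even when callers mix integer and floating-point inputs.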
- kappa = gamma * inv_gamma_minus_one / Pr - - CompressibleNavierStokesDiffusion3D{typeof(gradient_variables), typeof(gamma), typeof(equations)}(gamma, inv_gamma_minus_one, - μ, Pr, kappa, - equations, gradient_variables) + gamma = equations.gamma + inv_gamma_minus_one = equations.inv_gamma_minus_one + μ, Pr = promote(mu, Prandtl) + + # Under the assumption of constant Prandtl number the thermal conductivity + # constant is kappa = gamma μ / ((gamma-1) Pr). + # Important note! Factor of μ is accounted for later in `flux`. + kappa = gamma * inv_gamma_minus_one / Pr + + CompressibleNavierStokesDiffusion3D{typeof(gradient_variables), typeof(gamma), + typeof(equations)}(gamma, inv_gamma_minus_one, + μ, Pr, kappa, + equations, gradient_variables) end # TODO: parabolic @@ -114,225 +117,279 @@ end # varnames(::typeof(cons2prim) , ::CompressibleNavierStokesDiffusion3D) = ("v1", "v2", "v3", "T") # varnames(::typeof(cons2entropy), ::CompressibleNavierStokesDiffusion3D) = ("w2", "w3", "w4", "w5") -varnames(variable_mapping, equations_parabolic::CompressibleNavierStokesDiffusion3D) = - varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +function varnames(variable_mapping, + equations_parabolic::CompressibleNavierStokesDiffusion3D) + varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +end # we specialize this function to compute gradients of primitive variables instead of # conservative variables. -gradient_variable_transformation(::CompressibleNavierStokesDiffusion3D{GradientVariablesPrimitive}) = cons2prim -gradient_variable_transformation(::CompressibleNavierStokesDiffusion3D{GradientVariablesEntropy}) = cons2entropy - +function gradient_variable_transformation(::CompressibleNavierStokesDiffusion3D{ + GradientVariablesPrimitive + }) + cons2prim +end +function gradient_variable_transformation(::CompressibleNavierStokesDiffusion3D{ + GradientVariablesEntropy + }) + cons2entropy +end # Explicit formulas for the diffusive Navier-Stokes fluxes are available, e.g., in Section 2 # of the paper by Rueda-Ramírez, Hennemann, Hindenlang, Winters, and Gassner # "An Entropy Stable Nodal Discontinuous Galerkin Method for the resistive # MHD Equations. Part II: Subcell Finite Volume Shock Capturing" # where one sets the magnetic field components equal to 0. -function flux(u, gradients, orientation::Integer, equations::CompressibleNavierStokesDiffusion3D) - # Here, `u` is assumed to be the "transformed" variables specified by `gradient_variable_transformation`. - rho, v1, v2, v3, _ = convert_transformed_to_primitive(u, equations) - # Here `gradients` is assumed to contain the gradients of the primitive variables (rho, v1, v2, v3, T) - # either computed directly or reverse engineered from the gradient of the entropy variables - # by way of the `convert_gradient_variables` function. 
- _, dv1dx, dv2dx, dv3dx, dTdx = convert_derivative_to_primitive(u, gradients[1], equations) - _, dv1dy, dv2dy, dv3dy, dTdy = convert_derivative_to_primitive(u, gradients[2], equations) - _, dv1dz, dv2dz, dv3dz, dTdz = convert_derivative_to_primitive(u, gradients[3], equations) - - # Components of viscous stress tensor - - # Diagonal parts - # (4/3 * (v1)_x - 2/3 * ((v2)_y + (v3)_z) - tau_11 = 4.0 / 3.0 * dv1dx - 2.0 / 3.0 * (dv2dy + dv3dz) - # (4/3 * (v2)_y - 2/3 * ((v1)_x + (v3)_z) - tau_22 = 4.0 / 3.0 * dv2dy - 2.0 / 3.0 * (dv1dx + dv3dz) - # (4/3 * (v3)_z - 2/3 * ((v1)_x + (v2)_y) - tau_33 = 4.0 / 3.0 * dv3dz - 2.0 / 3.0 * (dv1dx + dv2dy) - - # Off diagonal parts, exploit that stress tensor is symmetric - # ((v1)_y + (v2)_x) - tau_12 = dv1dy + dv2dx # = tau_21 - # ((v1)_z + (v3)_x) - tau_13 = dv1dz + dv3dx # = tau_31 - # ((v2)_z + (v3)_y) - tau_23 = dv2dz + dv3dy # = tau_32 - - # Fick's law q = -kappa * grad(T) = -kappa * grad(p / (R rho)) - # with thermal diffusivity constant kappa = gamma μ R / ((gamma-1) Pr) - # Note, the gas constant cancels under this formulation, so it is not present - # in the implementation - q1 = equations.kappa * dTdx - q2 = equations.kappa * dTdy - q3 = equations.kappa * dTdz - - # Constant dynamic viscosity is copied to a variable for readability. - # Offers flexibility for dynamic viscosity via Sutherland's law where it depends - # on temperature and reference values, Ts and Tref such that mu(T) - mu = equations.mu - - if orientation == 1 - # viscous flux components in the x-direction - f1 = zero(rho) - f2 = tau_11 * mu - f3 = tau_12 * mu - f4 = tau_13 * mu - f5 = ( v1 * tau_11 + v2 * tau_12 + v3 * tau_13 + q1 ) * mu - - return SVector(f1, f2, f3, f4, f5) - elseif orientation == 2 - # viscous flux components in the y-direction - # Note, symmetry is exploited for tau_12 = tau_21 - g1 = zero(rho) - g2 = tau_12 * mu # tau_21 * mu - g3 = tau_22 * mu - g4 = tau_23 * mu - g5 = ( v1 * tau_12 + v2 * tau_22 + v3 * tau_23 + q2 ) * mu - - return SVector(g1, g2, g3, g4, g5) - else # if orientation == 3 - # viscous flux components in the z-direction - # Note, symmetry is exploited for tau_13 = tau_31, tau_23 = tau_32 - h1 = zero(rho) - h2 = tau_13 * mu # tau_31 * mu - h3 = tau_23 * mu # tau_32 * mu - h4 = tau_33 * mu - h5 = ( v1 * tau_13 + v2 * tau_23 + v3 * tau_33 + q3 ) * mu - - return SVector(h1, h2, h3, h4, h5) - end +function flux(u, gradients, orientation::Integer, + equations::CompressibleNavierStokesDiffusion3D) + # Here, `u` is assumed to be the "transformed" variables specified by `gradient_variable_transformation`. + rho, v1, v2, v3, _ = convert_transformed_to_primitive(u, equations) + # Here `gradients` is assumed to contain the gradients of the primitive variables (rho, v1, v2, v3, T) + # either computed directly or reverse engineered from the gradient of the entropy variables + # by way of the `convert_gradient_variables` function. 
+ _, dv1dx, dv2dx, dv3dx, dTdx = convert_derivative_to_primitive(u, gradients[1], + equations) + _, dv1dy, dv2dy, dv3dy, dTdy = convert_derivative_to_primitive(u, gradients[2], + equations) + _, dv1dz, dv2dz, dv3dz, dTdz = convert_derivative_to_primitive(u, gradients[3], + equations) + + # Components of viscous stress tensor + + # Diagonal parts + # (4/3 * (v1)_x - 2/3 * ((v2)_y + (v3)_z) + tau_11 = 4.0 / 3.0 * dv1dx - 2.0 / 3.0 * (dv2dy + dv3dz) + # (4/3 * (v2)_y - 2/3 * ((v1)_x + (v3)_z) + tau_22 = 4.0 / 3.0 * dv2dy - 2.0 / 3.0 * (dv1dx + dv3dz) + # (4/3 * (v3)_z - 2/3 * ((v1)_x + (v2)_y) + tau_33 = 4.0 / 3.0 * dv3dz - 2.0 / 3.0 * (dv1dx + dv2dy) + + # Off diagonal parts, exploit that stress tensor is symmetric + # ((v1)_y + (v2)_x) + tau_12 = dv1dy + dv2dx # = tau_21 + # ((v1)_z + (v3)_x) + tau_13 = dv1dz + dv3dx # = tau_31 + # ((v2)_z + (v3)_y) + tau_23 = dv2dz + dv3dy # = tau_32 + + # Fick's law q = -kappa * grad(T) = -kappa * grad(p / (R rho)) + # with thermal diffusivity constant kappa = gamma μ R / ((gamma-1) Pr) + # Note, the gas constant cancels under this formulation, so it is not present + # in the implementation + q1 = equations.kappa * dTdx + q2 = equations.kappa * dTdy + q3 = equations.kappa * dTdz + + # Constant dynamic viscosity is copied to a variable for readability. + # Offers flexibility for dynamic viscosity via Sutherland's law where it depends + # on temperature and reference values, Ts and Tref such that mu(T) + mu = equations.mu + + if orientation == 1 + # viscous flux components in the x-direction + f1 = zero(rho) + f2 = tau_11 * mu + f3 = tau_12 * mu + f4 = tau_13 * mu + f5 = (v1 * tau_11 + v2 * tau_12 + v3 * tau_13 + q1) * mu + + return SVector(f1, f2, f3, f4, f5) + elseif orientation == 2 + # viscous flux components in the y-direction + # Note, symmetry is exploited for tau_12 = tau_21 + g1 = zero(rho) + g2 = tau_12 * mu # tau_21 * mu + g3 = tau_22 * mu + g4 = tau_23 * mu + g5 = (v1 * tau_12 + v2 * tau_22 + v3 * tau_23 + q2) * mu + + return SVector(g1, g2, g3, g4, g5) + else # if orientation == 3 + # viscous flux components in the z-direction + # Note, symmetry is exploited for tau_13 = tau_31, tau_23 = tau_32 + h1 = zero(rho) + h2 = tau_13 * mu # tau_31 * mu + h3 = tau_23 * mu # tau_32 * mu + h4 = tau_33 * mu + h5 = (v1 * tau_13 + v2 * tau_23 + v3 * tau_33 + q3) * mu + + return SVector(h1, h2, h3, h4, h5) + end end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::CompressibleNavierStokesDiffusion3D) - rho, rho_v1, rho_v2, rho_v3, _ = u + rho, rho_v1, rho_v2, rho_v3, _ = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - T = temperature(u, equations) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + T = temperature(u, equations) - return SVector(rho, v1, v2, v3, T) + return SVector(rho, v1, v2, v3, T) end # Convert conservative variables to entropy # TODO: parabolic. We can improve efficiency by not computing w_1, which involves logarithms # This can be done by specializing `cons2entropy` and `entropy2cons` to `CompressibleNavierStokesDiffusion2D`, # but this may be confusing to new users. 
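A property worth noting about the diagonal stress entries above: they form a trace-free (deviatoric) tensor, since the coefficients 4/3 and -2/3 cancel when summed cyclically. A quick check with assumed gradient values:

```julia
# Trace of the deviatoric stress vanishes for any diagonal gradients.
dv1dx, dv2dy, dv3dz = 0.4, -0.1, 0.25
tau_11 = 4 / 3 * dv1dx - 2 / 3 * (dv2dy + dv3dz)
tau_22 = 4 / 3 * dv2dy - 2 / 3 * (dv1dx + dv3dz)
tau_33 = 4 / 3 * dv3dz - 2 / 3 * (dv1dx + dv2dy)
abs(tau_11 + tau_22 + tau_33) < 1e-14   # trace-free up to round-off
```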
-cons2entropy(u, equations::CompressibleNavierStokesDiffusion3D) = cons2entropy(u, equations.equations_hyperbolic) -entropy2cons(w, equations::CompressibleNavierStokesDiffusion3D) = entropy2cons(w, equations.equations_hyperbolic) +function cons2entropy(u, equations::CompressibleNavierStokesDiffusion3D) + cons2entropy(u, equations.equations_hyperbolic) +end +function entropy2cons(w, equations::CompressibleNavierStokesDiffusion3D) + entropy2cons(w, equations.equations_hyperbolic) +end # the `flux` function takes in transformed variables `u` which depend on the type of the gradient variables. # For CNS, it is simplest to formulate the viscous terms in primitive variables, so we transform the transformed # variables into primitive variables. -@inline function convert_transformed_to_primitive(u_transformed, equations::CompressibleNavierStokesDiffusion3D{GradientVariablesPrimitive}) - return u_transformed +@inline function convert_transformed_to_primitive(u_transformed, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesPrimitive + }) + return u_transformed end # TODO: parabolic. Make this more efficient! -@inline function convert_transformed_to_primitive(u_transformed, equations::CompressibleNavierStokesDiffusion3D{GradientVariablesEntropy}) - # note: this uses CompressibleNavierStokesDiffusion3D versions of cons2prim and entropy2cons - return cons2prim(entropy2cons(u_transformed, equations), equations) +@inline function convert_transformed_to_primitive(u_transformed, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesEntropy + }) + # note: this uses CompressibleNavierStokesDiffusion3D versions of cons2prim and entropy2cons + return cons2prim(entropy2cons(u_transformed, equations), equations) end - # Takes the solution values `u` and gradient of the entropy variables (w_2, w_3, w_4, w_5) and # reverse engineers the gradients to be terms of the primitive variables (v1, v2, v3, T). # Helpful because then the diffusive fluxes have the same form as on paper. # Note, the first component of `gradient_entropy_vars` contains gradient(rho) which is unused. # TODO: parabolic; entropy stable viscous terms -@inline function convert_derivative_to_primitive(u, gradient, ::CompressibleNavierStokesDiffusion3D{GradientVariablesPrimitive}) - return gradient +@inline function convert_derivative_to_primitive(u, gradient, + ::CompressibleNavierStokesDiffusion3D{ + GradientVariablesPrimitive + }) + return gradient end # the first argument is always the "transformed" variables. @inline function convert_derivative_to_primitive(w, gradient_entropy_vars, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesEntropy}) - - # TODO: parabolic. This is inefficient to pass in transformed variables but then transform them back. 
- # We can fix this if we directly compute v1, v2, v3, T from the entropy variables - u = entropy2cons(w, equations) # calls a "modified" entropy2cons defined for CompressibleNavierStokesDiffusion3D - rho, rho_v1, rho_v2, rho_v3, _ = u - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - T = temperature(u, equations) - - return SVector(gradient_entropy_vars[1], - T * (gradient_entropy_vars[2] + v1 * gradient_entropy_vars[5]), # grad(u) = T*(grad(w_2)+v1*grad(w_5)) - T * (gradient_entropy_vars[3] + v2 * gradient_entropy_vars[5]), # grad(v) = T*(grad(w_3)+v2*grad(w_5)) - T * (gradient_entropy_vars[4] + v3 * gradient_entropy_vars[5]), # grad(v) = T*(grad(w_4)+v3*grad(w_5)) - T * T * gradient_entropy_vars[5] # grad(T) = T^2*grad(w_5)) - ) + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesEntropy + }) + + # TODO: parabolic. This is inefficient to pass in transformed variables but then transform them back. + # We can fix this if we directly compute v1, v2, v3, T from the entropy variables + u = entropy2cons(w, equations) # calls a "modified" entropy2cons defined for CompressibleNavierStokesDiffusion3D + rho, rho_v1, rho_v2, rho_v3, _ = u + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + T = temperature(u, equations) + + return SVector(gradient_entropy_vars[1], + T * (gradient_entropy_vars[2] + v1 * gradient_entropy_vars[5]), # grad(u) = T*(grad(w_2)+v1*grad(w_5)) + T * (gradient_entropy_vars[3] + v2 * gradient_entropy_vars[5]), # grad(v) = T*(grad(w_3)+v2*grad(w_5)) + T * (gradient_entropy_vars[4] + v3 * gradient_entropy_vars[5]), # grad(v) = T*(grad(w_4)+v3*grad(w_5)) + T * T * gradient_entropy_vars[5]) end - # This routine is required because `prim2cons` is called in `initial_condition`, which # is called with `equations::CompressibleEulerEquations3D`. This means it is inconsistent # with `cons2prim(..., ::CompressibleNavierStokesDiffusion3D)` as defined above. # TODO: parabolic. Is there a way to clean this up? -@inline prim2cons(u, equations::CompressibleNavierStokesDiffusion3D) = +@inline function prim2cons(u, equations::CompressibleNavierStokesDiffusion3D) prim2cons(u, equations.equations_hyperbolic) - +end @inline function temperature(u, equations::CompressibleNavierStokesDiffusion3D) - rho, rho_v1, rho_v2, rho_v3, rho_e = u + rho, rho_v1, rho_v2, rho_v3, rho_e = u - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho) - T = p / rho - return T + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho) + T = p / rho + return T end - @inline function enstrophy(u, gradients, equations::CompressibleNavierStokesDiffusion3D) - # Enstrophy is 0.5 rho ω⋅ω where ω = ∇ × v + # Enstrophy is 0.5 rho ω⋅ω where ω = ∇ × v - omega = vorticity(u, gradients, equations) - return 0.5 * u[1] * (omega[1]^2 + omega[2]^2 + omega[3]^2) + omega = vorticity(u, gradients, equations) + return 0.5 * u[1] * (omega[1]^2 + omega[2]^2 + omega[3]^2) end - @inline function vorticity(u, gradients, equations::CompressibleNavierStokesDiffusion3D) - # Ensure that we have velocity `gradients` by way of the `convert_gradient_variables` function. - _, dv1dx, dv2dx, dv3dx, _ = convert_derivative_to_primitive(u, gradients[1], equations) - _, dv1dy, dv2dy, dv3dy, _ = convert_derivative_to_primitive(u, gradients[2], equations) - _, dv1dz, dv2dz, dv3dz, _ = convert_derivative_to_primitive(u, gradients[3], equations) + # Ensure that we have velocity `gradients` by way of the `convert_gradient_variables` function. 
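The `vorticity` computation that follows assembles ω = ∇ × v from the nine velocity derivatives, and `enstrophy` above is 0.5 ρ ω⋅ω. A standalone sketch with assumed derivative values:

```julia
# Illustrative curl/enstrophy computation mirroring the functions above.
dv1dy, dv1dz = 0.1, -0.2
dv2dx, dv2dz = 0.3, 0.0
dv3dx, dv3dy = -0.1, 0.4

omega = (dv3dy - dv2dz, dv1dz - dv3dx, dv2dx - dv1dy)   # ∇ × v

rho = 1.2
0.5 * rho * sum(abs2, omega)                            # enstrophy density
```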
+ _, dv1dx, dv2dx, dv3dx, _ = convert_derivative_to_primitive(u, gradients[1], equations) + _, dv1dy, dv2dy, dv3dy, _ = convert_derivative_to_primitive(u, gradients[2], equations) + _, dv1dz, dv2dz, dv3dz, _ = convert_derivative_to_primitive(u, gradients[3], equations) - return SVector(dv3dy - dv2dz , dv1dz - dv3dx , dv2dx - dv1dy) + return SVector(dv3dy - dv2dz, dv1dz - dv3dx, dv2dx - dv1dy) end - -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesPrimitive}) - v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - return SVector(u_inner[1], v1, v2, v3, u_inner[5]) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesPrimitive + }) + v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, + t, + equations) + return SVector(u_inner[1], v1, v2, v3, u_inner[5]) end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesPrimitive}) - # rho, v1, v2, v3, _ = u_inner - normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, t, equations) - v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - _, tau_1n, tau_2n, tau_3n, _ = flux_inner # extract fluxes for 2nd, 3rd, and 4th equations - normal_energy_flux = v1 * tau_1n + v2 * tau_2n + v3 * tau_3n + normal_heat_flux - return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4], normal_energy_flux) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesPrimitive + }) + # rho, v1, v2, v3, _ = u_inner + normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, + t, + equations) + v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, + t, + equations) + _, tau_1n, tau_2n, tau_3n, _ = flux_inner # extract fluxes for 2nd, 3rd, and 4th equations + normal_energy_flux = v1 * tau_1n + v2 * tau_2n + v3 * tau_3n + normal_heat_flux + return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4], + normal_energy_flux) end - -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesPrimitive}) - v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, equations) - return SVector(u_inner[1], v1, v2, v3, T) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + 
equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesPrimitive + }) + v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, + t, + equations) + T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, + equations) + return SVector(u_inner[1], v1, v2, v3, T) end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesPrimitive}) - return flux_inner +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesPrimitive + }) + return flux_inner end # specialized BC impositions for GradientVariablesEntropy. @@ -343,38 +400,74 @@ end # Taken from "Entropy stable modal discontinuous Galerkin schemes and wall boundary conditions # for the compressible Navier-Stokes equations" by Chan, Lin, Warburton 2022. # DOI: 10.1016/j.jcp.2021.110723 -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesEntropy}) - v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - negative_rho_inv_p = w_inner[5] # w_5 = -rho / p - return SVector(w_inner[1], -v1 * negative_rho_inv_p, -v2 * negative_rho_inv_p, -v3 * negative_rho_inv_p, negative_rho_inv_p) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesEntropy + }) + v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, + t, + equations) + negative_rho_inv_p = w_inner[5] # w_5 = -rho / p + return SVector(w_inner[1], -v1 * negative_rho_inv_p, -v2 * negative_rho_inv_p, + -v3 * negative_rho_inv_p, negative_rho_inv_p) end # this is actually identical to the specialization for GradientVariablesPrimitive, but included for completeness. 
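In the Divergence-operator wall conditions here, the normal energy flux is rebuilt as v1*tau_1n + v2*tau_2n + v3*tau_3n + q_n. With assumed numbers, this shows that at a no-slip wall with zero prescribed heat flux the energy flux vanishes while the momentum fluxes from the interior are kept:

```julia
# Assumed values, illustrating the wall energy-flux formula above.
v1, v2, v3 = 0.0, 0.0, 0.0                       # no-slip wall velocity
tau_1n, tau_2n, tau_3n = 0.5, -0.2, 0.1          # interior viscous stresses
q_n = 0.0                                        # adiabatic wall, often zero
v1 * tau_1n + v2 * tau_2n + v3 * tau_3n + q_n    # normal energy flux == 0.0
```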
-@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesEntropy}) - normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, t, equations) - v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - _, tau_1n, tau_2n, tau_3n, _ = flux_inner # extract fluxes for 2nd, 3rd, and 4th equations - normal_energy_flux = v1 * tau_1n + v2 * tau_2n + v3 * tau_3n + normal_heat_flux - return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4], normal_energy_flux) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesEntropy + }) + normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, + t, + equations) + v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, + t, + equations) + _, tau_1n, tau_2n, tau_3n, _ = flux_inner # extract fluxes for 2nd, 3rd, and 4th equations + normal_energy_flux = v1 * tau_1n + v2 * tau_2n + v3 * tau_3n + normal_heat_flux + return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4], + normal_energy_flux) end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Gradient, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesEntropy}) - v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, equations) - T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, equations) - - # the entropy variables w2 = rho * v1 / p = v1 / T = -v1 * w5. Similarly for w3 and w4 - w5 = -1 / T - return SVector(w_inner[1], -v1 * w5, -v2 * w5, -v3 * w5, w5) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesEntropy + }) + v1, v2, v3 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, + t, + equations) + T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, + equations) + + # the entropy variables w2 = rho * v1 / p = v1 / T = -v1 * w5. 
Similarly for w3 and w4 + w5 = -1 / T + return SVector(w_inner[1], -v1 * w5, -v2 * w5, -v3 * w5, w5) end -@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Isothermal})(flux_inner, w_inner, normal::AbstractVector, - x, t, operator_type::Divergence, - equations::CompressibleNavierStokesDiffusion3D{GradientVariablesEntropy}) - return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4], flux_inner[5]) +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + w_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion3D{ + GradientVariablesEntropy + }) + return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4], + flux_inner[5]) end diff --git a/src/equations/equations.jl b/src/equations/equations.jl index 6640ee7cfc7..90b2cd62191 100644 --- a/src/equations/equations.jl +++ b/src/equations/equations.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Retrieve number of variables from equation instance @inline nvariables(::AbstractEquations{NDIMS, NVARS}) where {NDIMS, NVARS} = NVARS @@ -42,45 +42,42 @@ Common choices of the `conversion_function` are [`cons2cons`](@ref) and """ function varnames end - # Add methods to show some information on systems of equations. function Base.show(io::IO, equations::AbstractEquations) - # Since this is not performance-critical, we can use `@nospecialize` to reduce latency. - @nospecialize equations # reduce precompilation time - - print(io, get_name(equations), " with ") - if nvariables(equations) == 1 - print(io, "one variable") - else - print(io, nvariables(equations), " variables") - end + # Since this is not performance-critical, we can use `@nospecialize` to reduce latency. + @nospecialize equations # reduce precompilation time + + print(io, get_name(equations), " with ") + if nvariables(equations) == 1 + print(io, "one variable") + else + print(io, nvariables(equations), " variables") + end end function Base.show(io::IO, ::MIME"text/plain", equations::AbstractEquations) - # Since this is not performance-critical, we can use `@nospecialize` to reduce latency. - @nospecialize equations # reduce precompilation time - - if get(io, :compact, false) - show(io, equations) - else - summary_header(io, get_name(equations)) - summary_line(io, "#variables", nvariables(equations)) - for variable in eachvariable(equations) - summary_line(increment_indent(io), - "variable " * string(variable), - varnames(cons2cons, equations)[variable]) + # Since this is not performance-critical, we can use `@nospecialize` to reduce latency. 
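The `nvariables` and `Base.ndims` definitions in this hunk read their answers straight from the type parameters, so they are compile-time constants. A minimal sketch with dummy types (not the package's own):

```julia
# Dispatch on (partially applied) abstract type parameters.
abstract type MyAbstractEquations{NDIMS, NVARS} end
struct MyAdvection2D <: MyAbstractEquations{2, 1} end

nvars(::MyAbstractEquations{NDIMS, NVARS}) where {NDIMS, NVARS} = NVARS
ndims_of(::MyAbstractEquations{NDIMS}) where {NDIMS} = NDIMS

nvars(MyAdvection2D()), ndims_of(MyAdvection2D())   # (1, 2)
```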
+ @nospecialize equations # reduce precompilation time + + if get(io, :compact, false) + show(io, equations) + else + summary_header(io, get_name(equations)) + summary_line(io, "#variables", nvariables(equations)) + for variable in eachvariable(equations) + summary_line(increment_indent(io), + "variable " * string(variable), + varnames(cons2cons, equations)[variable]) + end + summary_footer(io) end - summary_footer(io) - end end - -@inline Base.ndims(::AbstractEquations{NDIMS}) where NDIMS = NDIMS +@inline Base.ndims(::AbstractEquations{NDIMS}) where {NDIMS} = NDIMS # equations act like scalars in broadcasting Base.broadcastable(equations::AbstractEquations) = Ref(equations) - """ flux(u, orientation_or_normal, equations) @@ -97,13 +94,13 @@ function flux end Enables calling `flux` with a non-integer argument `normal_direction` for one-dimensional equations. Returns the value of `flux(u, 1, equations)` scaled by `normal_direction[1]`. """ -@inline function flux(u, normal_direction::AbstractVector, equations::AbstractEquations{1}) - # Call `flux` with `orientation::Int = 1` for dispatch. Note that the actual - # `orientation` argument is ignored. - return normal_direction[1] * flux(u, 1, equations) +@inline function flux(u, normal_direction::AbstractVector, + equations::AbstractEquations{1}) + # Call `flux` with `orientation::Int = 1` for dispatch. Note that the actual + # `orientation` argument is ignored. + return normal_direction[1] * flux(u, 1, equations) end - """ rotate_to_x(u, normal, equations) @@ -126,7 +123,6 @@ See also: [`rotate_to_x`](@ref) """ function rotate_from_x end - """ BoundaryConditionDirichlet(boundary_value_function) @@ -146,24 +142,28 @@ julia> BoundaryConditionDirichlet(initial_condition_convergence_test) ``` """ struct BoundaryConditionDirichlet{B} - boundary_value_function::B + boundary_value_function::B end # Dirichlet-type boundary condition for use with TreeMesh or StructuredMesh -@inline function (boundary_condition::BoundaryConditionDirichlet)(u_inner, orientation_or_normal, +@inline function (boundary_condition::BoundaryConditionDirichlet)(u_inner, + orientation_or_normal, direction, x, t, - surface_flux_function, equations) - u_boundary = boundary_condition.boundary_value_function(x, t, equations) - - # Calculate boundary flux - if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation_or_normal, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation_or_normal, equations) - end + surface_flux_function, + equations) + u_boundary = boundary_condition.boundary_value_function(x, t, equations) + + # Calculate boundary flux + if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation_or_normal, + equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation_or_normal, + equations) + end - return flux + return flux end # Dirichlet-type boundary condition for use with UnstructuredMesh2D @@ -173,13 +173,13 @@ end x, t, surface_flux_function, equations) - # get the external value of the solution - u_boundary = boundary_condition.boundary_value_function(x, t, equations) + # get the external value of the solution + u_boundary = boundary_condition.boundary_value_function(x, t, equations) - # Calculate 
boundary flux - flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) + # Calculate boundary flux + flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) - return flux + return flux end # operator types used for dispatch on parabolic boundary fluxes @@ -199,7 +199,7 @@ boundary_normal_flux_function(x, t, equations) where `x` specifies the coordinates, `t` is the current time, and `equation` is the corresponding system of equations. """ struct BoundaryConditionNeumann{B} - boundary_normal_flux_function::B + boundary_normal_flux_function::B end # set sensible default values that may be overwritten by specific equations @@ -216,14 +216,13 @@ The return value will be `True()` or `False()` to allow dispatching on the retur have_nonconservative_terms(::AbstractEquations) = False() have_constant_speed(::AbstractEquations) = False() -default_analysis_errors(::AbstractEquations) = (:l2_error, :linf_error) +default_analysis_errors(::AbstractEquations) = (:l2_error, :linf_error) """ default_analysis_integrals(equations) Default analysis integrals used by the [`AnalysisCallback`](@ref). """ -default_analysis_integrals(::AbstractEquations) = (entropy_timederivative,) - +default_analysis_integrals(::AbstractEquations) = (entropy_timederivative,) """ cons2cons(u, equations) @@ -333,35 +332,48 @@ function energy_internal end include("numerical_fluxes.jl") # Linear scalar advection -abstract type AbstractLinearScalarAdvectionEquation{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractLinearScalarAdvectionEquation{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("linear_scalar_advection_1d.jl") include("linear_scalar_advection_2d.jl") include("linear_scalar_advection_3d.jl") # Inviscid Burgers -abstract type AbstractInviscidBurgersEquation{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractInviscidBurgersEquation{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("inviscid_burgers_1d.jl") # Shallow water equations -abstract type AbstractShallowWaterEquations{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractShallowWaterEquations{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("shallow_water_1d.jl") include("shallow_water_2d.jl") include("shallow_water_two_layer_1d.jl") include("shallow_water_two_layer_2d.jl") # CompressibleEulerEquations -abstract type AbstractCompressibleEulerEquations{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractCompressibleEulerEquations{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("compressible_euler_1d.jl") include("compressible_euler_2d.jl") include("compressible_euler_3d.jl") # CompressibleEulerMulticomponentEquations -abstract type AbstractCompressibleEulerMulticomponentEquations{NDIMS, NVARS, NCOMP} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractCompressibleEulerMulticomponentEquations{NDIMS, NVARS, NCOMP} <: + AbstractEquations{NDIMS, NVARS} end include("compressible_euler_multicomponent_1d.jl") include("compressible_euler_multicomponent_2d.jl") # Retrieve number of components from equation instance for the multicomponent case -@inline ncomponents(::AbstractCompressibleEulerMulticomponentEquations{NDIMS, NVARS, NCOMP}) where {NDIMS, NVARS, NCOMP} = NCOMP +@inline function ncomponents(::AbstractCompressibleEulerMulticomponentEquations{NDIMS, + NVARS, + NCOMP}) where { + NDIMS, + NVARS, + NCOMP + } + NCOMP +end """ 
eachcomponent(equations::AbstractCompressibleEulerMulticomponentEquations)
@@ -369,21 +381,32 @@ Return an iterator over the indices that specify the location in relevant data s
 for the components in `AbstractCompressibleEulerMulticomponentEquations`.
 In particular, the components themselves are not returned.
 """
-@inline eachcomponent(equations::AbstractCompressibleEulerMulticomponentEquations) = Base.OneTo(ncomponents(equations))
+@inline function eachcomponent(equations::AbstractCompressibleEulerMulticomponentEquations)
+    Base.OneTo(ncomponents(equations))
+end

 # Ideal MHD
-abstract type AbstractIdealGlmMhdEquations{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end
+abstract type AbstractIdealGlmMhdEquations{NDIMS, NVARS} <:
+              AbstractEquations{NDIMS, NVARS} end
 include("ideal_glm_mhd_1d.jl")
 include("ideal_glm_mhd_2d.jl")
 include("ideal_glm_mhd_3d.jl")

 # IdealGlmMhdMulticomponentEquations
-abstract type AbstractIdealGlmMhdMulticomponentEquations{NDIMS, NVARS, NCOMP} <: AbstractEquations{NDIMS, NVARS} end
+abstract type AbstractIdealGlmMhdMulticomponentEquations{NDIMS, NVARS, NCOMP} <:
+              AbstractEquations{NDIMS, NVARS} end
 include("ideal_glm_mhd_multicomponent_1d.jl")
 include("ideal_glm_mhd_multicomponent_2d.jl")

 # Retrieve number of components from equation instance for the multicomponent case
-@inline ncomponents(::AbstractIdealGlmMhdMulticomponentEquations{NDIMS, NVARS, NCOMP}) where {NDIMS, NVARS, NCOMP} = NCOMP
+@inline function ncomponents(::AbstractIdealGlmMhdMulticomponentEquations{NDIMS, NVARS,
+                                                                          NCOMP}) where {
+                                                                                         NDIMS,
+                                                                                         NVARS,
+                                                                                         NCOMP
+                                                                                         }
+    NCOMP
+end

 """
     eachcomponent(equations::AbstractIdealGlmMhdMulticomponentEquations)
@@ -391,27 +414,33 @@ Return an iterator over the indices that specify the location in relevant data s
 for the components in `AbstractIdealGlmMhdMulticomponentEquations`.
 In particular, the components themselves are not returned.
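For example, one can reduce over all components (an illustrative sketch only;
`u` is a state vector and `density_of_component` a hypothetical helper, not a
Trixi.jl function):

```julia
# `eachcomponent` is just `Base.OneTo(ncomponents(equations))`, so it can
# drive loops, generators, and reductions over the component indices.
total = sum(density_of_component(u, i, equations) for i in eachcomponent(equations))
```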
""" -@inline eachcomponent(equations::AbstractIdealGlmMhdMulticomponentEquations) = Base.OneTo(ncomponents(equations)) +@inline function eachcomponent(equations::AbstractIdealGlmMhdMulticomponentEquations) + Base.OneTo(ncomponents(equations)) +end # Diffusion equation: first order hyperbolic system -abstract type AbstractHyperbolicDiffusionEquations{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractHyperbolicDiffusionEquations{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("hyperbolic_diffusion_1d.jl") include("hyperbolic_diffusion_2d.jl") include("hyperbolic_diffusion_3d.jl") # Lattice-Boltzmann equation (advection part only) -abstract type AbstractLatticeBoltzmannEquations{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractLatticeBoltzmannEquations{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("lattice_boltzmann_2d.jl") include("lattice_boltzmann_3d.jl") # Acoustic perturbation equations -abstract type AbstractAcousticPerturbationEquations{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractAcousticPerturbationEquations{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("acoustic_perturbation_2d.jl") # Linearized Euler equations -abstract type AbstractLinearizedEulerEquations{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end +abstract type AbstractLinearizedEulerEquations{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end include("linearized_euler_2d.jl") -abstract type AbstractEquationsParabolic{NDIMS, NVARS} <: AbstractEquations{NDIMS, NVARS} end - +abstract type AbstractEquationsParabolic{NDIMS, NVARS} <: + AbstractEquations{NDIMS, NVARS} end end # @muladd diff --git a/src/equations/equations_parabolic.jl b/src/equations/equations_parabolic.jl index 76c6eedc33c..6c0be43798a 100644 --- a/src/equations/equations_parabolic.jl +++ b/src/equations/equations_parabolic.jl @@ -3,11 +3,13 @@ gradient_variable_transformation(::AbstractEquationsParabolic) = cons2cons # Linear scalar diffusion for use in linear scalar advection-diffusion problems -abstract type AbstractLaplaceDiffusion{NDIMS, NVARS} <: AbstractEquationsParabolic{NDIMS, NVARS} end +abstract type AbstractLaplaceDiffusion{NDIMS, NVARS} <: + AbstractEquationsParabolic{NDIMS, NVARS} end include("laplace_diffusion_1d.jl") include("laplace_diffusion_2d.jl") # Compressible Navier-Stokes equations -abstract type AbstractCompressibleNavierStokesDiffusion{NDIMS, NVARS} <: AbstractEquationsParabolic{NDIMS, NVARS} end +abstract type AbstractCompressibleNavierStokesDiffusion{NDIMS, NVARS} <: + AbstractEquationsParabolic{NDIMS, NVARS} end include("compressible_navier_stokes_2d.jl") include("compressible_navier_stokes_3d.jl") diff --git a/src/equations/hyperbolic_diffusion_1d.jl b/src/equations/hyperbolic_diffusion_1d.jl index 0e599417551..39a555e7c72 100644 --- a/src/equations/hyperbolic_diffusion_1d.jl +++ b/src/equations/hyperbolic_diffusion_1d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent @doc raw""" HyperbolicDiffusionEquations1D @@ -19,24 +19,26 @@ Further analysis can be found in the paper schemes [DOI: 10.1016/j.jcp.2007.07.029](https://doi.org/10.1016/j.jcp.2007.07.029) """ -struct HyperbolicDiffusionEquations1D{RealT<:Real} <: AbstractHyperbolicDiffusionEquations{1, 2} - Lr::RealT # reference length scale - inv_Tr::RealT # inverse of the reference time scale - nu::RealT # diffusion constant +struct HyperbolicDiffusionEquations1D{RealT <: Real} <: + AbstractHyperbolicDiffusionEquations{1, 2} + Lr::RealT # reference length scale + inv_Tr::RealT # inverse of the reference time scale + nu::RealT # diffusion constant end -function HyperbolicDiffusionEquations1D(; nu=1.0, Lr=inv(2pi)) - Tr = Lr^2 / nu - HyperbolicDiffusionEquations1D(promote(Lr, inv(Tr), nu)...) +function HyperbolicDiffusionEquations1D(; nu = 1.0, Lr = inv(2pi)) + Tr = Lr^2 / nu + HyperbolicDiffusionEquations1D(promote(Lr, inv(Tr), nu)...) end - varnames(::typeof(cons2cons), ::HyperbolicDiffusionEquations1D) = ("phi", "q1") varnames(::typeof(cons2prim), ::HyperbolicDiffusionEquations1D) = ("phi", "q1") -default_analysis_errors(::HyperbolicDiffusionEquations1D) = (:l2_error, :linf_error, :residual) +function default_analysis_errors(::HyperbolicDiffusionEquations1D) + (:l2_error, :linf_error, :residual) +end @inline function residual_steady_state(du, ::HyperbolicDiffusionEquations1D) - abs(du[1]) + abs(du[1]) end """ @@ -47,18 +49,19 @@ A non-priodic smooth initial condition. Can be used for convergence tests in com !!! note The solution is periodic but the initial guess is not. """ -function initial_condition_poisson_nonperiodic(x, t, equations::HyperbolicDiffusionEquations1D) - # elliptic equation: -νΔϕ = f - # Taken from Section 6.1 of Nishikawa https://doi.org/10.1016/j.jcp.2007.07.029 - if t == 0.0 - # initial "guess" of the solution and its derivative - phi = x[1]^2 - x[1] - q1 = 2*x[1] - 1 - else - phi = sinpi(x[1]) # ϕ - q1 = pi * cospi(x[1]) # ϕ_x - end - return SVector(phi, q1) +function initial_condition_poisson_nonperiodic(x, t, + equations::HyperbolicDiffusionEquations1D) + # elliptic equation: -νΔϕ = f + # Taken from Section 6.1 of Nishikawa https://doi.org/10.1016/j.jcp.2007.07.029 + if t == 0.0 + # initial "guess" of the solution and its derivative + phi = x[1]^2 - x[1] + q1 = 2 * x[1] - 1 + else + phi = sinpi(x[1]) # ϕ + q1 = pi * cospi(x[1]) # ϕ_x + end + return SVector(phi, q1) end """ @@ -71,14 +74,14 @@ diffusion system that is used with [`initial_condition_poisson_nonperiodic`](@re """ @inline function source_terms_poisson_nonperiodic(u, x, t, equations::HyperbolicDiffusionEquations1D) - # elliptic equation: -νΔϕ = f - # analytical solution: ϕ = sin(πx) and f = π^2sin(πx) - @unpack inv_Tr = equations + # elliptic equation: -νΔϕ = f + # analytical solution: ϕ = sin(πx) and f = π^2sin(πx) + @unpack inv_Tr = equations - dphi = pi^2 * sinpi(x[1]) - dq1 = -inv_Tr * u[2] + dphi = pi^2 * sinpi(x[1]) + dq1 = -inv_Tr * u[2] - return SVector(dphi, dq1) + return SVector(dphi, dq1) end """ @@ -92,37 +95,36 @@ Boundary conditions used for convergence tests in combination with function boundary_condition_poisson_nonperiodic(u_inner, orientation, direction, x, t, surface_flux_function, equations::HyperbolicDiffusionEquations1D) - # elliptic equation: -νΔϕ = f - phi = sinpi(x[1]) # ϕ - q1 = pi * cospi(x[1]) # ϕ_x - u_boundary = SVector(phi, q1) - - # Calculate boundary flux - if direction == 2 # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = 
surface_flux_function(u_inner, u_boundary, orientation, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equations) - end - - return flux -end + # elliptic equation: -νΔϕ = f + phi = sinpi(x[1]) # ϕ + q1 = pi * cospi(x[1]) # ϕ_x + u_boundary = SVector(phi, q1) + + # Calculate boundary flux + if direction == 2 # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equations) + end + return flux +end """ source_terms_harmonic(u, x, t, equations::HyperbolicDiffusionEquations1D) Source term that only includes the forcing from the hyperbolic diffusion system. """ -@inline function source_terms_harmonic(u, x, t, equations::HyperbolicDiffusionEquations1D) - # harmonic solution of the form ϕ = A + B * x, so f = 0 - @unpack inv_Tr = equations +@inline function source_terms_harmonic(u, x, t, + equations::HyperbolicDiffusionEquations1D) + # harmonic solution of the form ϕ = A + B * x, so f = 0 + @unpack inv_Tr = equations - dq1 = -inv_Tr * u[2] + dq1 = -inv_Tr * u[2] - return SVector(zero(dq1), dq1) + return SVector(zero(dq1), dq1) end - """ initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::HyperbolicDiffusionEquations1D) @@ -132,71 +134,68 @@ Setup used for convergence tests of the Euler equations with self-gravity used i [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) in combination with [`source_terms_harmonic`](@ref). """ -function initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::HyperbolicDiffusionEquations1D) - - # Determine phi_x - G = 1.0 # gravitational constant - C = -4.0 * G / pi # -4 * G / ndims * pi - A = 0.1 # perturbation coefficient must match Euler setup - rho1 = A * sinpi(x[1] - t) - # initialize with ansatz of gravity potential - phi = C * rho1 - q1 = C * A * pi * cospi(x[1] - t) # = gravity acceleration in x-direction - - return SVector(phi, q1) +function initial_condition_eoc_test_coupled_euler_gravity(x, t, + equations::HyperbolicDiffusionEquations1D) + + # Determine phi_x + G = 1.0 # gravitational constant + C = -4.0 * G / pi # -4 * G / ndims * pi + A = 0.1 # perturbation coefficient must match Euler setup + rho1 = A * sinpi(x[1] - t) + # initialize with ansatz of gravity potential + phi = C * rho1 + q1 = C * A * pi * cospi(x[1] - t) # = gravity acceleration in x-direction + + return SVector(phi, q1) end - # Calculate 1D flux in for a single point -@inline function flux(u, orientation::Integer, equations::HyperbolicDiffusionEquations1D) - phi, q1 = u - @unpack inv_Tr = equations +@inline function flux(u, orientation::Integer, + equations::HyperbolicDiffusionEquations1D) + phi, q1 = u + @unpack inv_Tr = equations - # Ignore orientation since it is always "1" in 1D - f1 = -equations.nu * q1 - f2 = -phi * inv_Tr + # Ignore orientation since it is always "1" in 1D + f1 = -equations.nu * q1 + f2 = -phi * inv_Tr - return SVector(f1, f2) + return SVector(f1, f2) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::HyperbolicDiffusionEquations1D) - λ_max = sqrt(equations.nu * equations.inv_Tr) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::HyperbolicDiffusionEquations1D) + λ_max = 
sqrt(equations.nu * equations.inv_Tr) end - @inline have_constant_speed(::HyperbolicDiffusionEquations1D) = True() @inline function max_abs_speeds(eq::HyperbolicDiffusionEquations1D) - return sqrt(eq.nu * eq.inv_Tr) + return sqrt(eq.nu * eq.inv_Tr) end - # Convert conservative variables to primitive @inline cons2prim(u, equations::HyperbolicDiffusionEquations1D) = u # Convert conservative variables to entropy found in I Do Like CFD, Too, Vol. 1 @inline function cons2entropy(u, equations::HyperbolicDiffusionEquations1D) - phi, q1 = u + phi, q1 = u - w1 = phi - w2 = equations.Lr^2 * q1 + w1 = phi + w2 = equations.Lr^2 * q1 - return SVector(w1, w2) + return SVector(w1, w2) end - # Calculate entropy for a conservative state `u` (here: same as total energy) -@inline entropy(u, equations::HyperbolicDiffusionEquations1D) = energy_total(u, equations) - +@inline function entropy(u, equations::HyperbolicDiffusionEquations1D) + energy_total(u, equations) +end # Calculate total energy for a conservative state `u` @inline function energy_total(u, equations::HyperbolicDiffusionEquations1D) - # energy function as found in equations (2.5.12) in the book "I Do Like CFD, Vol. 1" - phi, q1 = u - return 0.5 * (phi^2 + equations.Lr^2 * q1^2) + # energy function as found in equations (2.5.12) in the book "I Do Like CFD, Vol. 1" + phi, q1 = u + return 0.5 * (phi^2 + equations.Lr^2 * q1^2) end - - end # @muladd diff --git a/src/equations/hyperbolic_diffusion_2d.jl b/src/equations/hyperbolic_diffusion_2d.jl index 0f24949faad..25536a060f8 100644 --- a/src/equations/hyperbolic_diffusion_2d.jl +++ b/src/equations/hyperbolic_diffusion_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" HyperbolicDiffusionEquations2D @@ -13,92 +13,95 @@ A description of this system can be found in Sec. 2.5 of the book "I Do Like CFD The book is freely available at http://www.cfdbooks.com/ and further analysis can be found in the paper by Nishikawa [DOI: 10.1016/j.jcp.2007.07.029](https://doi.org/10.1016/j.jcp.2007.07.029) """ -struct HyperbolicDiffusionEquations2D{RealT<:Real} <: AbstractHyperbolicDiffusionEquations{2, 3} - Lr::RealT # reference length scale - inv_Tr::RealT # inverse of the reference time scale - nu::RealT # diffusion constant +struct HyperbolicDiffusionEquations2D{RealT <: Real} <: + AbstractHyperbolicDiffusionEquations{2, 3} + Lr::RealT # reference length scale + inv_Tr::RealT # inverse of the reference time scale + nu::RealT # diffusion constant end -function HyperbolicDiffusionEquations2D(; nu=1.0, Lr=inv(2pi)) - Tr = Lr^2 / nu - HyperbolicDiffusionEquations2D(promote(Lr, inv(Tr), nu)...) +function HyperbolicDiffusionEquations2D(; nu = 1.0, Lr = inv(2pi)) + Tr = Lr^2 / nu + HyperbolicDiffusionEquations2D(promote(Lr, inv(Tr), nu)...) 
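+    # (`promote` converts `Lr`, `inv(Tr)`, and `nu` to a common floating-point
+    # type, which then becomes the struct's `RealT` parameter.)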
end - varnames(::typeof(cons2cons), ::HyperbolicDiffusionEquations2D) = ("phi", "q1", "q2") varnames(::typeof(cons2prim), ::HyperbolicDiffusionEquations2D) = ("phi", "q1", "q2") -default_analysis_errors(::HyperbolicDiffusionEquations2D) = (:l2_error, :linf_error, :residual) +function default_analysis_errors(::HyperbolicDiffusionEquations2D) + (:l2_error, :linf_error, :residual) +end @inline function residual_steady_state(du, ::HyperbolicDiffusionEquations2D) - abs(du[1]) + abs(du[1]) end - # Set initial conditions at physical location `x` for pseudo-time `t` -@inline function initial_condition_poisson_nonperiodic(x, t, equations::HyperbolicDiffusionEquations2D) - # elliptic equation: -ν Δϕ = f in Ω, u = g on ∂Ω - if iszero(t) - T = eltype(x) - phi = one(T) - q1 = one(T) - q2 = one(T) - else - sinpi_x1, cospi_x1 = sincos(pi*x[1]) - sinpi_2x2, cospi_2x2 = sincos(pi*2*x[2]) - phi = 2 * cospi_x1 * sinpi_2x2 + 2 # ϕ - q1 = -2 * pi * sinpi_x1 * sinpi_2x2 # ϕ_x - q2 = 4 * pi * cospi_x1 * cospi_2x2 # ϕ_y - end - return SVector(phi, q1, q2) -end - -@inline function source_terms_poisson_nonperiodic(u, x, t, equations::HyperbolicDiffusionEquations2D) - # elliptic equation: -ν Δϕ = f in Ω, u = g on ∂Ω - # analytical solution: ϕ = 2cos(πx)sin(2πy) + 2 and f = 10π^2cos(πx)sin(2πy) - @unpack inv_Tr = equations - - x1, x2 = x - du1 = 10 * pi^2 * cospi(x1) * sinpi(2 * x2) - du2 = -inv_Tr * u[2] - du3 = -inv_Tr * u[3] - - return SVector(du1, du2, du3) -end - -@inline function boundary_condition_poisson_nonperiodic(u_inner, orientation, direction, x, t, +@inline function initial_condition_poisson_nonperiodic(x, t, + equations::HyperbolicDiffusionEquations2D) + # elliptic equation: -ν Δϕ = f in Ω, u = g on ∂Ω + if iszero(t) + T = eltype(x) + phi = one(T) + q1 = one(T) + q2 = one(T) + else + sinpi_x1, cospi_x1 = sincos(pi * x[1]) + sinpi_2x2, cospi_2x2 = sincos(pi * 2 * x[2]) + phi = 2 * cospi_x1 * sinpi_2x2 + 2 # ϕ + q1 = -2 * pi * sinpi_x1 * sinpi_2x2 # ϕ_x + q2 = 4 * pi * cospi_x1 * cospi_2x2 # ϕ_y + end + return SVector(phi, q1, q2) +end + +@inline function source_terms_poisson_nonperiodic(u, x, t, + equations::HyperbolicDiffusionEquations2D) + # elliptic equation: -ν Δϕ = f in Ω, u = g on ∂Ω + # analytical solution: ϕ = 2cos(πx)sin(2πy) + 2 and f = 10π^2cos(πx)sin(2πy) + @unpack inv_Tr = equations + + x1, x2 = x + du1 = 10 * pi^2 * cospi(x1) * sinpi(2 * x2) + du2 = -inv_Tr * u[2] + du3 = -inv_Tr * u[3] + + return SVector(du1, du2, du3) +end + +@inline function boundary_condition_poisson_nonperiodic(u_inner, orientation, direction, + x, t, surface_flux_function, equations::HyperbolicDiffusionEquations2D) - # elliptic equation: -ν Δϕ = f in Ω, u = g on ∂Ω - u_boundary = initial_condition_poisson_nonperiodic(x, one(t), equations) + # elliptic equation: -ν Δϕ = f in Ω, u = g on ∂Ω + u_boundary = initial_condition_poisson_nonperiodic(x, one(t), equations) - # Calculate boundary flux - if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equations) - end + # Calculate boundary flux + if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, 
u_inner, orientation, equations) + end - return flux + return flux end - """ source_terms_harmonic(u, x, t, equations::HyperbolicDiffusionEquations2D) Source term that only includes the forcing from the hyperbolic diffusion system. """ -@inline function source_terms_harmonic(u, x, t, equations::HyperbolicDiffusionEquations2D) - # harmonic solution ϕ = (sinh(πx)sin(πy) + sinh(πy)sin(πx))/sinh(π), so f = 0 - @unpack inv_Tr = equations - phi, q1, q2 = u +@inline function source_terms_harmonic(u, x, t, + equations::HyperbolicDiffusionEquations2D) + # harmonic solution ϕ = (sinh(πx)sin(πy) + sinh(πy)sin(πx))/sinh(π), so f = 0 + @unpack inv_Tr = equations + phi, q1, q2 = u - du2 = -inv_Tr * q1 - du3 = -inv_Tr * q2 + du2 = -inv_Tr * q1 + du3 = -inv_Tr * q2 - return SVector(0, du2, du3) + return SVector(0, du2, du3) end - """ initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::HyperbolicDiffusionEquations2D) @@ -108,136 +111,139 @@ Setup used for convergence tests of the Euler equations with self-gravity used i [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) in combination with [`source_terms_harmonic`](@ref). """ -function initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::HyperbolicDiffusionEquations2D) +function initial_condition_eoc_test_coupled_euler_gravity(x, t, + equations::HyperbolicDiffusionEquations2D) - # Determine phi_x, phi_y - G = 1.0 # gravitational constant - C = -2.0*G/pi - A = 0.1 # perturbation coefficient must match Euler setup - rho1 = A * sin(pi * (x[1] + x[2] - t)) - # initialize with ansatz of gravity potential - phi = C * rho1 - q1 = C * A * pi * cos(pi*(x[1] + x[2] - t)) # = gravity acceleration in x-direction - q2 = q1 # = gravity acceleration in y-direction + # Determine phi_x, phi_y + G = 1.0 # gravitational constant + C = -2.0 * G / pi + A = 0.1 # perturbation coefficient must match Euler setup + rho1 = A * sin(pi * (x[1] + x[2] - t)) + # initialize with ansatz of gravity potential + phi = C * rho1 + q1 = C * A * pi * cos(pi * (x[1] + x[2] - t)) # = gravity acceleration in x-direction + q2 = q1 # = gravity acceleration in y-direction - return SVector(phi, q1, q2) + return SVector(phi, q1, q2) end - # Calculate 1D flux in for a single point -@inline function flux(u, orientation::Integer, equations::HyperbolicDiffusionEquations2D) - phi, q1, q2 = u - @unpack inv_Tr = equations +@inline function flux(u, orientation::Integer, + equations::HyperbolicDiffusionEquations2D) + phi, q1, q2 = u + @unpack inv_Tr = equations - if orientation == 1 - f1 = -equations.nu*q1 - f2 = -phi * inv_Tr - f3 = zero(phi) - else - f1 = -equations.nu*q2 - f2 = zero(phi) - f3 = -phi * inv_Tr - end + if orientation == 1 + f1 = -equations.nu * q1 + f2 = -phi * inv_Tr + f3 = zero(phi) + else + f1 = -equations.nu * q2 + f2 = zero(phi) + f3 = -phi * inv_Tr + end - return SVector(f1, f2, f3) + return SVector(f1, f2, f3) end # Note, this directional vector is not normalized -@inline function flux(u, normal_direction::AbstractVector, equations::HyperbolicDiffusionEquations2D) - phi, q1, q2 = u - @unpack inv_Tr = equations +@inline function flux(u, normal_direction::AbstractVector, + equations::HyperbolicDiffusionEquations2D) + phi, q1, q2 = u + @unpack inv_Tr = equations - f1 = -equations.nu * (normal_direction[1] * q1 + normal_direction[2] * q2) - f2 = -phi * inv_Tr * normal_direction[1] - f3 = -phi * inv_Tr * normal_direction[2] + f1 = -equations.nu * (normal_direction[1] * q1 + normal_direction[2] * q2) + f2 = -phi * inv_Tr * normal_direction[1] + f3 = -phi * inv_Tr 
* normal_direction[2] - return SVector(f1, f2, f3) + return SVector(f1, f2, f3) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::HyperbolicDiffusionEquations2D) - sqrt(equations.nu * equations.inv_Tr) -end - -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::HyperbolicDiffusionEquations2D) - sqrt(equations.nu * equations.inv_Tr) * norm(normal_direction) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::HyperbolicDiffusionEquations2D) + sqrt(equations.nu * equations.inv_Tr) +end + +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::HyperbolicDiffusionEquations2D) + sqrt(equations.nu * equations.inv_Tr) * norm(normal_direction) +end + +@inline function flux_godunov(u_ll, u_rr, orientation::Integer, + equations::HyperbolicDiffusionEquations2D) + # Obtain left and right fluxes + phi_ll, q1_ll, q2_ll = u_ll + phi_rr, q1_rr, q2_rr = u_rr + f_ll = flux(u_ll, orientation, equations) + f_rr = flux(u_rr, orientation, equations) + + # this is an optimized version of the application of the upwind dissipation matrix: + # dissipation = 0.5*R_n*|Λ|*inv(R_n)[[u]] + λ_max = sqrt(equations.nu * equations.inv_Tr) + f1 = 1 / 2 * (f_ll[1] + f_rr[1]) - 1 / 2 * λ_max * (phi_rr - phi_ll) + if orientation == 1 # x-direction + f2 = 1 / 2 * (f_ll[2] + f_rr[2]) - 1 / 2 * λ_max * (q1_rr - q1_ll) + f3 = 1 / 2 * (f_ll[3] + f_rr[3]) + else # y-direction + f2 = 1 / 2 * (f_ll[2] + f_rr[2]) + f3 = 1 / 2 * (f_ll[3] + f_rr[3]) - 1 / 2 * λ_max * (q2_rr - q2_ll) + end + + return SVector(f1, f2, f3) +end + +@inline function flux_godunov(u_ll, u_rr, normal_direction::AbstractVector, + equations::HyperbolicDiffusionEquations2D) + # Obtain left and right fluxes + phi_ll, q1_ll, q2_ll = u_ll + phi_rr, q1_rr, q2_rr = u_rr + f_ll = flux(u_ll, normal_direction, equations) + f_rr = flux(u_rr, normal_direction, equations) + + # this is an optimized version of the application of the upwind dissipation matrix: + # dissipation = 0.5*R_n*|Λ|*inv(R_n)[[u]] + λ_max = sqrt(equations.nu * equations.inv_Tr) + f1 = 1 / 2 * (f_ll[1] + f_rr[1]) - + 1 / 2 * λ_max * (phi_rr - phi_ll) * + sqrt(normal_direction[1]^2 + normal_direction[2]^2) + f2 = 1 / 2 * (f_ll[2] + f_rr[2]) - + 1 / 2 * λ_max * (q1_rr - q1_ll) * normal_direction[1] + f3 = 1 / 2 * (f_ll[3] + f_rr[3]) - + 1 / 2 * λ_max * (q2_rr - q2_ll) * normal_direction[2] + + return SVector(f1, f2, f3) end - -@inline function flux_godunov(u_ll, u_rr, orientation::Integer, equations::HyperbolicDiffusionEquations2D) - # Obtain left and right fluxes - phi_ll, q1_ll, q2_ll = u_ll - phi_rr, q1_rr, q2_rr = u_rr - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) - - # this is an optimized version of the application of the upwind dissipation matrix: - # dissipation = 0.5*R_n*|Λ|*inv(R_n)[[u]] - λ_max = sqrt(equations.nu * equations.inv_Tr) - f1 = 1/2 * (f_ll[1] + f_rr[1]) - 1/2 * λ_max * (phi_rr - phi_ll) - if orientation == 1 # x-direction - f2 = 1/2 * (f_ll[2] + f_rr[2]) - 1/2 * λ_max * (q1_rr - q1_ll) - f3 = 1/2 * (f_ll[3] + f_rr[3]) - else # y-direction - f2 = 1/2 * (f_ll[2] + f_rr[2]) - f3 = 1/2 * (f_ll[3] + f_rr[3]) - 1/2 * λ_max * (q2_rr - q2_ll) - end - - return SVector(f1, f2, f3) -end - -@inline function flux_godunov(u_ll, u_rr, normal_direction::AbstractVector, equations::HyperbolicDiffusionEquations2D) - # Obtain left and right 
fluxes - phi_ll, q1_ll, q2_ll = u_ll - phi_rr, q1_rr, q2_rr = u_rr - f_ll = flux(u_ll, normal_direction, equations) - f_rr = flux(u_rr, normal_direction, equations) - - # this is an optimized version of the application of the upwind dissipation matrix: - # dissipation = 0.5*R_n*|Λ|*inv(R_n)[[u]] - λ_max = sqrt(equations.nu * equations.inv_Tr) - f1 = 1/2 * (f_ll[1] + f_rr[1]) - 1/2 * λ_max * (phi_rr - phi_ll) * sqrt(normal_direction[1]^2 + normal_direction[2]^2) - f2 = 1/2 * (f_ll[2] + f_rr[2]) - 1/2 * λ_max * (q1_rr - q1_ll) * normal_direction[1] - f3 = 1/2 * (f_ll[3] + f_rr[3]) - 1/2 * λ_max * (q2_rr - q2_ll) * normal_direction[2] - - return SVector(f1, f2, f3) -end - - - @inline have_constant_speed(::HyperbolicDiffusionEquations2D) = True() @inline function max_abs_speeds(eq::HyperbolicDiffusionEquations2D) - λ = sqrt(eq.nu * eq.inv_Tr) - return λ, λ + λ = sqrt(eq.nu * eq.inv_Tr) + return λ, λ end - # Convert conservative variables to primitive @inline cons2prim(u, equations::HyperbolicDiffusionEquations2D) = u # Convert conservative variables to entropy found in I Do Like CFD, Too, Vol. 1 @inline function cons2entropy(u, equations::HyperbolicDiffusionEquations2D) - phi, q1, q2 = u - w1 = phi - w2 = equations.Lr^2 * q1 - w3 = equations.Lr^2 * q2 + phi, q1, q2 = u + w1 = phi + w2 = equations.Lr^2 * q1 + w3 = equations.Lr^2 * q2 - return SVector(w1, w2, w3) + return SVector(w1, w2, w3) end - # Calculate entropy for a conservative state `u` (here: same as total energy) -@inline entropy(u, equations::HyperbolicDiffusionEquations2D) = energy_total(u, equations) - +@inline function entropy(u, equations::HyperbolicDiffusionEquations2D) + energy_total(u, equations) +end # Calculate total energy for a conservative state `u` @inline function energy_total(u, equations::HyperbolicDiffusionEquations2D) - # energy function as found in equations (2.5.12) in the book "I Do Like CFD, Vol. 1" - phi, q1, q2 = u - return 0.5 * (phi^2 + equations.Lr^2 * (q1^2 + q2^2)) + # energy function as found in equations (2.5.12) in the book "I Do Like CFD, Vol. 1" + phi, q1, q2 = u + return 0.5 * (phi^2 + equations.Lr^2 * (q1^2 + q2^2)) end - - end # @muladd diff --git a/src/equations/hyperbolic_diffusion_3d.jl b/src/equations/hyperbolic_diffusion_3d.jl index 2b4cfd95829..bf6a00140d4 100644 --- a/src/equations/hyperbolic_diffusion_3d.jl +++ b/src/equations/hyperbolic_diffusion_3d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" HyperbolicDiffusionEquations3D @@ -13,21 +13,27 @@ A description of this system can be found in Sec. 2.5 of the book "I Do Like CFD The book is freely available at http://www.cfdbooks.com/ and further analysis can be found in the paper by Nishikawa [DOI: 10.1016/j.jcp.2007.07.029](https://doi.org/10.1016/j.jcp.2007.07.029) """ -struct HyperbolicDiffusionEquations3D{RealT<:Real} <: AbstractHyperbolicDiffusionEquations{3, 4} - Lr::RealT # reference length scale - inv_Tr::RealT # inverse of the reference time scale - nu::RealT # diffusion constant +struct HyperbolicDiffusionEquations3D{RealT <: Real} <: + AbstractHyperbolicDiffusionEquations{3, 4} + Lr::RealT # reference length scale + inv_Tr::RealT # inverse of the reference time scale + nu::RealT # diffusion constant end -function HyperbolicDiffusionEquations3D(; nu=1.0, Lr=inv(2pi)) - Tr = Lr^2 / nu - HyperbolicDiffusionEquations3D(promote(Lr, inv(Tr), nu)...) 
+function HyperbolicDiffusionEquations3D(; nu = 1.0, Lr = inv(2pi)) + Tr = Lr^2 / nu + HyperbolicDiffusionEquations3D(promote(Lr, inv(Tr), nu)...) end - -varnames(::typeof(cons2cons), ::HyperbolicDiffusionEquations3D) = ("phi", "q1", "q2", "q3") -varnames(::typeof(cons2prim), ::HyperbolicDiffusionEquations3D) = ("phi", "q1", "q2", "q3") -default_analysis_errors(::HyperbolicDiffusionEquations3D) = (:l2_error, :linf_error, :residual) +function varnames(::typeof(cons2cons), ::HyperbolicDiffusionEquations3D) + ("phi", "q1", "q2", "q3") +end +function varnames(::typeof(cons2prim), ::HyperbolicDiffusionEquations3D) + ("phi", "q1", "q2", "q3") +end +function default_analysis_errors(::HyperbolicDiffusionEquations3D) + (:l2_error, :linf_error, :residual) +end """ residual_steady_state(du, ::AbstractHyperbolicDiffusionEquations) @@ -36,80 +42,80 @@ Used to determine the termination criterion of a [`SteadyStateCallback`](@ref). For hyperbolic diffusion, this checks convergence of the potential ``\\phi``. """ @inline function residual_steady_state(du, ::HyperbolicDiffusionEquations3D) - abs(du[1]) + abs(du[1]) end - # Set initial conditions at physical location `x` for pseudo-time `t` -function initial_condition_poisson_nonperiodic(x, t, equations::HyperbolicDiffusionEquations3D) - # elliptic equation: -νΔϕ = f - if t == 0.0 - phi = 1.0 - q1 = 1.0 - q2 = 1.0 - q3 = 1.0 - else - phi = 2.0 * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) + 2.0 # ϕ - q1 = -2.0 * pi * sin(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_x - q2 = 4.0 * pi * cos(pi * x[1]) * cos(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_y - q3 = 4.0 * pi * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * cos(2.0 * pi * x[3]) # ϕ_z - end - return SVector(phi, q1, q2, q3) -end - -@inline function source_terms_poisson_nonperiodic(u, x, t, equations::HyperbolicDiffusionEquations3D) - # elliptic equation: -νΔϕ = f - # analytical solution: ϕ = 2 cos(πx)sin(2πy)sin(2πz) + 2 and f = 18 π^2 cos(πx)sin(2πy)sin(2πz) - @unpack inv_Tr = equations - - x1, x2, x3 = x - du1 = 18 * pi^2 * cospi(x1) * sinpi(2 * x2) * sinpi(2 * x3) - du2 = -inv_Tr * u[2] - du3 = -inv_Tr * u[3] - du4 = -inv_Tr * u[4] - - return SVector(du1, du2, du3, du4) +function initial_condition_poisson_nonperiodic(x, t, + equations::HyperbolicDiffusionEquations3D) + # elliptic equation: -νΔϕ = f + if t == 0.0 + phi = 1.0 + q1 = 1.0 + q2 = 1.0 + q3 = 1.0 + else + phi = 2.0 * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) + 2.0 # ϕ + q1 = -2.0 * pi * sin(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_x + q2 = 4.0 * pi * cos(pi * x[1]) * cos(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_y + q3 = 4.0 * pi * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * cos(2.0 * pi * x[3]) # ϕ_z + end + return SVector(phi, q1, q2, q3) end -function boundary_condition_poisson_nonperiodic(u_inner, orientation, direction, x, t, - surface_flux_function, - equations::HyperbolicDiffusionEquations3D) - # elliptic equation: -νΔϕ = f - phi = 2.0 * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) + 2.0 # ϕ - q1 = -2.0 * pi * sin(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_x - q2 = 4.0 * pi * cos(pi * x[1]) * cos(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_y - q3 = 4.0 * pi * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * cos(2.0 * pi * x[3]) # ϕ_z - u_boundary = SVector(phi, q1, q2, q3) +@inline function source_terms_poisson_nonperiodic(u, x, t, + equations::HyperbolicDiffusionEquations3D) + # elliptic equation: -νΔϕ = f + # analytical solution: ϕ = 2 
cos(πx)sin(2πy)sin(2πz) + 2 and f = 18 π^2 cos(πx)sin(2πy)sin(2πz) + @unpack inv_Tr = equations - # Calculate boundary flux - if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equations) - end + x1, x2, x3 = x + du1 = 18 * pi^2 * cospi(x1) * sinpi(2 * x2) * sinpi(2 * x3) + du2 = -inv_Tr * u[2] + du3 = -inv_Tr * u[3] + du4 = -inv_Tr * u[4] - return flux + return SVector(du1, du2, du3, du4) end +function boundary_condition_poisson_nonperiodic(u_inner, orientation, direction, x, t, + surface_flux_function, + equations::HyperbolicDiffusionEquations3D) + # elliptic equation: -νΔϕ = f + phi = 2.0 * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) + 2.0 # ϕ + q1 = -2.0 * pi * sin(pi * x[1]) * sin(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_x + q2 = 4.0 * pi * cos(pi * x[1]) * cos(2.0 * pi * x[2]) * sin(2.0 * pi * x[3]) # ϕ_y + q3 = 4.0 * pi * cos(pi * x[1]) * sin(2.0 * pi * x[2]) * cos(2.0 * pi * x[3]) # ϕ_z + u_boundary = SVector(phi, q1, q2, q3) + + # Calculate boundary flux + if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equations) + end + + return flux +end """ source_terms_harmonic(u, x, t, equations::HyperbolicDiffusionEquations3D) Source term that only includes the forcing from the hyperbolic diffusion system. """ -@inline function source_terms_harmonic(u, x, t, equations::HyperbolicDiffusionEquations3D) - # harmonic solution ϕ = (sinh(πx)sin(πy) + sinh(πy)sin(πx))/sinh(π), so f = 0 - @unpack inv_Tr = equations +@inline function source_terms_harmonic(u, x, t, + equations::HyperbolicDiffusionEquations3D) + # harmonic solution ϕ = (sinh(πx)sin(πy) + sinh(πy)sin(πx))/sinh(π), so f = 0 + @unpack inv_Tr = equations - du1 = zero(u[1]) - du2 = -inv_Tr * u[2] - du3 = -inv_Tr * u[3] - du4 = -inv_Tr * u[4] + du1 = zero(u[1]) + du2 = -inv_Tr * u[2] + du3 = -inv_Tr * u[3] + du4 = -inv_Tr * u[4] - return SVector(du1, du2, du3, du4) + return SVector(du1, du2, du3, du4) end - """ initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::HyperbolicDiffusionEquations3D) @@ -119,119 +125,113 @@ Setup used for convergence tests of the Euler equations with self-gravity used i [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) in combination with [`source_terms_harmonic`](@ref). 
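A minimal evaluation sketch (the coordinates and time are illustrative values):

```julia
using Trixi
using StaticArrays: SVector

equations = HyperbolicDiffusionEquations3D()  # default nu = 1.0, Lr = 1/(2pi)
u0 = initial_condition_eoc_test_coupled_euler_gravity(SVector(0.0, 0.0, 0.0), 0.0,
                                                      equations)
# u0 is an SVector(phi, q1, q2, q3); by construction q1 == q2 == q3 here
```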
""" -function initial_condition_eoc_test_coupled_euler_gravity(x, t, equations::HyperbolicDiffusionEquations3D) - - # Determine phi_x, phi_y - G = 1.0 # gravitational constant - C_grav = -4 * G / (3 * pi) # "3" is the number of spatial dimensions # 2D: -2.0*G/pi - A = 0.1 # perturbation coefficient must match Euler setup - rho1 = A * sin(pi * (x[1] + x[2] + x[3] - t)) - # initialize with ansatz of gravity potential - phi = C_grav * rho1 - q1 = C_grav * A * pi * cos(pi*(x[1] + x[2] + x[3] - t)) # = gravity acceleration in x-direction - q2 = q1 # = gravity acceleration in y-direction - q3 = q1 # = gravity acceleration in z-direction - - return SVector(phi, q1, q2, q3) +function initial_condition_eoc_test_coupled_euler_gravity(x, t, + equations::HyperbolicDiffusionEquations3D) + + # Determine phi_x, phi_y + G = 1.0 # gravitational constant + C_grav = -4 * G / (3 * pi) # "3" is the number of spatial dimensions # 2D: -2.0*G/pi + A = 0.1 # perturbation coefficient must match Euler setup + rho1 = A * sin(pi * (x[1] + x[2] + x[3] - t)) + # initialize with ansatz of gravity potential + phi = C_grav * rho1 + q1 = C_grav * A * pi * cos(pi * (x[1] + x[2] + x[3] - t)) # = gravity acceleration in x-direction + q2 = q1 # = gravity acceleration in y-direction + q3 = q1 # = gravity acceleration in z-direction + + return SVector(phi, q1, q2, q3) end - - # Calculate 1D flux in for a single point -@inline function flux(u, orientation::Integer, equations::HyperbolicDiffusionEquations3D) - phi, q1, q2, q3 = u - - if orientation == 1 - f1 = -equations.nu*q1 - f2 = -phi * equations.inv_Tr - f3 = zero(phi) - f4 = zero(phi) - elseif orientation == 2 - f1 = -equations.nu*q2 - f2 = zero(phi) - f3 = -phi * equations.inv_Tr - f4 = zero(phi) - else - f1 = -equations.nu*q3 - f2 = zero(phi) - f3 = zero(phi) - f4 = -phi * equations.inv_Tr - end - - return SVector(f1, f2, f3, f4) +@inline function flux(u, orientation::Integer, + equations::HyperbolicDiffusionEquations3D) + phi, q1, q2, q3 = u + + if orientation == 1 + f1 = -equations.nu * q1 + f2 = -phi * equations.inv_Tr + f3 = zero(phi) + f4 = zero(phi) + elseif orientation == 2 + f1 = -equations.nu * q2 + f2 = zero(phi) + f3 = -phi * equations.inv_Tr + f4 = zero(phi) + else + f1 = -equations.nu * q3 + f2 = zero(phi) + f3 = zero(phi) + f4 = -phi * equations.inv_Tr + end + + return SVector(f1, f2, f3, f4) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::HyperbolicDiffusionEquations3D) - λ_max = sqrt(equations.nu * equations.inv_Tr) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::HyperbolicDiffusionEquations3D) + λ_max = sqrt(equations.nu * equations.inv_Tr) end - -@inline function flux_godunov(u_ll, u_rr, orientation::Integer, equations::HyperbolicDiffusionEquations3D) - # Obtain left and right fluxes - phi_ll, q1_ll, q2_ll, q3_ll = u_ll - phi_rr, q1_rr, q2_rr, q3_rr = u_rr - f_ll = flux(u_ll, orientation, equations) - f_rr = flux(u_rr, orientation, equations) - - # this is an optimized version of the application of the upwind dissipation matrix: - # dissipation = 0.5*R_n*|Λ|*inv(R_n)[[u]] - λ_max = sqrt(equations.nu * equations.inv_Tr) - f1 = 1/2 * (f_ll[1] + f_rr[1]) - 1/2 * λ_max * (phi_rr - phi_ll) - if orientation == 1 # x-direction - f2 = 1/2 * (f_ll[2] + f_rr[2]) - 1/2 * λ_max * (q1_rr - q1_ll) - f3 = 1/2 * (f_ll[3] + f_rr[3]) - f4 = 1/2 * (f_ll[4] + f_rr[4]) - elseif orientation == 2 # y-direction - f2 = 1/2 * 
(f_ll[2] + f_rr[2])
+        f3 = 1 / 2 * (f_ll[3] + f_rr[3]) - 1 / 2 * λ_max * (q2_rr - q2_ll)
+        f4 = 1 / 2 * (f_ll[4] + f_rr[4])
+    else # z-direction
+        f2 = 1 / 2 * (f_ll[2] + f_rr[2])
+        f3 = 1 / 2 * (f_ll[3] + f_rr[3])
+        f4 = 1 / 2 * (f_ll[4] + f_rr[4]) - 1 / 2 * λ_max * (q3_rr - q3_ll)
+    end
+
+    return SVector(f1, f2, f3, f4)
 end
-
-
 @inline have_constant_speed(::HyperbolicDiffusionEquations3D) = True()

 @inline function max_abs_speeds(eq::HyperbolicDiffusionEquations3D)
-  λ = sqrt(eq.nu * eq.inv_Tr)
-  return λ, λ, λ
+    λ = sqrt(eq.nu * eq.inv_Tr)
+    return λ, λ, λ
 end

-
 # Convert conservative variables to primitive
 @inline cons2prim(u, equations::HyperbolicDiffusionEquations3D) = u

-
 # Convert conservative variables to entropy found in I Do Like CFD, Too, Vol. 1
 @inline function cons2entropy(u, equations::HyperbolicDiffusionEquations3D)
-  phi, q1, q2, q3 = u
-  w1 = phi
-  w2 = equations.Lr^2 * q1
-  w3 = equations.Lr^2 * q2
-  w4 = equations.Lr^2 * q3
+    phi, q1, q2, q3 = u
+    w1 = phi
+    w2 = equations.Lr^2 * q1
+    w3 = equations.Lr^2 * q2
+    w4 = equations.Lr^2 * q3

-  return SVector(w1, w2, w3, w4)
+    return SVector(w1, w2, w3, w4)
 end

-
 # Calculate entropy for a conservative state `u` (here: same as total energy)
-@inline entropy(u, equations::HyperbolicDiffusionEquations3D) = energy_total(u, equations)
-
+@inline function entropy(u, equations::HyperbolicDiffusionEquations3D)
+    energy_total(u, equations)
+end

 # Calculate total energy for a conservative state `u`
 @inline function energy_total(u, equations::HyperbolicDiffusionEquations3D)
-  # energy function as found in equation (2.5.12) in the book "I Do Like CFD, Vol. 1"
-  phi, q1, q2, q3 = u
-  return 0.5 * (phi^2 + equations.Lr^2 * (q1^2 + q2^2 + q3^2))
+    # energy function as found in equation (2.5.12) in the book "I Do Like CFD, Vol. 1"
+    phi, q1, q2, q3 = u
+    return 0.5 * (phi^2 + equations.Lr^2 * (q1^2 + q2^2 + q3^2))
 end
-
-
 end # @muladd
diff --git a/src/equations/ideal_glm_mhd_1d.jl b/src/equations/ideal_glm_mhd_1d.jl
index 980ff24d9ef..4ef593cda53 100644
--- a/src/equations/ideal_glm_mhd_1d.jl
+++ b/src/equations/ideal_glm_mhd_1d.jl
@@ -3,7 +3,7 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#! format: noindent

 @doc raw"""
     IdealGlmMhdEquations1D(gamma)

The ideal compressible GLM-MHD equations for an ideal gas with ratio of
specific heats `gamma` in one space dimension.
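For example (the ratio of specific heats `5/3` is only an illustrative choice):

```julia
using Trixi

equations = IdealGlmMhdEquations1D(5 / 3)
nvariables(equations)  # 8: rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3
```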
There is no divergence cleaning variable `psi` because the divergence-free constraint is satisfied trivially in one spatial dimension. """ -struct IdealGlmMhdEquations1D{RealT<:Real} <: AbstractIdealGlmMhdEquations{1, 8} - gamma::RealT # ratio of specific heats - inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications - - function IdealGlmMhdEquations1D(gamma) - γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) - new{typeof(γ)}(γ, inv_gamma_minus_one) - end +struct IdealGlmMhdEquations1D{RealT <: Real} <: AbstractIdealGlmMhdEquations{1, 8} + gamma::RealT # ratio of specific heats + inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications + + function IdealGlmMhdEquations1D(gamma) + γ, inv_gamma_minus_one = promote(gamma, inv(gamma - 1)) + new{typeof(γ)}(γ, inv_gamma_minus_one) + end end have_nonconservative_terms(::IdealGlmMhdEquations1D) = False() -varnames(::typeof(cons2cons), ::IdealGlmMhdEquations1D) = ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3") -varnames(::typeof(cons2prim), ::IdealGlmMhdEquations1D) = ("rho", "v1", "v2", "v3", "p", "B1", "B2", "B3") -default_analysis_integrals(::IdealGlmMhdEquations1D) = (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) - +function varnames(::typeof(cons2cons), ::IdealGlmMhdEquations1D) + ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3") +end +function varnames(::typeof(cons2prim), ::IdealGlmMhdEquations1D) + ("rho", "v1", "v2", "v3", "p", "B1", "B2", "B3") +end +function default_analysis_integrals(::IdealGlmMhdEquations1D) + (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) +end """ initial_condition_constant(x, t, equations::IdealGlmMhdEquations1D) @@ -37,40 +42,38 @@ default_analysis_integrals(::IdealGlmMhdEquations1D) = (entropy_timederivative, A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equations::IdealGlmMhdEquations1D) - rho = 1.0 - rho_v1 = 0.1 - rho_v2 = -0.2 - rho_v3 = -0.5 - rho_e = 50.0 - B1 = 3.0 - B2 = -1.2 - B3 = 0.5 - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3) + rho = 1.0 + rho_v1 = 0.1 + rho_v2 = -0.2 + rho_v3 = -0.5 + rho_e = 50.0 + B1 = 3.0 + B2 = -1.2 + B3 = 0.5 + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3) end - """ initial_condition_convergence_test(x, t, equations::IdealGlmMhdEquations1D) An Alfvén wave as smooth initial condition used for convergence tests. """ function initial_condition_convergence_test(x, t, equations::IdealGlmMhdEquations1D) - # smooth Alfvén wave test from Derigs et al. FLASH (2016) - # domain must be set to [0, 1], γ = 5/3 - rho = 1.0 - v1 = 0.0 - # TODO: sincospi - si, co = sincos(2 * pi * x[1]) - v2 = 0.1 * si - v3 = 0.1 * co - p = 0.1 - B1 = 1.0 - B2 = v2 - B3 = v3 - return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3), equations) + # smooth Alfvén wave test from Derigs et al. 
FLASH (2016) + # domain must be set to [0, 1], γ = 5/3 + rho = 1.0 + v1 = 0.0 + # TODO: sincospi + si, co = sincos(2 * pi * x[1]) + v2 = 0.1 * si + v3 = 0.1 * co + p = 0.1 + B1 = 1.0 + B2 = v2 + B3 = v3 + return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3), equations) end - """ initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdEquations1D) @@ -80,48 +83,47 @@ A weak blast wave adapted from [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ function initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdEquations1D) - # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - # Same discontinuity in the velocities but with magnetic fields - # Set up polar coordinates - inicenter = (0,) - x_norm = x[1] - inicenter[1] - r = sqrt(x_norm^2) - phi = atan(x_norm) - - # Calculate primitive variables - rho = r > 0.5 ? 1.0 : 1.1691 - v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) - p = r > 0.5 ? 1.0 : 1.245 - - return prim2cons(SVector(rho, v1, 0.0, 0.0, p, 1.0, 1.0, 1.0, 0.0), equations) + # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3) + # Same discontinuity in the velocities but with magnetic fields + # Set up polar coordinates + inicenter = (0,) + x_norm = x[1] - inicenter[1] + r = sqrt(x_norm^2) + phi = atan(x_norm) + + # Calculate primitive variables + rho = r > 0.5 ? 1.0 : 1.1691 + v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) + p = r > 0.5 ? 1.0 : 1.245 + + return prim2cons(SVector(rho, v1, 0.0, 0.0, p, 1.0, 1.0, 1.0, 0.0), equations) end - # Calculate 1D flux in for a single point @inline function flux(u, orientation::Integer, equations::IdealGlmMhdEquations1D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p_over_gamma_minus_one = (rho_e - kin_en - mag_en) - p = (equations.gamma - 1) * p_over_gamma_minus_one - - # Ignore orientation since it is always "1" in 1D - f1 = rho_v1 - f2 = rho_v1*v1 + p + mag_en - B1^2 - f3 = rho_v1*v2 - B1*B2 - f4 = rho_v1*v3 - B1*B3 - f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en)*v1 - B1*(v1*B1 + v2*B2 + v3*B3) - f6 = 0.0 - f7 = v1*B2 - v2*B1 - f8 = v1*B3 - v3*B1 - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p_over_gamma_minus_one = (rho_e - kin_en - mag_en) + p = (equations.gamma - 1) * p_over_gamma_minus_one + + # Ignore orientation since it is always "1" in 1D + f1 = rho_v1 + f2 = rho_v1 * v1 + p + mag_en - B1^2 + f3 = rho_v1 * v2 - B1 * B2 + f4 = rho_v1 * v3 - B1 * B3 + f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v1 - + B1 * (v1 * B1 + v2 * B2 + v3 * B3) + f6 = 0.0 + f7 = v1 * B2 - v2 * B1 + f8 = v1 * B3 - v3 * B1 + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8) end - """ flux_derigs_etal(u_ll, u_rr, orientation, equations::IdealGlmMhdEquations1D) @@ -131,63 +133,66 @@ Entropy conserving two-point flux by divergence diminishing ideal magnetohydrodynamics equations [DOI: 10.1016/j.jcp.2018.03.002](https://doi.org/10.1016/j.jcp.2018.03.002) """ -function flux_derigs_etal(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations1D) - # Unpack left and right states to get velocities, pressure, and inverse 
temperature (called beta) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr = u_rr - - v1_ll = rho_v1_ll/rho_ll - v2_ll = rho_v2_ll/rho_ll - v3_ll = rho_v3_ll/rho_ll - v1_rr = rho_v1_rr/rho_rr - v2_rr = rho_v2_rr/rho_rr - v3_rr = rho_v3_rr/rho_rr - vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 - vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 - mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 - mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 - p_ll = (equations.gamma - 1)*(rho_e_ll - 0.5*rho_ll*vel_norm_ll - 0.5*mag_norm_ll) - p_rr = (equations.gamma - 1)*(rho_e_rr - 0.5*rho_rr*vel_norm_rr - 0.5*mag_norm_rr) - beta_ll = 0.5*rho_ll/p_ll - beta_rr = 0.5*rho_rr/p_rr - # for convenience store v⋅B - vel_dot_mag_ll = v1_ll*B1_ll + v2_ll*B2_ll + v3_ll*B3_ll - vel_dot_mag_rr = v1_rr*B1_rr + v2_rr*B2_rr + v3_rr*B3_rr - - # Compute the necessary mean values needed for either direction - rho_avg = 0.5*(rho_ll+rho_rr) - rho_mean = ln_mean(rho_ll,rho_rr) - beta_mean = ln_mean(beta_ll,beta_rr) - beta_avg = 0.5*(beta_ll+beta_rr) - v1_avg = 0.5*(v1_ll+v1_rr) - v2_avg = 0.5*(v2_ll+v2_rr) - v3_avg = 0.5*(v3_ll+v3_rr) - p_mean = 0.5*rho_avg/beta_avg - B1_avg = 0.5*(B1_ll+B1_rr) - B2_avg = 0.5*(B2_ll+B2_rr) - B3_avg = 0.5*(B3_ll+B3_rr) - vel_norm_avg = 0.5*(vel_norm_ll+vel_norm_rr) - mag_norm_avg = 0.5*(mag_norm_ll+mag_norm_rr) - vel_dot_mag_avg = 0.5*(vel_dot_mag_ll+vel_dot_mag_rr) - - # Ignore orientation since it is always "1" in 1D - f1 = rho_mean*v1_avg - f2 = f1*v1_avg + p_mean + 0.5*mag_norm_avg - B1_avg*B1_avg - f3 = f1*v2_avg - B1_avg*B2_avg - f4 = f1*v3_avg - B1_avg*B3_avg - f6 = 0.0 - f7 = v1_avg*B2_avg - v2_avg*B1_avg - f8 = v1_avg*B3_avg - v3_avg*B1_avg - # total energy flux is complicated and involves the previous eight components - v1_mag_avg = 0.5*(v1_ll*mag_norm_ll + v1_rr*mag_norm_rr) - f5 = (f1*0.5*(1/(equations.gamma-1)/beta_mean - vel_norm_avg) + f2*v1_avg + f3*v2_avg + - f4*v3_avg + f6*B1_avg + f7*B2_avg + f8*B3_avg - 0.5*v1_mag_avg + - B1_avg*vel_dot_mag_avg) - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8) +function flux_derigs_etal(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations1D) + # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 + vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 + mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 + mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 + p_ll = (equations.gamma - 1) * + (rho_e_ll - 0.5 * rho_ll * vel_norm_ll - 0.5 * mag_norm_ll) + p_rr = (equations.gamma - 1) * + (rho_e_rr - 0.5 * rho_rr * vel_norm_rr - 0.5 * mag_norm_rr) + beta_ll = 0.5 * rho_ll / p_ll + beta_rr = 0.5 * rho_rr / p_rr + # for convenience store v⋅B + vel_dot_mag_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + vel_dot_mag_rr = v1_rr * B1_rr + v2_rr * B2_rr + v3_rr * B3_rr + + # Compute the necessary mean values needed for either direction + rho_avg = 0.5 * (rho_ll + rho_rr) + rho_mean = ln_mean(rho_ll, rho_rr) + beta_mean = ln_mean(beta_ll, beta_rr) + beta_avg = 0.5 * (beta_ll + beta_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * 
(v3_ll + v3_rr) + p_mean = 0.5 * rho_avg / beta_avg + B1_avg = 0.5 * (B1_ll + B1_rr) + B2_avg = 0.5 * (B2_ll + B2_rr) + B3_avg = 0.5 * (B3_ll + B3_rr) + vel_norm_avg = 0.5 * (vel_norm_ll + vel_norm_rr) + mag_norm_avg = 0.5 * (mag_norm_ll + mag_norm_rr) + vel_dot_mag_avg = 0.5 * (vel_dot_mag_ll + vel_dot_mag_rr) + + # Ignore orientation since it is always "1" in 1D + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_mean + 0.5 * mag_norm_avg - B1_avg * B1_avg + f3 = f1 * v2_avg - B1_avg * B2_avg + f4 = f1 * v3_avg - B1_avg * B3_avg + f6 = 0.0 + f7 = v1_avg * B2_avg - v2_avg * B1_avg + f8 = v1_avg * B3_avg - v3_avg * B1_avg + # total energy flux is complicated and involves the previous eight components + v1_mag_avg = 0.5 * (v1_ll * mag_norm_ll + v1_rr * mag_norm_rr) + f5 = (f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - vel_norm_avg) + + f2 * v1_avg + f3 * v2_avg + + f4 * v3_avg + f6 * B1_avg + f7 * B2_avg + f8 * B3_avg - 0.5 * v1_mag_avg + + B1_avg * vel_dot_mag_avg) + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8) end - """ flux_hindenlang_gassner(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdEquations1D) @@ -210,65 +215,68 @@ Hindenlang and Gassner (2019), extending [`flux_ranocha`](@ref) to the MHD equat the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations1D) - # Unpack left and right states - rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr = cons2prim(u_rr, equations) - - # Compute the necessary mean values needed for either direction - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) - - # Calculate fluxes depending on orientation with specific direction averages - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg + magnetic_square_avg - 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) - f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) - f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) - #f5 below - f6 = 0.0 - f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) - f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v1_rr + p_rr * v1_ll - + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll) - + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) - - (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) - - (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll) ) ) - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8) +@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations1D) + # Unpack left and right states + rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll = cons2prim(u_ll, 
equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr = cons2prim(u_rr, equations) + + # Compute the necessary mean values needed for either direction + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) + + # Calculate fluxes depending on orientation with specific direction averages + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + magnetic_square_avg - + 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) + f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) + f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) + #f5 below + f6 = 0.0 + f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) + f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v1_rr + p_rr * v1_ll + + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll) + + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) + - + (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) + - + (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll))) + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations1D) - rho_ll, rho_v1_ll, _ = u_ll - rho_rr, rho_v1_rr, _ = u_rr - - # Calculate velocities (ignore orientation since it is always "1" in 1D) - # and fast magnetoacoustic wave speeds - # left - v_ll = rho_v1_ll / rho_ll - cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) - # right - v_rr = rho_v1_rr / rho_rr - cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) - - λ_max = max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations1D) + rho_ll, rho_v1_ll, _ = u_ll + rho_rr, rho_v1_rr, _ = u_rr + + # Calculate velocities (ignore orientation since it is always "1" in 1D) + # and fast magnetoacoustic wave speeds + # left + v_ll = rho_v1_ll / rho_ll + cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) + # right + v_rr = rho_v1_rr / rho_rr + cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) + + λ_max = max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) end - """ min_max_speed_naive(u_ll, u_rr, orientation, equations::IdealGlmMhdEquations1D) @@ -277,127 +285,128 @@ Calculate minimum and maximum wave speeds for HLL-type fluxes as in An HLLC Riemann solver for magneto-hydrodynamics [DOI: 10.1016/j.jcp.2004.08.020](https://doi.org/10.1016/j.jcp.2004.08.020) """ -@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations1D) - rho_ll, rho_v1_ll, _ = u_ll - rho_rr, rho_v1_rr, _ = u_rr - - # Calculate primitive variables - v1_ll = rho_v1_ll / rho_ll - v1_rr = rho_v1_rr/rho_rr - - # Approximate the left-most and right-most eigenvalues in the Riemann fan - c_f_ll = 
calc_fast_wavespeed(u_ll, orientation, equations) - c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) - vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) - λ_min = min(v1_ll - c_f_ll, vel_roe - c_f_roe) - λ_max = max(v1_rr + c_f_rr, vel_roe + c_f_roe) - - return λ_min, λ_max +@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations1D) + rho_ll, rho_v1_ll, _ = u_ll + rho_rr, rho_v1_rr, _ = u_rr + + # Calculate primitive variables + v1_ll = rho_v1_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + + # Approximate the left-most and right-most eigenvalues in the Riemann fan + c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) + c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) + vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) + λ_min = min(v1_ll - c_f_ll, vel_roe - c_f_roe) + λ_max = max(v1_rr + c_f_rr, vel_roe + c_f_roe) + + return λ_min, λ_max end - @inline function max_abs_speeds(u, equations::IdealGlmMhdEquations1D) - rho, rho_v1, _ = u - v1 = rho_v1 / rho - cf_x_direction = calc_fast_wavespeed(u, 1, equations) + rho, rho_v1, _ = u + v1 = rho_v1 / rho + cf_x_direction = calc_fast_wavespeed(u, 1, equations) - return abs(v1) + cf_x_direction + return abs(v1) + cf_x_direction end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::IdealGlmMhdEquations1D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3 - + B1 * B1 + B2 * B2 + B3 * B3)) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * (rho_e - + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3 + + B1 * B1 + B2 * B2 + B3 * B3)) - return SVector(rho, v1, v2, v3, p, B1, B2, B3) + return SVector(rho, v1, v2, v3, p, B1, B2, B3) end - # Convert conservative variables to entropy @inline function cons2entropy(u, equations::IdealGlmMhdEquations1D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_square = v1^2 + v2^2 + v3^2 - p = (equations.gamma - 1) * (rho_e - 0.5*rho*v_square - 0.5*(B1^2 + B2^2 + B3^2)) - s = log(p) - equations.gamma*log(rho) - rho_p = rho / p - - w1 = (equations.gamma - s) / (equations.gamma-1) - 0.5 * rho_p * v_square - w2 = rho_p * v1 - w3 = rho_p * v2 - w4 = rho_p * v3 - w5 = -rho_p - w6 = rho_p * B1 - w7 = rho_p * B2 - w8 = rho_p * B3 - - return SVector(w1, w2, w3, w4, w5, w6, w7, w8) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_square = v1^2 + v2^2 + v3^2 + p = (equations.gamma - 1) * + (rho_e - 0.5 * rho * v_square - 0.5 * (B1^2 + B2^2 + B3^2)) + s = log(p) - equations.gamma * log(rho) + rho_p = rho / p + + w1 = (equations.gamma - s) / (equations.gamma - 1) - 0.5 * rho_p * v_square + w2 = rho_p * v1 + w3 = rho_p * v2 + w4 = rho_p * v3 + w5 = -rho_p + w6 = rho_p * B1 + w7 = rho_p * B2 + w8 = rho_p * B3 + + return SVector(w1, w2, w3, w4, w5, w6, w7, w8) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::IdealGlmMhdEquations1D) - rho, v1, v2, v3, p, B1, B2, B3 = prim + rho, v1, v2, v3, p, B1, B2, B3 = prim - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_v3 = rho * v3 - rho_e = p/(equations.gamma-1) + 0.5 * (rho_v1*v1 + rho_v2*v2 + rho_v3*v3) + - 0.5 
* (B1^2 + B2^2 + B3^2) + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_v3 = rho * v3 + rho_e = p / (equations.gamma - 1) + + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + + 0.5 * (B1^2 + B2^2 + B3^2) - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3) + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3) end - @inline function density(u, equations::IdealGlmMhdEquations1D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - return rho + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + return rho end @inline function pressure(u, equations::IdealGlmMhdEquations1D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - p = (equations.gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2)) - return p + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2)) + return p end @inline function density_pressure(u, equations::IdealGlmMhdEquations1D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - p = (equations.gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2)) - return rho * p + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2)) + return rho * p end - # Compute the fastest wave speed for ideal MHD equations: c_f, the fast magnetoacoustic eigenvalue @inline function calc_fast_wavespeed(cons, direction, equations::IdealGlmMhdEquations1D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = cons - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_mag = sqrt(v1^2 + v2^2 + v3^2) - p = (equations.gamma - 1)*(rho_e - 0.5*rho*v_mag^2 - 0.5*(B1^2 + B2^2 + B3^2)) - a_square = equations.gamma * p / rho - sqrt_rho = sqrt(rho) - b1 = B1 / sqrt_rho - b2 = B2 / sqrt_rho - b3 = B3 / sqrt_rho - b_square = b1^2 + b2^2 + b3^2 - - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b1^2)) - return c_f + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = cons + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_mag = sqrt(v1^2 + v2^2 + v3^2) + p = (equations.gamma - 1) * + (rho_e - 0.5 * rho * v_mag^2 - 0.5 * (B1^2 + B2^2 + B3^2)) + a_square = equations.gamma * p / rho + sqrt_rho = sqrt(rho) + b1 = B1 / sqrt_rho + b2 = B2 / sqrt_rho + b3 = B3 / sqrt_rho + b_square = b1^2 + b2^2 + b3^2 + + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b1^2)) + return c_f end - """ calc_fast_wavespeed_roe(u_ll, u_rr, direction, equations::IdealGlmMhdEquations1D) @@ -408,119 +417,118 @@ as given by of Roe Matrices for Systems of Conservation Laws [DOI: 10.1006/jcph.1997.5773](https://doi.org/10.1006/jcph.1997.5773) """ -@inline function calc_fast_wavespeed_roe(u_ll, u_rr, direction, equations::IdealGlmMhdEquations1D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr = u_rr - - # Calculate primitive variables - v1_ll = rho_v1_ll/rho_ll - v2_ll = rho_v2_ll/rho_ll - v3_ll = rho_v3_ll/rho_ll - vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 - mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 - p_ll = (equations.gamma - 1)*(rho_e_ll - 0.5*rho_ll*vel_norm_ll - 0.5*mag_norm_ll) - - v1_rr = rho_v1_rr/rho_rr - v2_rr = rho_v2_rr/rho_rr - v3_rr = rho_v3_rr/rho_rr - vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 - 
mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 - p_rr = (equations.gamma - 1)*(rho_e_rr - 0.5*rho_rr*vel_norm_rr - 0.5*mag_norm_rr) - - # compute total pressure which is thermal + magnetic pressures - p_total_ll = p_ll + 0.5*mag_norm_ll - p_total_rr = p_rr + 0.5*mag_norm_rr - - # compute the Roe density averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr) - inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr) - rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add - rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add - # Roe averages - # velocities and magnetic fields - v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe - v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe - v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe - B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe - B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe - B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe - # enthalpy - H_ll = (rho_e_ll + p_total_ll) / rho_ll - H_rr = (rho_e_rr + p_total_rr) / rho_rr - H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe - # temporary variable see equations (4.12) in Cargo and Gallice - X = 0.5 * ( (B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2 ) * inv_sqrt_rho_add^2 - # averaged components needed to compute c_f, the fast magnetoacoustic wave speed - b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnectic sum - a_square_roe = ((2.0 - equations.gamma) * X + - (equations.gamma -1.0) * (H_roe - 0.5*(v1_roe^2 + v2_roe^2 + v3_roe^2) - - b_square_roe)) # acoustic speed - # finally compute the average wave speed and set the output velocity - # Ignore orientation since it is always "1" in 1D - c_a_roe = B1_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed - a_star_roe = sqrt( (a_square_roe + b_square_roe)^2 - 4.0 * a_square_roe * c_a_roe ) - c_f_roe = sqrt( 0.5 * (a_square_roe + b_square_roe + a_star_roe) ) - - return v1_roe, c_f_roe +@inline function calc_fast_wavespeed_roe(u_ll, u_rr, direction, + equations::IdealGlmMhdEquations1D) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr = u_rr + + # Calculate primitive variables + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 + mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 + p_ll = (equations.gamma - 1) * + (rho_e_ll - 0.5 * rho_ll * vel_norm_ll - 0.5 * mag_norm_ll) + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 + mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 + p_rr = (equations.gamma - 1) * + (rho_e_rr - 0.5 * rho_rr * vel_norm_rr - 0.5 * mag_norm_rr) + + # compute total pressure which is thermal + magnetic pressures + p_total_ll = p_ll + 0.5 * mag_norm_ll + p_total_rr = p_rr + 0.5 * mag_norm_rr + + # compute the Roe density averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr) + inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr) + rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add + rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add + # Roe averages + # velocities and magnetic fields + v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe + v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe + v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe + B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe + B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe + B3_roe = B3_ll * 
rho_ll_roe + B3_rr * rho_rr_roe
+    # enthalpy
+    H_ll = (rho_e_ll + p_total_ll) / rho_ll
+    H_rr = (rho_e_rr + p_total_rr) / rho_rr
+    H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe
+    # temporary variable, see equation (4.12) in Cargo and Gallice
+    X = 0.5 * ((B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2) *
+        inv_sqrt_rho_add^2
+    # averaged components needed to compute c_f, the fast magnetoacoustic wave speed
+    b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnetic sum
+    a_square_roe = ((2.0 - equations.gamma) * X +
+                    (equations.gamma - 1.0) *
+                    (H_roe - 0.5 * (v1_roe^2 + v2_roe^2 + v3_roe^2) -
+                     b_square_roe)) # acoustic speed
+    # finally compute the average wave speed and set the output velocity
+    # Ignore orientation since it is always "1" in 1D
+    c_a_roe = B1_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed
+    a_star_roe = sqrt((a_square_roe + b_square_roe)^2 - 4.0 * a_square_roe * c_a_roe)
+    c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe))
+
+    return v1_roe, c_f_roe
 end

-
 # Calculate thermodynamic entropy for a conservative state `cons`
 @inline function entropy_thermodynamic(cons, equations::IdealGlmMhdEquations1D)
-  # Pressure
-  p = (equations.gamma - 1) * (cons[5] - 1/2 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1]
-                                       - 1/2 * (cons[6]^2 + cons[7]^2 + cons[8]^2))
+    # Pressure
+    p = (equations.gamma - 1) *
+        (cons[5] - 1 / 2 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1]
+         -
+         1 / 2 * (cons[6]^2 + cons[7]^2 + cons[8]^2))

-  # Thermodynamic entropy
-  s = log(p) - equations.gamma*log(cons[1])
+    # Thermodynamic entropy
+    s = log(p) - equations.gamma * log(cons[1])

-  return s
+    return s
 end

-
 # Calculate mathematical entropy for a conservative state `cons`
 @inline function entropy_math(cons, equations::IdealGlmMhdEquations1D)
-  S = -entropy_thermodynamic(cons, equations) * cons[1] / (equations.gamma - 1)
+    S = -entropy_thermodynamic(cons, equations) * cons[1] / (equations.gamma - 1)

-  return S
+    return S
 end

-
 # Default entropy is the mathematical entropy
 @inline entropy(cons, equations::IdealGlmMhdEquations1D) = entropy_math(cons, equations)

-
 # Calculate total energy for a conservative state `cons`
 @inline energy_total(cons, ::IdealGlmMhdEquations1D) = cons[5]

-
 # Calculate kinetic energy for a conservative state `cons`
 @inline function energy_kinetic(cons, equations::IdealGlmMhdEquations1D)
-  return 0.5 * (cons[2]^2 + cons[3]^2 + cons[4]^2)/cons[1]
+    return 0.5 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1]
 end

-
 # Calculate the magnetic energy for a conservative state `cons'.
 # OBS! For non-dimensional form of the ideal MHD magnetic pressure ≡ magnetic energy
 @inline function energy_magnetic(cons, ::IdealGlmMhdEquations1D)
-  return 0.5 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
+    return 0.5 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
 end

-
 # Calculate internal energy for a conservative state `cons`
 @inline function energy_internal(cons, equations::IdealGlmMhdEquations1D)
-  return (energy_total(cons, equations)
-          - energy_kinetic(cons, equations)
-          - energy_magnetic(cons, equations))
+    return (energy_total(cons, equations)
+            -
+            energy_kinetic(cons, equations)
+            -
+            energy_magnetic(cons, equations))
 end

-
 # Calculate the cross helicity (\vec{v}⋅\vec{B}) for a conservative state `cons'
 @inline function cross_helicity(cons, ::IdealGlmMhdEquations1D)
-  return (cons[2]*cons[6] + cons[3]*cons[7] + cons[4]*cons[8]) / cons[1]
+    return (cons[2] * cons[6] + cons[3] * cons[7] + cons[4] * cons[8]) / cons[1]
 end

-
-
 end # @muladd

diff --git a/src/equations/ideal_glm_mhd_2d.jl b/src/equations/ideal_glm_mhd_2d.jl
index c19273737ef..fb3048fe883 100644
--- a/src/equations/ideal_glm_mhd_2d.jl
+++ b/src/equations/ideal_glm_mhd_2d.jl
@@ -3,7 +3,7 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#! format: noindent

 @doc raw"""
     IdealGlmMhdEquations2D(gamma)
@@ -11,28 +11,33 @@ The ideal compressible GLM-MHD equations for an ideal gas with ratio of
 specific heats `gamma` in two space dimensions.
 """
-mutable struct IdealGlmMhdEquations2D{RealT<:Real} <: AbstractIdealGlmMhdEquations{2, 9}
-  gamma::RealT               # ratio of specific heats
-  inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications
-  c_h::RealT                 # GLM cleaning speed
-
-  function IdealGlmMhdEquations2D(gamma, c_h)
-    γ, inv_gamma_minus_one, c_h = promote(gamma, inv(gamma - 1), c_h)
-    new{typeof(γ)}(γ, inv_gamma_minus_one, c_h)
-  end
+mutable struct IdealGlmMhdEquations2D{RealT <: Real} <:
+               AbstractIdealGlmMhdEquations{2, 9}
+    gamma::RealT # ratio of specific heats
+    inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications
+    c_h::RealT # GLM cleaning speed
+
+    function IdealGlmMhdEquations2D(gamma, c_h)
+        γ, inv_gamma_minus_one, c_h = promote(gamma, inv(gamma - 1), c_h)
+        new{typeof(γ)}(γ, inv_gamma_minus_one, c_h)
+    end
 end

-function IdealGlmMhdEquations2D(gamma; initial_c_h=convert(typeof(gamma), NaN))
-  # Use `promote` to ensure that `gamma` and `initial_c_h` have the same type
-  IdealGlmMhdEquations2D(promote(gamma, initial_c_h)...)
+function IdealGlmMhdEquations2D(gamma; initial_c_h = convert(typeof(gamma), NaN))
+    # Use `promote` to ensure that `gamma` and `initial_c_h` have the same type
+    IdealGlmMhdEquations2D(promote(gamma, initial_c_h)...)
end - have_nonconservative_terms(::IdealGlmMhdEquations2D) = True() -varnames(::typeof(cons2cons), ::IdealGlmMhdEquations2D) = ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3", "psi") -varnames(::typeof(cons2prim), ::IdealGlmMhdEquations2D) = ("rho", "v1", "v2", "v3", "p", "B1", "B2", "B3", "psi") -default_analysis_integrals(::IdealGlmMhdEquations2D) = (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) - +function varnames(::typeof(cons2cons), ::IdealGlmMhdEquations2D) + ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3", "psi") +end +function varnames(::typeof(cons2prim), ::IdealGlmMhdEquations2D) + ("rho", "v1", "v2", "v3", "p", "B1", "B2", "B3", "psi") +end +function default_analysis_integrals(::IdealGlmMhdEquations2D) + (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) +end # Set initial conditions at physical location `x` for time `t` """ @@ -41,43 +46,41 @@ default_analysis_integrals(::IdealGlmMhdEquations2D) = (entropy_timederivative, A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equations::IdealGlmMhdEquations2D) - rho = 1.0 - rho_v1 = 0.1 - rho_v2 = -0.2 - rho_v3 = -0.5 - rho_e = 50.0 - B1 = 3.0 - B2 = -1.2 - B3 = 0.5 - psi = 0.0 - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) + rho = 1.0 + rho_v1 = 0.1 + rho_v2 = -0.2 + rho_v3 = -0.5 + rho_e = 50.0 + B1 = 3.0 + B2 = -1.2 + B3 = 0.5 + psi = 0.0 + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) end - """ initial_condition_convergence_test(x, t, equations::IdealGlmMhdEquations2D) An Alfvén wave as smooth initial condition used for convergence tests. """ function initial_condition_convergence_test(x, t, equations::IdealGlmMhdEquations2D) - # smooth Alfvén wave test from Derigs et al. FLASH (2016) - # domain must be set to [0, 1/cos(α)] x [0, 1/sin(α)], γ = 5/3 - alpha = 0.25*pi - x_perp = x[1]*cos(alpha) + x[2]*sin(alpha) - B_perp = 0.1*sin(2.0*pi*x_perp) - rho = 1.0 - v1 = -B_perp*sin(alpha) - v2 = B_perp*cos(alpha) - v3 = 0.1*cos(2.0*pi*x_perp) - p = 0.1 - B1 = cos(alpha) + v1 - B2 = sin(alpha) + v2 - B3 = v3 - psi = 0.0 - return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3, psi), equations) + # smooth Alfvén wave test from Derigs et al. FLASH (2016) + # domain must be set to [0, 1/cos(α)] x [0, 1/sin(α)], γ = 5/3 + alpha = 0.25 * pi + x_perp = x[1] * cos(alpha) + x[2] * sin(alpha) + B_perp = 0.1 * sin(2.0 * pi * x_perp) + rho = 1.0 + v1 = -B_perp * sin(alpha) + v2 = B_perp * cos(alpha) + v3 = 0.1 * cos(2.0 * pi * x_perp) + p = 0.1 + B1 = cos(alpha) + v1 + B2 = sin(alpha) + v2 + B3 = v3 + psi = 0.0 + return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3, psi), equations) end - """ initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdEquations2D) @@ -87,96 +90,98 @@ A weak blast wave adapted from [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ function initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdEquations2D) - # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - # Same discontinuity in the velocities but with magnetic fields - # Set up polar coordinates - inicenter = (0, 0) - x_norm = x[1] - inicenter[1] - y_norm = x[2] - inicenter[2] - r = sqrt(x_norm^2 + y_norm^2) - phi = atan(y_norm, x_norm) - - # Calculate primitive variables - rho = r > 0.5 ? 1.0 : 1.1691 - v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) - v2 = r > 0.5 ? 0.0 : 0.1882 * sin(phi) - p = r > 0.5 ? 
1.0 : 1.245 - - return prim2cons(SVector(rho, v1, v2, 0.0, p, 1.0, 1.0, 1.0, 0.0), equations) + # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3) + # Same discontinuity in the velocities but with magnetic fields + # Set up polar coordinates + inicenter = (0, 0) + x_norm = x[1] - inicenter[1] + y_norm = x[2] - inicenter[2] + r = sqrt(x_norm^2 + y_norm^2) + phi = atan(y_norm, x_norm) + + # Calculate primitive variables + rho = r > 0.5 ? 1.0 : 1.1691 + v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) + v2 = r > 0.5 ? 0.0 : 0.1882 * sin(phi) + p = r > 0.5 ? 1.0 : 1.245 + + return prim2cons(SVector(rho, v1, v2, 0.0, p, 1.0, 1.0, 1.0, 0.0), equations) end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equations::IdealGlmMhdEquations2D) - # Calculate 1D flux in for a single point @inline function flux(u, orientation::Integer, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) - p = (equations.gamma - 1) * p_over_gamma_minus_one - if orientation == 1 - f1 = rho_v1 - f2 = rho_v1*v1 + p + mag_en - B1^2 - f3 = rho_v1*v2 - B1*B2 - f4 = rho_v1*v3 - B1*B3 - f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en)*v1 - B1*(v1*B1 + v2*B2 + v3*B3) + equations.c_h*psi*B1 - f6 = equations.c_h*psi - f7 = v1*B2 - v2*B1 - f8 = v1*B3 - v3*B1 - f9 = equations.c_h*B1 - else #if orientation == 2 - f1 = rho_v2 - f2 = rho_v2*v1 - B2*B1 - f3 = rho_v2*v2 + p + mag_en - B2^2 - f4 = rho_v2*v3 - B2*B3 - f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en)*v2 - B2*(v1*B1 + v2*B2 + v3*B3) + equations.c_h*psi*B2 - f6 = v2*B1 - v1*B2 - f7 = equations.c_h*psi - f8 = v2*B3 - v3*B2 - f9 = equations.c_h*B2 - end - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) + p = (equations.gamma - 1) * p_over_gamma_minus_one + if orientation == 1 + f1 = rho_v1 + f2 = rho_v1 * v1 + p + mag_en - B1^2 + f3 = rho_v1 * v2 - B1 * B2 + f4 = rho_v1 * v3 - B1 * B3 + f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v1 - + B1 * (v1 * B1 + v2 * B2 + v3 * B3) + equations.c_h * psi * B1 + f6 = equations.c_h * psi + f7 = v1 * B2 - v2 * B1 + f8 = v1 * B3 - v3 * B1 + f9 = equations.c_h * B1 + else #if orientation == 2 + f1 = rho_v2 + f2 = rho_v2 * v1 - B2 * B1 + f3 = rho_v2 * v2 + p + mag_en - B2^2 + f4 = rho_v2 * v3 - B2 * B3 + f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v2 - + B2 * (v1 * B1 + v2 * B2 + v3 * B3) + equations.c_h * psi * B2 + f6 = v2 * B1 - v1 * B2 + f7 = equations.c_h * psi + f8 = v2 * B3 - v3 * B2 + f9 = equations.c_h * B2 + end + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end # Calculate 1D flux for a single point in the normal direction # Note, this directional vector is not normalized -@inline function flux(u, normal_direction::AbstractVector, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 
* (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) - p = (equations.gamma - 1) * p_over_gamma_minus_one - - v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] - B_normal = B1 * normal_direction[1] + B2 * normal_direction[2] - rho_v_normal = rho * v_normal - - f1 = rho_v_normal - f2 = rho_v_normal * v1 - B1 * B_normal + (p + mag_en) * normal_direction[1] - f3 = rho_v_normal * v2 - B2 * B_normal + (p + mag_en) * normal_direction[2] - f4 = rho_v_normal * v3 - B3 * B_normal - f5 = ( (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en) * v_normal - - B_normal * (v1*B1 + v2*B2 + v3*B3) + equations.c_h * psi * B_normal ) - f6 = equations.c_h * psi * normal_direction[1] + (v2 * B1 - v1 * B2) * normal_direction[2] - f7 = equations.c_h * psi * normal_direction[2] + (v1 * B2 - v2 * B1) * normal_direction[1] - f8 = v_normal * B3 - v3 * B_normal - f9 = equations.c_h * B_normal - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) +@inline function flux(u, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations2D) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) + p = (equations.gamma - 1) * p_over_gamma_minus_one + + v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] + B_normal = B1 * normal_direction[1] + B2 * normal_direction[2] + rho_v_normal = rho * v_normal + + f1 = rho_v_normal + f2 = rho_v_normal * v1 - B1 * B_normal + (p + mag_en) * normal_direction[1] + f3 = rho_v_normal * v2 - B2 * B_normal + (p + mag_en) * normal_direction[2] + f4 = rho_v_normal * v3 - B3 * B_normal + f5 = ((kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v_normal + - + B_normal * (v1 * B1 + v2 * B2 + v3 * B3) + equations.c_h * psi * B_normal) + f6 = equations.c_h * psi * normal_direction[1] + + (v2 * B1 - v1 * B2) * normal_direction[2] + f7 = equations.c_h * psi * normal_direction[2] + + (v1 * B2 - v2 * B1) * normal_direction[1] + f8 = v_normal * B3 - v3 * B_normal + f9 = equations.c_h * B_normal + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end - - """ flux_nonconservative_powell(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) @@ -203,78 +208,77 @@ terms. 
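
For example, the following usage sketch (modeled on the MHD elixirs shipped
with Trixi.jl; the concrete parameter values are assumptions for illustration)
pairs both methods with a conservative two-point flux in a flux-differencing
DGSEM solver:

    equations = IdealGlmMhdEquations2D(5 / 3)
    volume_flux = (flux_hindenlang_gassner, flux_nonconservative_powell)
    solver = DGSEM(polydeg = 3,
                   surface_flux = (flux_lax_friedrichs, flux_nonconservative_powell),
                   volume_integral = VolumeIntegralFluxDifferencing(volume_flux))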
""" @inline function flux_nonconservative_powell(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll - - # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) - # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2}, 0, 0, 0, v_{1,2}) - if orientation == 1 - f = SVector(0, - B1_ll * B1_rr, - B2_ll * B1_rr, - B3_ll * B1_rr, - v_dot_B_ll * B1_rr + v1_ll * psi_ll * psi_rr, - v1_ll * B1_rr, - v2_ll * B1_rr, - v3_ll * B1_rr, - v1_ll * psi_rr) - else # orientation == 2 - f = SVector(0, - B1_ll * B2_rr, - B2_ll * B2_rr, - B3_ll * B2_rr, - v_dot_B_ll * B2_rr + v2_ll * psi_ll * psi_rr, - v1_ll * B2_rr, - v2_ll * B2_rr, - v3_ll * B2_rr, - v2_ll * psi_rr) - end - - return f + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + + # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) + # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2}, 0, 0, 0, v_{1,2}) + if orientation == 1 + f = SVector(0, + B1_ll * B1_rr, + B2_ll * B1_rr, + B3_ll * B1_rr, + v_dot_B_ll * B1_rr + v1_ll * psi_ll * psi_rr, + v1_ll * B1_rr, + v2_ll * B1_rr, + v3_ll * B1_rr, + v1_ll * psi_rr) + else # orientation == 2 + f = SVector(0, + B1_ll * B2_rr, + B2_ll * B2_rr, + B3_ll * B2_rr, + v_dot_B_ll * B2_rr + v2_ll * psi_ll * psi_rr, + v1_ll * B2_rr, + v2_ll * B2_rr, + v3_ll * B2_rr, + v2_ll * psi_rr) + end + + return f end @inline function flux_nonconservative_powell(u_ll, u_rr, normal_direction_ll::AbstractVector, normal_direction_average::AbstractVector, equations::IdealGlmMhdEquations2D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll - - # Note that `v_dot_n_ll` uses the `normal_direction_ll` (contravariant vector - # at the same node location) while `B_dot_n_rr` uses the averaged normal - # direction. The reason for this is that `v_dot_n_ll` depends only on the left - # state and multiplies some gradient while `B_dot_n_rr` is used to compute - # the divergence of B. 
- v_dot_n_ll = v1_ll * normal_direction_ll[1] + v2_ll * normal_direction_ll[2] - B_dot_n_rr = B1_rr * normal_direction_average[1] + B2_rr * normal_direction_average[2] - - # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) - # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2}, 0, 0, 0, v_{1,2}) - f = SVector(0, - B1_ll * B_dot_n_rr, - B2_ll * B_dot_n_rr, - B3_ll * B_dot_n_rr, - v_dot_B_ll * B_dot_n_rr + v_dot_n_ll * psi_ll * psi_rr, - v1_ll * B_dot_n_rr, - v2_ll * B_dot_n_rr, - v3_ll * B_dot_n_rr, - v_dot_n_ll * psi_rr) - - return f + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + + # Note that `v_dot_n_ll` uses the `normal_direction_ll` (contravariant vector + # at the same node location) while `B_dot_n_rr` uses the averaged normal + # direction. The reason for this is that `v_dot_n_ll` depends only on the left + # state and multiplies some gradient while `B_dot_n_rr` is used to compute + # the divergence of B. + v_dot_n_ll = v1_ll * normal_direction_ll[1] + v2_ll * normal_direction_ll[2] + B_dot_n_rr = B1_rr * normal_direction_average[1] + + B2_rr * normal_direction_average[2] + + # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) + # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2}, 0, 0, 0, v_{1,2}) + f = SVector(0, + B1_ll * B_dot_n_rr, + B2_ll * B_dot_n_rr, + B3_ll * B_dot_n_rr, + v_dot_B_ll * B_dot_n_rr + v_dot_n_ll * psi_ll * psi_rr, + v1_ll * B_dot_n_rr, + v2_ll * B_dot_n_rr, + v3_ll * B_dot_n_rr, + v_dot_n_ll * psi_rr) + + return f end - - """ flux_derigs_etal(u_ll, u_rr, orientation, equations::IdealGlmMhdEquations2D) @@ -284,83 +288,89 @@ Entropy conserving two-point flux by divergence diminishing ideal magnetohydrodynamics equations [DOI: 10.1016/j.jcp.2018.03.002](https://doi.org/10.1016/j.jcp.2018.03.002) """ -function flux_derigs_etal(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) - # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - v1_ll = rho_v1_ll/rho_ll - v2_ll = rho_v2_ll/rho_ll - v3_ll = rho_v3_ll/rho_ll - v1_rr = rho_v1_rr/rho_rr - v2_rr = rho_v2_rr/rho_rr - v3_rr = rho_v3_rr/rho_rr - vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 - vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 - mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 - mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 - p_ll = (equations.gamma - 1)*(rho_e_ll - 0.5*rho_ll*vel_norm_ll - 0.5*mag_norm_ll - 0.5*psi_ll^2) - p_rr = (equations.gamma - 1)*(rho_e_rr - 0.5*rho_rr*vel_norm_rr - 0.5*mag_norm_rr - 0.5*psi_rr^2) - beta_ll = 0.5*rho_ll/p_ll - beta_rr = 0.5*rho_rr/p_rr - # for convenience store v⋅B - vel_dot_mag_ll = v1_ll*B1_ll + v2_ll*B2_ll + v3_ll*B3_ll - vel_dot_mag_rr = v1_rr*B1_rr + v2_rr*B2_rr + v3_rr*B3_rr - - # Compute the necessary mean values needed for either direction - rho_avg = 0.5*(rho_ll+rho_rr) - rho_mean = ln_mean(rho_ll,rho_rr) - beta_mean = ln_mean(beta_ll,beta_rr) - beta_avg = 0.5*(beta_ll+beta_rr) - v1_avg = 0.5*(v1_ll+v1_rr) - v2_avg = 0.5*(v2_ll+v2_rr) - v3_avg = 0.5*(v3_ll+v3_rr) - p_mean = 0.5*rho_avg/beta_avg - B1_avg = 
0.5*(B1_ll+B1_rr) - B2_avg = 0.5*(B2_ll+B2_rr) - B3_avg = 0.5*(B3_ll+B3_rr) - psi_avg = 0.5*(psi_ll+psi_rr) - vel_norm_avg = 0.5*(vel_norm_ll+vel_norm_rr) - mag_norm_avg = 0.5*(mag_norm_ll+mag_norm_rr) - vel_dot_mag_avg = 0.5*(vel_dot_mag_ll+vel_dot_mag_rr) - - # Calculate fluxes depending on orientation with specific direction averages - if orientation == 1 - f1 = rho_mean*v1_avg - f2 = f1*v1_avg + p_mean + 0.5*mag_norm_avg - B1_avg*B1_avg - f3 = f1*v2_avg - B1_avg*B2_avg - f4 = f1*v3_avg - B1_avg*B3_avg - f6 = equations.c_h*psi_avg - f7 = v1_avg*B2_avg - v2_avg*B1_avg - f8 = v1_avg*B3_avg - v3_avg*B1_avg - f9 = equations.c_h*B1_avg - # total energy flux is complicated and involves the previous eight components - psi_B1_avg = 0.5*(B1_ll*psi_ll + B1_rr*psi_rr) - v1_mag_avg = 0.5*(v1_ll*mag_norm_ll + v1_rr*mag_norm_rr) - f5 = (f1*0.5*(1/(equations.gamma-1)/beta_mean - vel_norm_avg) + f2*v1_avg + f3*v2_avg + - f4*v3_avg + f6*B1_avg + f7*B2_avg + f8*B3_avg + f9*psi_avg - 0.5*v1_mag_avg + - B1_avg*vel_dot_mag_avg - equations.c_h*psi_B1_avg) - else - f1 = rho_mean*v2_avg - f2 = f1*v1_avg - B1_avg*B2_avg - f3 = f1*v2_avg + p_mean + 0.5*mag_norm_avg - B2_avg*B2_avg - f4 = f1*v3_avg - B2_avg*B3_avg - f6 = v2_avg*B1_avg - v1_avg*B2_avg - f7 = equations.c_h*psi_avg - f8 = v2_avg*B3_avg - v3_avg*B2_avg - f9 = equations.c_h*B2_avg - # total energy flux is complicated and involves the previous eight components - psi_B2_avg = 0.5*(B2_ll*psi_ll + B2_rr*psi_rr) - v2_mag_avg = 0.5*(v2_ll*mag_norm_ll + v2_rr*mag_norm_rr) - f5 = (f1*0.5*(1/(equations.gamma-1)/beta_mean - vel_norm_avg) + f2*v1_avg + f3*v2_avg + - f4*v3_avg + f6*B1_avg + f7*B2_avg + f8*B3_avg + f9*psi_avg - 0.5*v2_mag_avg + - B2_avg*vel_dot_mag_avg - equations.c_h*psi_B2_avg) - end - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) +function flux_derigs_etal(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations2D) + # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 + vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 + mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 + mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 + p_ll = (equations.gamma - 1) * + (rho_e_ll - 0.5 * rho_ll * vel_norm_ll - 0.5 * mag_norm_ll - 0.5 * psi_ll^2) + p_rr = (equations.gamma - 1) * + (rho_e_rr - 0.5 * rho_rr * vel_norm_rr - 0.5 * mag_norm_rr - 0.5 * psi_rr^2) + beta_ll = 0.5 * rho_ll / p_ll + beta_rr = 0.5 * rho_rr / p_rr + # for convenience store v⋅B + vel_dot_mag_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + vel_dot_mag_rr = v1_rr * B1_rr + v2_rr * B2_rr + v3_rr * B3_rr + + # Compute the necessary mean values needed for either direction + rho_avg = 0.5 * (rho_ll + rho_rr) + rho_mean = ln_mean(rho_ll, rho_rr) + beta_mean = ln_mean(beta_ll, beta_rr) + beta_avg = 0.5 * (beta_ll + beta_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_mean = 0.5 * rho_avg / beta_avg + B1_avg = 0.5 * (B1_ll + B1_rr) + B2_avg = 0.5 * (B2_ll + B2_rr) + B3_avg = 0.5 * (B3_ll + B3_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + vel_norm_avg = 0.5 * (vel_norm_ll + vel_norm_rr) + mag_norm_avg = 0.5 * 
(mag_norm_ll + mag_norm_rr) + vel_dot_mag_avg = 0.5 * (vel_dot_mag_ll + vel_dot_mag_rr) + + # Calculate fluxes depending on orientation with specific direction averages + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_mean + 0.5 * mag_norm_avg - B1_avg * B1_avg + f3 = f1 * v2_avg - B1_avg * B2_avg + f4 = f1 * v3_avg - B1_avg * B3_avg + f6 = equations.c_h * psi_avg + f7 = v1_avg * B2_avg - v2_avg * B1_avg + f8 = v1_avg * B3_avg - v3_avg * B1_avg + f9 = equations.c_h * B1_avg + # total energy flux is complicated and involves the previous eight components + psi_B1_avg = 0.5 * (B1_ll * psi_ll + B1_rr * psi_rr) + v1_mag_avg = 0.5 * (v1_ll * mag_norm_ll + v1_rr * mag_norm_rr) + f5 = (f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - vel_norm_avg) + + f2 * v1_avg + f3 * v2_avg + + f4 * v3_avg + f6 * B1_avg + f7 * B2_avg + f8 * B3_avg + f9 * psi_avg - + 0.5 * v1_mag_avg + + B1_avg * vel_dot_mag_avg - equations.c_h * psi_B1_avg) + else + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg - B1_avg * B2_avg + f3 = f1 * v2_avg + p_mean + 0.5 * mag_norm_avg - B2_avg * B2_avg + f4 = f1 * v3_avg - B2_avg * B3_avg + f6 = v2_avg * B1_avg - v1_avg * B2_avg + f7 = equations.c_h * psi_avg + f8 = v2_avg * B3_avg - v3_avg * B2_avg + f9 = equations.c_h * B2_avg + # total energy flux is complicated and involves the previous eight components + psi_B2_avg = 0.5 * (B2_ll * psi_ll + B2_rr * psi_rr) + v2_mag_avg = 0.5 * (v2_ll * mag_norm_ll + v2_rr * mag_norm_rr) + f5 = (f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - vel_norm_avg) + + f2 * v1_avg + f3 * v2_avg + + f4 * v3_avg + f6 * B1_avg + f7 * B2_avg + f8 * B3_avg + f9 * psi_avg - + 0.5 * v2_mag_avg + + B2_avg * vel_dot_mag_avg - equations.c_h * psi_B2_avg) + end + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end - """ flux_hindenlang_gassner(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdEquations2D) @@ -383,172 +393,198 @@ Hindenlang and Gassner (2019), extending [`flux_ranocha`](@ref) to the MHD equat the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) - # Unpack left and right states - rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, equations) - - # Compute the necessary mean values needed for either direction - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - psi_avg = 0.5 * (psi_ll + psi_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) - - # Calculate fluxes depending on orientation with specific direction averages - if orientation == 1 - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg + magnetic_square_avg - 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) - f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) - f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) - #f5 below - f6 = equations.c_h 
* psi_avg - f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) - f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) - f9 = equations.c_h * 0.5 * (B1_ll + B1_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v1_rr + p_rr * v1_ll - + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll) - + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) - - (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) - - (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll) - + equations.c_h * (B1_ll * psi_rr + B1_rr * psi_ll) ) ) - else # orientation == 2 - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - 0.5 * (B2_ll * B1_rr + B2_rr * B1_ll) - f3 = f1 * v2_avg + p_avg + magnetic_square_avg - 0.5 * (B2_ll * B2_rr + B2_rr * B2_ll) - f4 = f1 * v3_avg - 0.5 * (B2_ll * B3_rr + B2_rr * B3_ll) - #f5 below - f6 = 0.5 * (v2_ll * B1_ll - v1_ll * B2_ll + v2_rr * B1_rr - v1_rr * B2_rr) - f7 = equations.c_h * psi_avg - f8 = 0.5 * (v2_ll * B3_ll - v3_ll * B2_ll + v2_rr * B3_rr - v3_rr * B2_rr) - f9 = equations.c_h * 0.5 * (B2_ll + B2_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v2_rr + p_rr * v2_ll - + (v2_ll * B1_ll * B1_rr + v2_rr * B1_rr * B1_ll) - + (v2_ll * B3_ll * B3_rr + v2_rr * B3_rr * B3_ll) - - (v1_ll * B2_ll * B1_rr + v1_rr * B2_rr * B1_ll) - - (v3_ll * B2_ll * B3_rr + v3_rr * B2_rr * B3_ll) - + equations.c_h * (B2_ll * psi_rr + B2_rr * psi_ll) ) ) - end - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) +@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations2D) + # Unpack left and right states + rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, + equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, + equations) + + # Compute the necessary mean values needed for either direction + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) + + # Calculate fluxes depending on orientation with specific direction averages + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + magnetic_square_avg - + 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) + f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) + f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) + #f5 below + f6 = equations.c_h * psi_avg + f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) + f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) + f9 = equations.c_h * 0.5 * (B1_ll + B1_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v1_rr + p_rr * v1_ll + + (v1_ll * B2_ll 
* B2_rr + v1_rr * B2_rr * B2_ll) + + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) + - + (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) + - + (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll) + + + equations.c_h * (B1_ll * psi_rr + B1_rr * psi_ll))) + else # orientation == 2 + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg - 0.5 * (B2_ll * B1_rr + B2_rr * B1_ll) + f3 = f1 * v2_avg + p_avg + magnetic_square_avg - + 0.5 * (B2_ll * B2_rr + B2_rr * B2_ll) + f4 = f1 * v3_avg - 0.5 * (B2_ll * B3_rr + B2_rr * B3_ll) + #f5 below + f6 = 0.5 * (v2_ll * B1_ll - v1_ll * B2_ll + v2_rr * B1_rr - v1_rr * B2_rr) + f7 = equations.c_h * psi_avg + f8 = 0.5 * (v2_ll * B3_ll - v3_ll * B2_ll + v2_rr * B3_rr - v3_rr * B2_rr) + f9 = equations.c_h * 0.5 * (B2_ll + B2_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v2_rr + p_rr * v2_ll + + (v2_ll * B1_ll * B1_rr + v2_rr * B1_rr * B1_ll) + + (v2_ll * B3_ll * B3_rr + v2_rr * B3_rr * B3_ll) + - + (v1_ll * B2_ll * B1_rr + v1_rr * B2_rr * B1_ll) + - + (v3_ll * B2_ll * B3_rr + v3_rr * B2_rr * B3_ll) + + + equations.c_h * (B2_ll * psi_rr + B2_rr * psi_ll))) + end + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end @inline function flux_hindenlang_gassner(u_ll, u_rr, normal_direction::AbstractVector, equations::IdealGlmMhdEquations2D) - # Unpack left and right states - rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, equations) - v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] - v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] - B_dot_n_ll = B1_ll * normal_direction[1] + B2_ll * normal_direction[2] - B_dot_n_rr = B1_rr * normal_direction[1] + B2_rr * normal_direction[2] - - # Compute the necessary mean values needed for either direction - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - psi_avg = 0.5 * (psi_ll + psi_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = ( f1 * v1_avg + (p_avg + magnetic_square_avg) * normal_direction[1] - - 0.5 * (B_dot_n_ll * B1_rr + B_dot_n_rr * B1_ll) ) - f3 = ( f1 * v2_avg + (p_avg + magnetic_square_avg) * normal_direction[2] - - 0.5 * (B_dot_n_ll * B2_rr + B_dot_n_rr * B2_ll) ) - f4 = ( f1 * v3_avg - - 0.5 * (B_dot_n_ll * B3_rr + B_dot_n_rr * B3_ll) ) - #f5 below - f6 = ( equations.c_h * psi_avg * normal_direction[1] - + 0.5 * (v_dot_n_ll * B1_ll - v1_ll * B_dot_n_ll + - v_dot_n_rr * B1_rr - v1_rr * B_dot_n_rr) ) - f7 = ( equations.c_h * psi_avg * normal_direction[2] - + 0.5 * (v_dot_n_ll * B2_ll - v2_ll * B_dot_n_ll + - v_dot_n_rr * B2_rr - v2_rr * B_dot_n_rr) ) - f8 = + 0.5 * (v_dot_n_ll * B3_ll - v3_ll * B_dot_n_ll + - v_dot_n_rr * B3_rr - v3_rr * B_dot_n_rr) - f9 = equations.c_h * 0.5 * (B_dot_n_ll + B_dot_n_rr) - # total 
energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v_dot_n_rr + p_rr * v_dot_n_ll - + (v_dot_n_ll * B1_ll * B1_rr + v_dot_n_rr * B1_rr * B1_ll) - + (v_dot_n_ll * B2_ll * B2_rr + v_dot_n_rr * B2_rr * B2_ll) - + (v_dot_n_ll * B3_ll * B3_rr + v_dot_n_rr * B3_rr * B3_ll) - - (v1_ll * B_dot_n_ll * B1_rr + v1_rr * B_dot_n_rr * B1_ll) - - (v2_ll * B_dot_n_ll * B2_rr + v2_rr * B_dot_n_rr * B2_ll) - - (v3_ll * B_dot_n_ll * B3_rr + v3_rr * B_dot_n_rr * B3_ll) - + equations.c_h * (B_dot_n_ll * psi_rr + B_dot_n_rr * psi_ll) ) ) - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) + # Unpack left and right states + rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, + equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, + equations) + v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + B_dot_n_ll = B1_ll * normal_direction[1] + B2_ll * normal_direction[2] + B_dot_n_rr = B1_rr * normal_direction[1] + B2_rr * normal_direction[2] + + # Compute the necessary mean values needed for either direction + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = (f1 * v1_avg + (p_avg + magnetic_square_avg) * normal_direction[1] + - + 0.5 * (B_dot_n_ll * B1_rr + B_dot_n_rr * B1_ll)) + f3 = (f1 * v2_avg + (p_avg + magnetic_square_avg) * normal_direction[2] + - + 0.5 * (B_dot_n_ll * B2_rr + B_dot_n_rr * B2_ll)) + f4 = (f1 * v3_avg + - + 0.5 * (B_dot_n_ll * B3_rr + B_dot_n_rr * B3_ll)) + #f5 below + f6 = (equations.c_h * psi_avg * normal_direction[1] + + + 0.5 * (v_dot_n_ll * B1_ll - v1_ll * B_dot_n_ll + + v_dot_n_rr * B1_rr - v1_rr * B_dot_n_rr)) + f7 = (equations.c_h * psi_avg * normal_direction[2] + + + 0.5 * (v_dot_n_ll * B2_ll - v2_ll * B_dot_n_ll + + v_dot_n_rr * B2_rr - v2_rr * B_dot_n_rr)) + f8 = +0.5 * (v_dot_n_ll * B3_ll - v3_ll * B_dot_n_ll + + v_dot_n_rr * B3_rr - v3_rr * B_dot_n_rr) + f9 = equations.c_h * 0.5 * (B_dot_n_ll + B_dot_n_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v_dot_n_rr + p_rr * v_dot_n_ll + + (v_dot_n_ll * B1_ll * B1_rr + v_dot_n_rr * B1_rr * B1_ll) + + (v_dot_n_ll * B2_ll * B2_rr + v_dot_n_rr * B2_rr * B2_ll) + + (v_dot_n_ll * B3_ll * B3_rr + v_dot_n_rr * B3_rr * B3_ll) + - + (v1_ll * B_dot_n_ll * B1_rr + v1_rr * B_dot_n_rr * B1_ll) + - + (v2_ll * B_dot_n_ll * B2_rr + v2_rr * B_dot_n_rr * B2_ll) + - + (v3_ll * B_dot_n_ll * B3_rr + v3_rr * B_dot_n_rr * B3_ll) + + + equations.c_h * (B_dot_n_ll * psi_rr + B_dot_n_rr * psi_ll))) + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end - # 
Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) - rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr - - # Calculate the left/right velocities and fast magnetoacoustic wave speeds - if orientation == 1 - v_ll = rho_v1_ll / rho_ll - v_rr = rho_v1_rr / rho_rr - else # orientation == 2 - v_ll = rho_v2_ll / rho_ll - v_rr = rho_v2_rr / rho_rr - end - cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) - cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) - - return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations2D) + rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr + + # Calculate the left/right velocities and fast magnetoacoustic wave speeds + if orientation == 1 + v_ll = rho_v1_ll / rho_ll + v_rr = rho_v1_rr / rho_rr + else # orientation == 2 + v_ll = rho_v2_ll / rho_ll + v_rr = rho_v2_rr / rho_rr + end + cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) + cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) + + return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) end -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::IdealGlmMhdEquations2D) - # return max(v_mag_ll, v_mag_rr) + max(cf_ll, cf_rr) - rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr - - # Calculate normal velocities and fast magnetoacoustic wave speeds - # left - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v_ll = ( v1_ll * normal_direction[1] - + v2_ll * normal_direction[2] ) - cf_ll = calc_fast_wavespeed(u_ll, normal_direction, equations) - # right - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v_rr = ( v1_rr * normal_direction[1] - + v2_rr * normal_direction[2] ) - cf_rr = calc_fast_wavespeed(u_rr, normal_direction, equations) - - # wave speeds already scaled by norm(normal_direction) in [`calc_fast_wavespeed`](@ref) - return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations2D) + # return max(v_mag_ll, v_mag_rr) + max(cf_ll, cf_rr) + rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr + + # Calculate normal velocities and fast magnetoacoustic wave speeds + # left + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v_ll = (v1_ll * normal_direction[1] + + + v2_ll * normal_direction[2]) + cf_ll = calc_fast_wavespeed(u_ll, normal_direction, equations) + # right + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v_rr = (v1_rr * normal_direction[1] + + + v2_rr * normal_direction[2]) + cf_rr = calc_fast_wavespeed(u_rr, normal_direction, equations) + + # wave speeds already scaled by norm(normal_direction) in [`calc_fast_wavespeed`](@ref) + return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) end - """ min_max_speed_naive(u_ll, u_rr, orientation, equations::IdealGlmMhdEquations2D) @@ -557,295 +593,298 @@ Calculate minimum and maximum wave speeds for HLL-type fluxes as in An HLLC Riemann solver for magneto-hydrodynamics [DOI: 10.1016/j.jcp.2004.08.020](https://doi.org/10.1016/j.jcp.2004.08.020) """ -@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) - rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll - rho_rr, rho_v1_rr, 
rho_v2_rr, _ = u_rr - - # Calculate primitive velocity variables - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - - # Approximate the left-most and right-most eigenvalues in the Riemann fan - if orientation == 1 # x-direction - c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) - c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) - vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) - λ_min = min(v1_ll - c_f_ll, vel_roe - c_f_roe) - λ_max = max(v1_rr + c_f_rr, vel_roe + c_f_roe) - else # y-direction - c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) - c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) - vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) - λ_min = min(v2_ll - c_f_ll, vel_roe - c_f_roe) - λ_max = max(v2_rr + c_f_rr, vel_roe + c_f_roe) - end - - return λ_min, λ_max +@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations2D) + rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr + + # Calculate primitive velocity variables + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + + # Approximate the left-most and right-most eigenvalues in the Riemann fan + if orientation == 1 # x-direction + c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) + c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) + vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) + λ_min = min(v1_ll - c_f_ll, vel_roe - c_f_roe) + λ_max = max(v1_rr + c_f_rr, vel_roe + c_f_roe) + else # y-direction + c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) + c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) + vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) + λ_min = min(v2_ll - c_f_ll, vel_roe - c_f_roe) + λ_max = max(v2_rr + c_f_rr, vel_roe + c_f_roe) + end + + return λ_min, λ_max end @inline function min_max_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::IdealGlmMhdEquations2D) - rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr + rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr - # Calculate primitive velocity variables - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll + # Calculate primitive velocity variables + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr - v_normal_ll = (v1_ll * normal_direction[1] + - v2_ll * normal_direction[2]) - v_normal_rr = (v1_rr * normal_direction[1] + - v2_rr * normal_direction[2]) + v_normal_ll = (v1_ll * normal_direction[1] + + v2_ll * normal_direction[2]) + v_normal_rr = (v1_rr * normal_direction[1] + + v2_rr * normal_direction[2]) - c_f_ll = calc_fast_wavespeed(u_ll, normal_direction, equations) - c_f_rr = calc_fast_wavespeed(u_rr, normal_direction, equations) - v_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction, equations) + c_f_ll = calc_fast_wavespeed(u_ll, normal_direction, equations) + c_f_rr = calc_fast_wavespeed(u_rr, normal_direction, equations) + v_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction, equations) - # Estimate the min/max eigenvalues in the normal direction - λ_min = min(v_normal_ll - c_f_ll, v_roe - c_f_roe) - λ_max = 
max(v_normal_rr + c_f_rr, v_roe + c_f_roe) + # Estimate the min/max eigenvalues in the normal direction + λ_min = min(v_normal_ll - c_f_ll, v_roe - c_f_roe) + λ_max = max(v_normal_rr + c_f_rr, v_roe + c_f_roe) - return λ_min, λ_max + return λ_min, λ_max end - # Called inside `FluxRotated` in `numerical_fluxes.jl` so the direction # has been normalized prior to this rotation of the state vector @inline function rotate_to_x(u, normal_vector, equations::IdealGlmMhdEquations2D) - # cos and sin of the angle between the x-axis and the normalized normal_vector are - # the normalized vector's x and y coordinates respectively (see unit circle). - c = normal_vector[1] - s = normal_vector[2] - - # Apply the 2D rotation matrix with normal and tangent directions of the form - # [ 1 0 0 0 0 0 0 0 0; - # 0 n_1 n_2 0 0 0 0 0 0; - # 0 t_1 t_2 0 0 0 0 0 0; - # 0 0 0 1 0 0 0 0 0; - # 0 0 0 0 1 0 0 0 0; - # 0 0 0 0 0 n_1 n_2 0 0; - # 0 0 0 0 0 t_1 t_2 0 0; - # 0 0 0 0 0 0 0 1 0; - # 0 0 0 0 0 0 0 0 1 ] - # where t_1 = -n_2 and t_2 = n_1. - # Note for IdealGlmMhdEquations2D only the velocities and magnetic field variables rotate - - return SVector(u[1], - c * u[2] + s * u[3], - -s * u[2] + c * u[3], - u[4], - u[5], - c * u[6] + s * u[7], - -s * u[6] + c * u[7], - u[8], - u[9]) + # cos and sin of the angle between the x-axis and the normalized normal_vector are + # the normalized vector's x and y coordinates respectively (see unit circle). + c = normal_vector[1] + s = normal_vector[2] + + # Apply the 2D rotation matrix with normal and tangent directions of the form + # [ 1 0 0 0 0 0 0 0 0; + # 0 n_1 n_2 0 0 0 0 0 0; + # 0 t_1 t_2 0 0 0 0 0 0; + # 0 0 0 1 0 0 0 0 0; + # 0 0 0 0 1 0 0 0 0; + # 0 0 0 0 0 n_1 n_2 0 0; + # 0 0 0 0 0 t_1 t_2 0 0; + # 0 0 0 0 0 0 0 1 0; + # 0 0 0 0 0 0 0 0 1 ] + # where t_1 = -n_2 and t_2 = n_1. + # Note for IdealGlmMhdEquations2D only the velocities and magnetic field variables rotate + + return SVector(u[1], + c * u[2] + s * u[3], + -s * u[2] + c * u[3], + u[4], + u[5], + c * u[6] + s * u[7], + -s * u[6] + c * u[7], + u[8], + u[9]) end - # Called inside `FluxRotated` in `numerical_fluxes.jl` so the direction # has been normalized prior to this back-rotation of the state vector @inline function rotate_from_x(u, normal_vector, equations::IdealGlmMhdEquations2D) - # cos and sin of the angle between the x-axis and the normalized normal_vector are - # the normalized vector's x and y coordinates respectively (see unit circle). - c = normal_vector[1] - s = normal_vector[2] - - # Apply the 2D back-rotation matrix with normal and tangent directions of the form - # [ 1 0 0 0 0 0 0 0 0; - # 0 n_1 t_1 0 0 0 0 0 0; - # 0 n_2 t_2 0 0 0 0 0 0; - # 0 0 0 1 0 0 0 0 0; - # 0 0 0 0 1 0 0 0 0; - # 0 0 0 0 0 n_1 t_1 0 0; - # 0 0 0 0 0 n_2 t_2 0 0; - # 0 0 0 0 0 0 0 1 0; - # 0 0 0 0 0 0 0 0 1 ] - # where t_1 = -n_2 and t_2 = n_1. - # Note for IdealGlmMhdEquations2D the velocities and magnetic field variables back-rotate - - return SVector(u[1], - c * u[2] - s * u[3], - s * u[2] + c * u[3], - u[4], - u[5], - c * u[6] - s * u[7], - s * u[6] + c * u[7], - u[8], - u[9]) + # cos and sin of the angle between the x-axis and the normalized normal_vector are + # the normalized vector's x and y coordinates respectively (see unit circle). 
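These rotation helpers let any x-aligned flux act in an arbitrary normal direction: rotate both states into the frame of the (normalized) normal, evaluate the one-dimensional flux, and rotate the result back. A rough sketch of that pattern, assuming the internal helpers are reachable as `Trixi.rotate_to_x` and `Trixi.rotate_from_x`; `FluxRotated` in `numerical_fluxes.jl` packages the same steps:

```julia
using Trixi, StaticArrays
using LinearAlgebra: norm

# rotate -> evaluate 1D flux along "x" -> rotate back, rescaled by the
# length of the (generally non-unit) normal direction
function rotated_flux_sketch(flux_x, u_ll, u_rr, normal_direction, equations)
    n = normal_direction / norm(normal_direction)
    u_ll_rot = Trixi.rotate_to_x(u_ll, n, equations)
    u_rr_rot = Trixi.rotate_to_x(u_rr, n, equations)
    f_rot = flux_x(u_ll_rot, u_rr_rot, 1, equations)
    return norm(normal_direction) * Trixi.rotate_from_x(f_rot, n, equations)
end
```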
+ c = normal_vector[1] + s = normal_vector[2] + + # Apply the 2D back-rotation matrix with normal and tangent directions of the form + # [ 1 0 0 0 0 0 0 0 0; + # 0 n_1 t_1 0 0 0 0 0 0; + # 0 n_2 t_2 0 0 0 0 0 0; + # 0 0 0 1 0 0 0 0 0; + # 0 0 0 0 1 0 0 0 0; + # 0 0 0 0 0 n_1 t_1 0 0; + # 0 0 0 0 0 n_2 t_2 0 0; + # 0 0 0 0 0 0 0 1 0; + # 0 0 0 0 0 0 0 0 1 ] + # where t_1 = -n_2 and t_2 = n_1. + # Note for IdealGlmMhdEquations2D the velocities and magnetic field variables back-rotate + + return SVector(u[1], + c * u[2] - s * u[3], + s * u[2] + c * u[3], + u[4], + u[5], + c * u[6] - s * u[7], + s * u[6] + c * u[7], + u[8], + u[9]) end - - @inline function max_abs_speeds(u, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, _ = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - cf_x_direction = calc_fast_wavespeed(u, 1, equations) - cf_y_direction = calc_fast_wavespeed(u, 2, equations) - - return abs(v1) + cf_x_direction, abs(v2) + cf_y_direction + rho, rho_v1, rho_v2, rho_v3, _ = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + cf_x_direction = calc_fast_wavespeed(u, 1, equations) + cf_y_direction = calc_fast_wavespeed(u, 2, equations) + + return abs(v1) + cf_x_direction, abs(v2) + cf_y_direction end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3 - + B1 * B1 + B2 * B2 + B3 * B3 - + psi * psi)) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * (rho_e - + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3 + + B1 * B1 + B2 * B2 + B3 * B3 + + psi * psi)) - return SVector(rho, v1, v2, v3, p, B1, B2, B3, psi) + return SVector(rho, v1, v2, v3, p, B1, B2, B3, psi) end - # Convert conservative variables to entropy variables @inline function cons2entropy(u, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_square = v1^2 + v2^2 + v3^2 - p = (equations.gamma - 1) * (rho_e - 0.5*rho*v_square - 0.5*(B1^2 + B2^2 + B3^2) - 0.5*psi^2) - s = log(p) - equations.gamma*log(rho) - rho_p = rho / p - - w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square - w2 = rho_p * v1 - w3 = rho_p * v2 - w4 = rho_p * v3 - w5 = -rho_p - w6 = rho_p * B1 - w7 = rho_p * B2 - w8 = rho_p * B3 - w9 = rho_p * psi - - return SVector(w1, w2, w3, w4, w5, w6, w7, w8, w9) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_square = v1^2 + v2^2 + v3^2 + p = (equations.gamma - 1) * + (rho_e - 0.5 * rho * v_square - 0.5 * (B1^2 + B2^2 + B3^2) - 0.5 * psi^2) + s = log(p) - equations.gamma * log(rho) + rho_p = rho / p + + w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square + w2 = rho_p * v1 + w3 = rho_p * v2 + w4 = rho_p * v3 + w5 = -rho_p + w6 = rho_p * B1 + w7 = rho_p * B2 + w8 = rho_p * B3 + w9 = rho_p * psi + + return SVector(w1, w2, w3, w4, w5, w6, w7, w8, w9) end # Convert entropy variables to conservative variables @inline function entropy2cons(w, equations::IdealGlmMhdEquations2D) - w1, w2, w3, w4, w5, w6, w7, w8, w9 = w - - v1 = - w2 / w5 - v2 = - w3 / w5 - v3 = - w4 / w5 - - B1 = - w6 / w5 - B2 = - 
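A convenient sanity check for the variable transformations in this block is the round trip through entropy variables, which should reproduce the conservative state up to round-off; a small sketch, assuming `cons2entropy` and `entropy2cons` are exported as for the other equation types:

```julia
using Trixi, StaticArrays

equations = IdealGlmMhdEquations2D(5 / 3, 1.0)  # assumed positional (gamma, c_h) constructor
u = prim2cons(SVector(1.1, 0.2, -0.1, 0.3, 2.0, 0.5, -0.25, 0.75, 0.1), equations)

w = cons2entropy(u, equations)   # conservative -> entropy variables
u2 = entropy2cons(w, equations)  # and back again

maximum(abs.(u2 - u)) < 1e-12    # should hold up to floating-point error
```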
w7 / w5 - B3 = - w8 / w5 - psi = - w9 / w5 - - # This imitates what is done for compressible Euler 3D `entropy2cons`: we convert from - # the entropy variables for `-rho * s / (gamma - 1)` to the entropy variables for the entropy - # `-rho * s` used by Hughes, Franca, Mallet (1986). - @unpack gamma = equations - V1, V2, V3, V4, V5 = SVector(w1, w2, w3, w4, w5) * (gamma - 1) - s = gamma - V1 + (V2^2 + V3^2 + V4^2)/(2*V5) - rho_iota = ((gamma-1) / (-V5)^gamma)^(equations.inv_gamma_minus_one)*exp(-s * equations.inv_gamma_minus_one) - rho = -rho_iota * V5 - p = -rho / w5 - - return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3, psi), equations) + w1, w2, w3, w4, w5, w6, w7, w8, w9 = w + + v1 = -w2 / w5 + v2 = -w3 / w5 + v3 = -w4 / w5 + + B1 = -w6 / w5 + B2 = -w7 / w5 + B3 = -w8 / w5 + psi = -w9 / w5 + + # This imitates what is done for compressible Euler 3D `entropy2cons`: we convert from + # the entropy variables for `-rho * s / (gamma - 1)` to the entropy variables for the entropy + # `-rho * s` used by Hughes, Franca, Mallet (1986). + @unpack gamma = equations + V1, V2, V3, V4, V5 = SVector(w1, w2, w3, w4, w5) * (gamma - 1) + s = gamma - V1 + (V2^2 + V3^2 + V4^2) / (2 * V5) + rho_iota = ((gamma - 1) / (-V5)^gamma)^(equations.inv_gamma_minus_one) * + exp(-s * equations.inv_gamma_minus_one) + rho = -rho_iota * V5 + p = -rho / w5 + + return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3, psi), equations) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::IdealGlmMhdEquations2D) - rho, v1, v2, v3, p, B1, B2, B3, psi = prim + rho, v1, v2, v3, p, B1, B2, B3, psi = prim - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_v3 = rho * v3 - rho_e = p* equations.inv_gamma_minus_one + 0.5 * (rho_v1*v1 + rho_v2*v2 + rho_v3*v3) + - 0.5 * (B1^2 + B2^2 + B3^2) + 0.5 * psi^2 + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_v3 = rho * v3 + rho_e = p * equations.inv_gamma_minus_one + + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + + 0.5 * (B1^2 + B2^2 + B3^2) + 0.5 * psi^2 - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) end - @inline function density(u, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - return rho + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + return rho end @inline function pressure(u, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - p = (equations.gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2) - - 0.5 * psi^2) - return p + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2) + - + 0.5 * psi^2) + return p end @inline function density_pressure(u, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - p = (equations.gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2) - - 0.5 * psi^2) - return rho * p + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2) + - + 0.5 * psi^2) + return rho * p end - # Compute the fastest wave speed for ideal MHD equations: c_f, the fast magnetoacoustic eigenvalue -@inline function calc_fast_wavespeed(cons, orientation::Integer, equations::IdealGlmMhdEquations2D) - 
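The primitive/conservative converters and the `pressure` and `density_pressure` helpers are consistent by construction, which is easy to confirm pointwise; a short sketch, assuming these functions are exported:

```julia
using Trixi, StaticArrays

equations = IdealGlmMhdEquations2D(5 / 3, 1.0)
prim = SVector(1.4, 0.1, 0.2, -0.3, 2.5, 1.0, 0.5, 0.25, 0.0)  # rho, v1, v2, v3, p, B1, B2, B3, psi
u = prim2cons(prim, equations)

pressure(u, equations) ≈ prim[5]                    # recovers the primitive pressure
density_pressure(u, equations) ≈ prim[1] * prim[5]  # rho * p, handy for shock indicators
```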
rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) - a_square = equations.gamma * p / rho - sqrt_rho = sqrt(rho) - b1 = B1 / sqrt_rho - b2 = B2 / sqrt_rho - b3 = B3 / sqrt_rho - b_square = b1 * b1 + b2 * b2 + b3 * b3 - if orientation == 1 # x-direction - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b1^2)) - else - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b2^2)) - end - return c_f +@inline function calc_fast_wavespeed(cons, orientation::Integer, + equations::IdealGlmMhdEquations2D) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) + a_square = equations.gamma * p / rho + sqrt_rho = sqrt(rho) + b1 = B1 / sqrt_rho + b2 = B2 / sqrt_rho + b3 = B3 / sqrt_rho + b_square = b1 * b1 + b2 * b2 + b3 * b3 + if orientation == 1 # x-direction + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b1^2)) + else + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b2^2)) + end + return c_f end -@inline function calc_fast_wavespeed(cons, normal_direction::AbstractVector, equations::IdealGlmMhdEquations2D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) - a_square = equations.gamma * p / rho - sqrt_rho = sqrt(rho) - b1 = B1 / sqrt_rho - b2 = B2 / sqrt_rho - b3 = B3 / sqrt_rho - b_square = b1 * b1 + b2 * b2 + b3 * b3 - norm_squared = (normal_direction[1] * normal_direction[1] + - normal_direction[2] * normal_direction[2]) - b_dot_n_squared = (b1 * normal_direction[1] + - b2 * normal_direction[2])^2 / norm_squared - - c_f = sqrt( - (0.5 * (a_square + b_square) + - 0.5 * sqrt((a_square + b_square)^2 - 4 * a_square * b_dot_n_squared)) * norm_squared) - return c_f +@inline function calc_fast_wavespeed(cons, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations2D) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) + a_square = equations.gamma * p / rho + sqrt_rho = sqrt(rho) + b1 = B1 / sqrt_rho + b2 = B2 / sqrt_rho + b3 = B3 / sqrt_rho + b_square = b1 * b1 + b2 * b2 + b3 * b3 + norm_squared = (normal_direction[1] * normal_direction[1] + + normal_direction[2] * normal_direction[2]) + b_dot_n_squared = (b1 * normal_direction[1] + + b2 * normal_direction[2])^2 / norm_squared + + c_f = sqrt((0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4 * a_square * b_dot_n_squared)) * + norm_squared) + return c_f end - """ calc_fast_wavespeed_roe(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdEquations2D) @@ -856,193 +895,202 @@ as 
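For a unit normal, the directional fast wave speed should coincide with the Cartesian one, since the normal version is scaled by `norm(normal_direction)`; a quick check, with the internal helper qualified as `Trixi.calc_fast_wavespeed` under the assumption that it is not exported:

```julia
using Trixi, StaticArrays

equations = IdealGlmMhdEquations2D(5 / 3, 1.0)
u = prim2cons(SVector(1.0, 0.2, -0.1, 0.0, 1.5, 1.0, 0.5, 0.0, 0.0), equations)

c_x = Trixi.calc_fast_wavespeed(u, 1, equations)
c_n = Trixi.calc_fast_wavespeed(u, SVector(1.0, 0.0), equations)
c_x ≈ c_n  # the unit normal carries no extra scaling
```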
given by of Roe Matrices for Systems of Conservation Laws [DOI: 10.1006/jcph.1997.5773](https://doi.org/10.1006/jcph.1997.5773) """ -@inline function calc_fast_wavespeed_roe(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - # Calculate primitive variables - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll) - mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll - p_ll = (equations.gamma - 1)*(rho_e_ll - kin_en_ll - 0.5*mag_norm_ll - 0.5*psi_ll^2) - - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v3_rr = rho_v3_rr / rho_rr - kin_en_rr = 0.5 * (rho_v1_rr * v1_rr + rho_v2_rr * v2_rr + rho_v3_rr * v3_rr) - mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr - p_rr = (equations.gamma - 1)*(rho_e_rr - kin_en_rr - 0.5*mag_norm_rr - 0.5*psi_rr^2) - - # compute total pressure which is thermal + magnetic pressures - p_total_ll = p_ll + 0.5 * mag_norm_ll - p_total_rr = p_rr + 0.5 * mag_norm_rr - - # compute the Roe density averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr) - inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr) - rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add - rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add - # Roe averages - # velocities and magnetic fields - v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe - v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe - v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe - B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe - B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe - B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe - # enthalpy - H_ll = (rho_e_ll + p_total_ll) / rho_ll - H_rr = (rho_e_rr + p_total_rr) / rho_rr - H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe - # temporary variable see equation (4.12) in Cargo and Gallice - X = 0.5 * ( (B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2 ) * inv_sqrt_rho_add^2 - # averaged components needed to compute c_f, the fast magnetoacoustic wave speed - b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnectic sum - a_square_roe = ((2.0 - equations.gamma) * X + - (equations.gamma -1.0) * (H_roe - 0.5*(v1_roe^2 + v2_roe^2 + v3_roe^2) - - b_square_roe)) # acoustic speed - # finally compute the average wave speed and set the output velocity (depends on orientation) - if orientation == 1 # x-direction - c_a_roe = B1_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed - a_star_roe = sqrt( (a_square_roe + b_square_roe)^2 - 4.0 * a_square_roe * c_a_roe ) - c_f_roe = sqrt( 0.5 * (a_square_roe + b_square_roe + a_star_roe) ) - vel_out_roe = v1_roe - else # y-direction - c_a_roe = B2_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed - a_star_roe = sqrt( (a_square_roe + b_square_roe)^2 - 4.0 * a_square_roe * c_a_roe ) - c_f_roe = sqrt( 0.5 * (a_square_roe + b_square_roe + a_star_roe) ) - vel_out_roe = v2_roe - end - - return vel_out_roe, c_f_roe +@inline function calc_fast_wavespeed_roe(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations2D) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + # Calculate primitive variables + v1_ll = 
rho_v1_ll / rho_ll
+    v2_ll = rho_v2_ll / rho_ll
+    v3_ll = rho_v3_ll / rho_ll
+    kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll)
+    mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll
+    p_ll = (equations.gamma - 1) *
+           (rho_e_ll - kin_en_ll - 0.5 * mag_norm_ll - 0.5 * psi_ll^2)
+
+    v1_rr = rho_v1_rr / rho_rr
+    v2_rr = rho_v2_rr / rho_rr
+    v3_rr = rho_v3_rr / rho_rr
+    kin_en_rr = 0.5 * (rho_v1_rr * v1_rr + rho_v2_rr * v2_rr + rho_v3_rr * v3_rr)
+    mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr
+    p_rr = (equations.gamma - 1) *
+           (rho_e_rr - kin_en_rr - 0.5 * mag_norm_rr - 0.5 * psi_rr^2)
+
+    # compute total pressure which is thermal + magnetic pressures
+    p_total_ll = p_ll + 0.5 * mag_norm_ll
+    p_total_rr = p_rr + 0.5 * mag_norm_rr
+
+    # compute the Roe density averages
+    sqrt_rho_ll = sqrt(rho_ll)
+    sqrt_rho_rr = sqrt(rho_rr)
+    inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr)
+    inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr)
+    rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add
+    rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add
+    # Roe averages
+    # velocities and magnetic fields
+    v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe
+    v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe
+    v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe
+    B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe
+    B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe
+    B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe
+    # enthalpy
+    H_ll = (rho_e_ll + p_total_ll) / rho_ll
+    H_rr = (rho_e_rr + p_total_rr) / rho_rr
+    H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe
+    # temporary variable see equation (4.12) in Cargo and Gallice
+    X = 0.5 * ((B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2) *
+        inv_sqrt_rho_add^2
+    # averaged components needed to compute c_f, the fast magnetoacoustic wave speed
+    b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnetic sum
+    a_square_roe = ((2.0 - equations.gamma) * X +
+                    (equations.gamma - 1.0) *
+                    (H_roe - 0.5 * (v1_roe^2 + v2_roe^2 + v3_roe^2) -
+                     b_square_roe)) # acoustic speed
+    # finally compute the average wave speed and set the output velocity (depends on orientation)
+    if orientation == 1 # x-direction
+        c_a_roe = B1_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed
+        a_star_roe = sqrt((a_square_roe + b_square_roe)^2 -
+                          4.0 * a_square_roe * c_a_roe)
+        c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe))
+        vel_out_roe = v1_roe
+    else # y-direction
+        c_a_roe = B2_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed
+        a_star_roe = sqrt((a_square_roe + b_square_roe)^2 -
+                          4.0 * a_square_roe * c_a_roe)
+        c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe))
+        vel_out_roe = v2_roe
+    end
+
+    return vel_out_roe, c_f_roe
 end

-@inline function calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction::AbstractVector, equations::IdealGlmMhdEquations2D)
-  rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll
-  rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr
-
-  # Calculate primitive variables
-  v1_ll = rho_v1_ll / rho_ll
-  v2_ll = rho_v2_ll / rho_ll
-  v3_ll = rho_v3_ll / rho_ll
-  kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll)
-  mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll
-  p_ll = (equations.gamma - 1)*(rho_e_ll - kin_en_ll - 0.5*mag_norm_ll - 0.5*psi_ll^2)
-
-  v1_rr = rho_v1_rr / rho_rr
-  v2_rr = rho_v2_rr / rho_rr
-  v3_rr = rho_v3_rr / rho_rr
-  kin_en_rr = 0.5 * (rho_v1_rr * v1_rr
+ rho_v2_rr * v2_rr + rho_v3_rr * v3_rr) - mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr - p_rr = (equations.gamma - 1)*(rho_e_rr - kin_en_rr - 0.5*mag_norm_rr - 0.5*psi_rr^2) - - # compute total pressure which is thermal + magnetic pressures - p_total_ll = p_ll + 0.5 * mag_norm_ll - p_total_rr = p_rr + 0.5 * mag_norm_rr - - # compute the Roe density averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr) - inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr) - rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add - rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add - # Roe averages - # velocities and magnetic fields - v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe - v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe - v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe - B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe - B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe - B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe - # enthalpy - H_ll = (rho_e_ll + p_total_ll) / rho_ll - H_rr = (rho_e_rr + p_total_rr) / rho_rr - H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe - # temporary variable see equation (4.12) in Cargo and Gallice - X = 0.5 * ( (B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2 ) * inv_sqrt_rho_add^2 - # averaged components needed to compute c_f, the fast magnetoacoustic wave speed - b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnectic sum - a_square_roe = ((2.0 - equations.gamma) * X + - (equations.gamma -1.0) * (H_roe - 0.5*(v1_roe^2 + v2_roe^2 + v3_roe^2) - - b_square_roe)) # acoustic speed - - # finally compute the average wave speed and set the output velocity (depends on orientation) - norm_squared = (normal_direction[1] * normal_direction[1] + - normal_direction[2] * normal_direction[2]) - B_roe_dot_n_squared = (B1_roe * normal_direction[1] + - B2_roe * normal_direction[2])^2 / norm_squared - - c_a_roe = B_roe_dot_n_squared * inv_sqrt_rho_prod # (squared) Alfvén wave speed - a_star_roe = sqrt((a_square_roe + b_square_roe)^2 - 4 * a_square_roe * c_a_roe) - c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe) * norm_squared) - vel_out_roe = (v1_roe * normal_direction[1] + - v2_roe * normal_direction[2]) - - return vel_out_roe, c_f_roe +@inline function calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations2D) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + # Calculate primitive variables + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll) + mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll + p_ll = (equations.gamma - 1) * + (rho_e_ll - kin_en_ll - 0.5 * mag_norm_ll - 0.5 * psi_ll^2) + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + kin_en_rr = 0.5 * (rho_v1_rr * v1_rr + rho_v2_rr * v2_rr + rho_v3_rr * v3_rr) + mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr + p_rr = (equations.gamma - 1) * + (rho_e_rr - kin_en_rr - 0.5 * mag_norm_rr - 0.5 * psi_rr^2) + + # compute total pressure which is thermal + magnetic pressures + p_total_ll = p_ll + 0.5 * mag_norm_ll + p_total_rr = p_rr + 0.5 * mag_norm_rr + + # compute the Roe density averages + sqrt_rho_ll = sqrt(rho_ll) + sqrt_rho_rr = sqrt(rho_rr) + inv_sqrt_rho_add = 
1.0 / (sqrt_rho_ll + sqrt_rho_rr)
+    inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr)
+    rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add
+    rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add
+    # Roe averages
+    # velocities and magnetic fields
+    v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe
+    v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe
+    v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe
+    B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe
+    B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe
+    B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe
+    # enthalpy
+    H_ll = (rho_e_ll + p_total_ll) / rho_ll
+    H_rr = (rho_e_rr + p_total_rr) / rho_rr
+    H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe
+    # temporary variable see equation (4.12) in Cargo and Gallice
+    X = 0.5 * ((B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2) *
+        inv_sqrt_rho_add^2
+    # averaged components needed to compute c_f, the fast magnetoacoustic wave speed
+    b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnetic sum
+    a_square_roe = ((2.0 - equations.gamma) * X +
+                    (equations.gamma - 1.0) *
+                    (H_roe - 0.5 * (v1_roe^2 + v2_roe^2 + v3_roe^2) -
+                     b_square_roe)) # acoustic speed
+
+    # finally compute the average wave speed and set the output velocity (depends on orientation)
+    norm_squared = (normal_direction[1] * normal_direction[1] +
+                    normal_direction[2] * normal_direction[2])
+    B_roe_dot_n_squared = (B1_roe * normal_direction[1] +
+                           B2_roe * normal_direction[2])^2 / norm_squared
+
+    c_a_roe = B_roe_dot_n_squared * inv_sqrt_rho_prod # (squared) Alfvén wave speed
+    a_star_roe = sqrt((a_square_roe + b_square_roe)^2 - 4 * a_square_roe * c_a_roe)
+    c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe) * norm_squared)
+    vel_out_roe = (v1_roe * normal_direction[1] +
+                   v2_roe * normal_direction[2])
+
+    return vel_out_roe, c_f_roe
 end

-
 # Calculate thermodynamic entropy for a conservative state `cons`
 @inline function entropy_thermodynamic(cons, equations::IdealGlmMhdEquations2D)
-  # Pressure
-  p = (equations.gamma - 1) * (cons[5] - 1/2 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1]
-                                       - 1/2 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
-                                       - 1/2 * cons[9]^2)
-
-  # Thermodynamic entropy
-  s = log(p) - equations.gamma*log(cons[1])
-
-  return s
+    # Pressure
+    p = (equations.gamma - 1) *
+        (cons[5] - 1 / 2 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1]
+         -
+         1 / 2 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
+         -
+         1 / 2 * cons[9]^2)
+
+    # Thermodynamic entropy
+    s = log(p) - equations.gamma * log(cons[1])
+
+    return s
 end

-
 # Calculate mathematical entropy for a conservative state `cons`
 @inline function entropy_math(cons, equations::IdealGlmMhdEquations2D)
-  S = -entropy_thermodynamic(cons, equations) * cons[1] * equations.inv_gamma_minus_one
+    S = -entropy_thermodynamic(cons, equations) * cons[1] *
+        equations.inv_gamma_minus_one

-  return S
+    return S
 end

-
 # Default entropy is the mathematical entropy
 @inline entropy(cons, equations::IdealGlmMhdEquations2D) = entropy_math(cons, equations)

-
 # Calculate total energy for a conservative state `cons`
 @inline energy_total(cons, ::IdealGlmMhdEquations2D) = cons[5]

-
 # Calculate kinetic energy for a conservative state `cons`
 @inline function energy_kinetic(cons, equations::IdealGlmMhdEquations2D)
-  return 0.5 * (cons[2]^2 + cons[3]^2 + cons[4]^2)/cons[1]
+    return 0.5 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1]
 end

-
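Together with `energy_kinetic` above and the magnetic and internal energies defined next, the total energy decomposes as kinetic + magnetic + internal + ψ²/2, which makes a handy consistency check; a sketch assuming these helpers are exported as in Trixi's analysis callbacks:

```julia
using Trixi, StaticArrays

equations = IdealGlmMhdEquations2D(5 / 3, 1.0)
u = prim2cons(SVector(1.2, 0.3, -0.2, 0.1, 2.0, 1.0, -0.5, 0.25, 0.1), equations)

e_kin = energy_kinetic(u, equations)
e_mag = energy_magnetic(u, equations)
e_int = energy_internal(u, equations)
e_psi = 0.5 * u[9]^2  # GLM divergence-cleaning contribution

energy_total(u, equations) ≈ e_kin + e_mag + e_int + e_psi
```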
 # Calculate the magnetic energy for a conservative state `cons`.
 # OBS! For non-dimensional form of the ideal MHD magnetic pressure ≡ magnetic energy
 @inline function energy_magnetic(cons, ::IdealGlmMhdEquations2D)
-  return 0.5 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
+    return 0.5 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
 end

-
 # Calculate internal energy for a conservative state `cons`
 @inline function energy_internal(cons, equations::IdealGlmMhdEquations2D)
-  return (energy_total(cons, equations)
-          - energy_kinetic(cons, equations)
-          - energy_magnetic(cons, equations)
-          - cons[9]^2 / 2)
+    return (energy_total(cons, equations)
+            -
+            energy_kinetic(cons, equations)
+            -
+            energy_magnetic(cons, equations)
+            -
+            cons[9]^2 / 2)
 end

-
 # Calculate the cross helicity (\vec{v}⋅\vec{B}) for a conservative state `cons`
 @inline function cross_helicity(cons, ::IdealGlmMhdEquations2D)
-  return (cons[2]*cons[6] + cons[3]*cons[7] + cons[4]*cons[8]) / cons[1]
+    return (cons[2] * cons[6] + cons[3] * cons[7] + cons[4] * cons[8]) / cons[1]
 end

-
-
 end # @muladd
diff --git a/src/equations/ideal_glm_mhd_3d.jl b/src/equations/ideal_glm_mhd_3d.jl
index 647e6c4f88a..401fcd2daf1 100644
--- a/src/equations/ideal_glm_mhd_3d.jl
+++ b/src/equations/ideal_glm_mhd_3d.jl
@@ -3,7 +3,7 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#! format: noindent

 @doc raw"""
     IdealGlmMhdEquations3D(gamma)

 The ideal compressible GLM-MHD equations for an ideal gas with ratio of
 specific heats `gamma` in three space dimensions.
 """
-mutable struct IdealGlmMhdEquations3D{RealT<:Real} <: AbstractIdealGlmMhdEquations{3, 9}
-  gamma::RealT               # ratio of specific heats
-  inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications
-  c_h::RealT                 # GLM cleaning speed
-
-  function IdealGlmMhdEquations3D(gamma, c_h)
-    γ, inv_gamma_minus_one, c_h = promote(gamma, inv(gamma - 1), c_h)
-    new{typeof(γ)}(γ, inv_gamma_minus_one, c_h)
-  end
+mutable struct IdealGlmMhdEquations3D{RealT <: Real} <:
+               AbstractIdealGlmMhdEquations{3, 9}
+    gamma::RealT # ratio of specific heats
+    inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications
+    c_h::RealT # GLM cleaning speed
+
+    function IdealGlmMhdEquations3D(gamma, c_h)
+        γ, inv_gamma_minus_one, c_h = promote(gamma, inv(gamma - 1), c_h)
+        new{typeof(γ)}(γ, inv_gamma_minus_one, c_h)
+    end
 end

-function IdealGlmMhdEquations3D(gamma; initial_c_h=convert(typeof(gamma), NaN))
-  # Use `promote` to ensure that `gamma` and `initial_c_h` have the same type
-  IdealGlmMhdEquations3D(promote(gamma, initial_c_h)...)
+function IdealGlmMhdEquations3D(gamma; initial_c_h = convert(typeof(gamma), NaN))
+    # Use `promote` to ensure that `gamma` and `initial_c_h` have the same type
+    IdealGlmMhdEquations3D(promote(gamma, initial_c_h)...)
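In user code the 3D equations are usually created from `gamma` alone, so `c_h` starts out as `NaN` and is set during the simulation (Trixi provides `GlmSpeedCallback` for that); a brief usage sketch of this constructor and the variable-name helpers that follow:

```julia
using Trixi

equations = IdealGlmMhdEquations3D(5 / 3)  # c_h defaults to NaN here

varnames(cons2cons, equations)  # ("rho", "rho_v1", ..., "B3", "psi")
varnames(cons2prim, equations)  # ("rho", "v1", ..., "B3", "psi")
```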
end - have_nonconservative_terms(::IdealGlmMhdEquations3D) = True() -varnames(::typeof(cons2cons), ::IdealGlmMhdEquations3D) = ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3", "psi") -varnames(::typeof(cons2prim), ::IdealGlmMhdEquations3D) = ("rho", "v1", "v2", "v3", "p", "B1", "B2", "B3", "psi") -default_analysis_integrals(::IdealGlmMhdEquations3D) = (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) - +function varnames(::typeof(cons2cons), ::IdealGlmMhdEquations3D) + ("rho", "rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3", "psi") +end +function varnames(::typeof(cons2prim), ::IdealGlmMhdEquations3D) + ("rho", "v1", "v2", "v3", "p", "B1", "B2", "B3", "psi") +end +function default_analysis_integrals(::IdealGlmMhdEquations3D) + (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) +end # Set initial conditions at physical location `x` for time `t` """ @@ -41,53 +46,51 @@ initial_condition_constant(x, t, equations::IdealGlmMhdEquations3D) A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equations::IdealGlmMhdEquations3D) - rho = 1.0 - rho_v1 = 0.1 - rho_v2 = -0.2 - rho_v3 = -0.5 - rho_e = 50.0 - B1 = 3.0 - B2 = -1.2 - B3 = 0.5 - psi = 0.0 - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) + rho = 1.0 + rho_v1 = 0.1 + rho_v2 = -0.2 + rho_v3 = -0.5 + rho_e = 50.0 + B1 = 3.0 + B2 = -1.2 + B3 = 0.5 + psi = 0.0 + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) end - """ initial_condition_convergence_test(x, t, equations::IdealGlmMhdEquations3D) An Alfvén wave as smooth initial condition used for convergence tests. """ function initial_condition_convergence_test(x, t, equations::IdealGlmMhdEquations3D) - # Alfvén wave in three space dimensions - # Altmann thesis http://dx.doi.org/10.18419/opus-3895 - # domain must be set to [-1, 1]^3, γ = 5/3 - p = 1 - omega = 2*pi # may be multiplied by frequency - # r: length-variable = length of computational domain - r = 2 - # e: epsilon = 0.2 - e = 0.2 - nx = 1 / sqrt(r^2 + 1) - ny = r / sqrt(r^2 + 1) - sqr = 1 - Va = omega / (ny * sqr) - phi_alv = omega / ny * (nx * (x[1] - 0.5*r) + ny * (x[2] - 0.5*r)) - Va * t - - rho = 1. 
- v1 = -e*ny*cos(phi_alv) / rho - v2 = e*nx*cos(phi_alv) / rho - v3 = e * sin(phi_alv) / rho - B1 = nx -rho*v1*sqr - B2 = ny -rho*v2*sqr - B3 = -rho*v3*sqr - psi = 0 - - return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3, psi), equations) + # Alfvén wave in three space dimensions + # Altmann thesis http://dx.doi.org/10.18419/opus-3895 + # domain must be set to [-1, 1]^3, γ = 5/3 + p = 1 + omega = 2 * pi # may be multiplied by frequency + # r: length-variable = length of computational domain + r = 2 + # e: epsilon = 0.2 + e = 0.2 + nx = 1 / sqrt(r^2 + 1) + ny = r / sqrt(r^2 + 1) + sqr = 1 + Va = omega / (ny * sqr) + phi_alv = omega / ny * (nx * (x[1] - 0.5 * r) + ny * (x[2] - 0.5 * r)) - Va * t + + rho = 1.0 + v1 = -e * ny * cos(phi_alv) / rho + v2 = e * nx * cos(phi_alv) / rho + v3 = e * sin(phi_alv) / rho + B1 = nx - rho * v1 * sqr + B2 = ny - rho * v2 * sqr + B3 = -rho * v3 * sqr + psi = 0 + + return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3, psi), equations) end - """ initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdEquations3D) @@ -97,115 +100,118 @@ A weak blast wave adapted from [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ function initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdEquations3D) - # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - # Same discontinuity in the velocities but with magnetic fields - # Set up polar coordinates - inicenter = (0, 0, 0) - x_norm = x[1] - inicenter[1] - y_norm = x[2] - inicenter[2] - z_norm = x[3] - inicenter[3] - r = sqrt(x_norm^2 + y_norm^2 + z_norm^2) - phi = atan(y_norm, x_norm) - theta = iszero(r) ? 0.0 : acos(z_norm / r) - - # Calculate primitive variables - rho = r > 0.5 ? 1.0 : 1.1691 - v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) * sin(theta) - v2 = r > 0.5 ? 0.0 : 0.1882 * sin(phi) * sin(theta) - v3 = r > 0.5 ? 0.0 : 0.1882 * cos(theta) - p = r > 0.5 ? 1.0 : 1.245 - - return prim2cons(SVector(rho, v1, v2, v3, p, 1.0, 1.0, 1.0, 0.0), equations) + # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3) + # Same discontinuity in the velocities but with magnetic fields + # Set up polar coordinates + inicenter = (0, 0, 0) + x_norm = x[1] - inicenter[1] + y_norm = x[2] - inicenter[2] + z_norm = x[3] - inicenter[3] + r = sqrt(x_norm^2 + y_norm^2 + z_norm^2) + phi = atan(y_norm, x_norm) + theta = iszero(r) ? 0.0 : acos(z_norm / r) + + # Calculate primitive variables + rho = r > 0.5 ? 1.0 : 1.1691 + v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi) * sin(theta) + v2 = r > 0.5 ? 0.0 : 0.1882 * sin(phi) * sin(theta) + v3 = r > 0.5 ? 0.0 : 0.1882 * cos(theta) + p = r > 0.5 ? 
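Initial conditions such as these are plain functions of `(x, t, equations)` returning conservative variables, so they can be probed directly at a point; a short usage sketch:

```julia
using Trixi, StaticArrays

equations = IdealGlmMhdEquations3D(5 / 3)
x = SVector(0.1, -0.2, 0.4)

u0 = initial_condition_weak_blast_wave(x, 0.0, equations)
cons2prim(u0, equations)  # inspect rho, v1, v2, v3, p, B1, B2, B3, psi
```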
1.0 : 1.245 + + return prim2cons(SVector(rho, v1, v2, v3, p, 1.0, 1.0, 1.0, 0.0), equations) end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equations::IdealGlmMhdEquations3D) - # Calculate 1D flux in for a single point @inline function flux(u, orientation::Integer, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) - p = (equations.gamma - 1) * p_over_gamma_minus_one - if orientation == 1 - f1 = rho_v1 - f2 = rho_v1*v1 + p + mag_en - B1^2 - f3 = rho_v1*v2 - B1*B2 - f4 = rho_v1*v3 - B1*B3 - f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en)*v1 - B1*(v1*B1 + v2*B2 + v3*B3) + equations.c_h*psi*B1 - f6 = equations.c_h*psi - f7 = v1*B2 - v2*B1 - f8 = v1*B3 - v3*B1 - f9 = equations.c_h*B1 - elseif orientation == 2 - f1 = rho_v2 - f2 = rho_v2*v1 - B2*B1 - f3 = rho_v2*v2 + p + mag_en - B2^2 - f4 = rho_v2*v3 - B2*B3 - f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en)*v2 - B2*(v1*B1 + v2*B2 + v3*B3) + equations.c_h*psi*B2 - f6 = v2*B1 - v1*B2 - f7 = equations.c_h*psi - f8 = v2*B3 - v3*B2 - f9 = equations.c_h*B2 - else - f1 = rho_v3 - f2 = rho_v3*v1 - B3*B1 - f3 = rho_v3*v2 - B3*B2 - f4 = rho_v3*v3 + p + mag_en - B3^2 - f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en)*v3 - B3*(v1*B1 + v2*B2 + v3*B3) + equations.c_h*psi*B3 - f6 = v3*B1 - v1*B3 - f7 = v3*B2 - v2*B3 - f8 = equations.c_h*psi - f9 = equations.c_h*B3 - end - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) + p = (equations.gamma - 1) * p_over_gamma_minus_one + if orientation == 1 + f1 = rho_v1 + f2 = rho_v1 * v1 + p + mag_en - B1^2 + f3 = rho_v1 * v2 - B1 * B2 + f4 = rho_v1 * v3 - B1 * B3 + f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v1 - + B1 * (v1 * B1 + v2 * B2 + v3 * B3) + equations.c_h * psi * B1 + f6 = equations.c_h * psi + f7 = v1 * B2 - v2 * B1 + f8 = v1 * B3 - v3 * B1 + f9 = equations.c_h * B1 + elseif orientation == 2 + f1 = rho_v2 + f2 = rho_v2 * v1 - B2 * B1 + f3 = rho_v2 * v2 + p + mag_en - B2^2 + f4 = rho_v2 * v3 - B2 * B3 + f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v2 - + B2 * (v1 * B1 + v2 * B2 + v3 * B3) + equations.c_h * psi * B2 + f6 = v2 * B1 - v1 * B2 + f7 = equations.c_h * psi + f8 = v2 * B3 - v3 * B2 + f9 = equations.c_h * B2 + else + f1 = rho_v3 + f2 = rho_v3 * v1 - B3 * B1 + f3 = rho_v3 * v2 - B3 * B2 + f4 = rho_v3 * v3 + p + mag_en - B3^2 + f5 = (kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v3 - + B3 * (v1 * B1 + v2 * B2 + v3 * B3) + equations.c_h * psi * B3 + f6 = v3 * B1 - v1 * B3 + f7 = v3 * B2 - v2 * B3 + f8 = equations.c_h * psi + f9 = equations.c_h * B3 + end + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end # Calculate 1D flux for a single point in the normal direction # Note, this directional vector is not normalized -@inline function flux(u, normal_direction::AbstractVector, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, 
B3, psi = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) - p = (equations.gamma - 1) * p_over_gamma_minus_one - - v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] + v3 * normal_direction[3] - B_normal = B1 * normal_direction[1] + B2 * normal_direction[2] + B3 * normal_direction[3] - rho_v_normal = rho * v_normal - - f1 = rho_v_normal - f2 = rho_v_normal * v1 - B1 * B_normal + (p + mag_en) * normal_direction[1] - f3 = rho_v_normal * v2 - B2 * B_normal + (p + mag_en) * normal_direction[2] - f4 = rho_v_normal * v3 - B3 * B_normal + (p + mag_en) * normal_direction[3] - f5 = ( (kin_en + equations.gamma * p_over_gamma_minus_one + 2*mag_en) * v_normal - - B_normal * (v1*B1 + v2*B2 + v3*B3) + equations.c_h * psi * B_normal ) - f6 = ( equations.c_h * psi * normal_direction[1] + - (v2 * B1 - v1 * B2) * normal_direction[2] + - (v3 * B1 - v1 * B3) * normal_direction[3] ) - f7 = ( (v1 * B2 - v2 * B1) * normal_direction[1] + - equations.c_h * psi * normal_direction[2] + - (v3 * B2 - v2 * B3) * normal_direction[3] ) - f8 = ( (v1 * B3 - v3 * B1) * normal_direction[1] + - (v2 * B3 - v3 * B2) * normal_direction[2] + - equations.c_h * psi * normal_direction[3] ) - f9 = equations.c_h * B_normal - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) +@inline function flux(u, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations3D) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p_over_gamma_minus_one = (rho_e - kin_en - mag_en - 0.5 * psi^2) + p = (equations.gamma - 1) * p_over_gamma_minus_one + + v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] + + v3 * normal_direction[3] + B_normal = B1 * normal_direction[1] + B2 * normal_direction[2] + + B3 * normal_direction[3] + rho_v_normal = rho * v_normal + + f1 = rho_v_normal + f2 = rho_v_normal * v1 - B1 * B_normal + (p + mag_en) * normal_direction[1] + f3 = rho_v_normal * v2 - B2 * B_normal + (p + mag_en) * normal_direction[2] + f4 = rho_v_normal * v3 - B3 * B_normal + (p + mag_en) * normal_direction[3] + f5 = ((kin_en + equations.gamma * p_over_gamma_minus_one + 2 * mag_en) * v_normal + - + B_normal * (v1 * B1 + v2 * B2 + v3 * B3) + equations.c_h * psi * B_normal) + f6 = (equations.c_h * psi * normal_direction[1] + + (v2 * B1 - v1 * B2) * normal_direction[2] + + (v3 * B1 - v1 * B3) * normal_direction[3]) + f7 = ((v1 * B2 - v2 * B1) * normal_direction[1] + + equations.c_h * psi * normal_direction[2] + + (v3 * B2 - v2 * B3) * normal_direction[3]) + f8 = ((v1 * B3 - v3 * B1) * normal_direction[1] + + (v2 * B3 - v3 * B2) * normal_direction[2] + + equations.c_h * psi * normal_direction[3]) + f9 = equations.c_h * B_normal + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end - - """ flux_nonconservative_powell(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations3D) @@ -232,88 +238,89 @@ terms. 
""" @inline function flux_nonconservative_powell(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations3D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll - - # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) - # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2,3}, 0, 0, 0, v_{1,2,3}) - if orientation == 1 - f = SVector(0, - B1_ll * B1_rr, - B2_ll * B1_rr, - B3_ll * B1_rr, - v_dot_B_ll * B1_rr + v1_ll * psi_ll * psi_rr, - v1_ll * B1_rr, - v2_ll * B1_rr, - v3_ll * B1_rr, - v1_ll * psi_rr) - elseif orientation == 2 - f = SVector(0, - B1_ll * B2_rr, - B2_ll * B2_rr, - B3_ll * B2_rr, - v_dot_B_ll * B2_rr + v2_ll * psi_ll * psi_rr, - v1_ll * B2_rr, - v2_ll * B2_rr, - v3_ll * B2_rr, - v2_ll * psi_rr) - else # orientation == 3 - f = SVector(0, - B1_ll * B3_rr, - B2_ll * B3_rr, - B3_ll * B3_rr, - v_dot_B_ll * B3_rr + v3_ll * psi_ll * psi_rr, - v1_ll * B3_rr, - v2_ll * B3_rr, - v3_ll * B3_rr, - v3_ll * psi_rr) - end - - return f + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + + # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) + # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2,3}, 0, 0, 0, v_{1,2,3}) + if orientation == 1 + f = SVector(0, + B1_ll * B1_rr, + B2_ll * B1_rr, + B3_ll * B1_rr, + v_dot_B_ll * B1_rr + v1_ll * psi_ll * psi_rr, + v1_ll * B1_rr, + v2_ll * B1_rr, + v3_ll * B1_rr, + v1_ll * psi_rr) + elseif orientation == 2 + f = SVector(0, + B1_ll * B2_rr, + B2_ll * B2_rr, + B3_ll * B2_rr, + v_dot_B_ll * B2_rr + v2_ll * psi_ll * psi_rr, + v1_ll * B2_rr, + v2_ll * B2_rr, + v3_ll * B2_rr, + v2_ll * psi_rr) + else # orientation == 3 + f = SVector(0, + B1_ll * B3_rr, + B2_ll * B3_rr, + B3_ll * B3_rr, + v_dot_B_ll * B3_rr + v3_ll * psi_ll * psi_rr, + v1_ll * B3_rr, + v2_ll * B3_rr, + v3_ll * B3_rr, + v3_ll * psi_rr) + end + + return f end @inline function flux_nonconservative_powell(u_ll, u_rr, normal_direction_ll::AbstractVector, normal_direction_average::AbstractVector, equations::IdealGlmMhdEquations3D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll - - # Note that `v_dot_n_ll` uses the `normal_direction_ll` (contravariant vector - # at the same node location) while `B_dot_n_rr` uses the averaged normal - # direction. The reason for this is that `v_dot_n_ll` depends only on the left - # state and multiplies some gradient while `B_dot_n_rr` is used to compute - # the divergence of B. 
- v_dot_n_ll = v1_ll * normal_direction_ll[1] + v2_ll * normal_direction_ll[2] + v3_ll * normal_direction_ll[3] - B_dot_n_rr = B1_rr * normal_direction_average[1] + B2_rr * normal_direction_average[2] + B3_rr * normal_direction_average[3] - - # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) - # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2,3}, 0, 0, 0, v_{1,2,3}) - f = SVector(0, - B1_ll * B_dot_n_rr, - B2_ll * B_dot_n_rr, - B3_ll * B_dot_n_rr, - v_dot_B_ll * B_dot_n_rr + v_dot_n_ll * psi_ll * psi_rr, - v1_ll * B_dot_n_rr, - v2_ll * B_dot_n_rr, - v3_ll * B_dot_n_rr, - v_dot_n_ll * psi_rr) - - return f + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + + # Note that `v_dot_n_ll` uses the `normal_direction_ll` (contravariant vector + # at the same node location) while `B_dot_n_rr` uses the averaged normal + # direction. The reason for this is that `v_dot_n_ll` depends only on the left + # state and multiplies some gradient while `B_dot_n_rr` is used to compute + # the divergence of B. + v_dot_n_ll = v1_ll * normal_direction_ll[1] + v2_ll * normal_direction_ll[2] + + v3_ll * normal_direction_ll[3] + B_dot_n_rr = B1_rr * normal_direction_average[1] + + B2_rr * normal_direction_average[2] + + B3_rr * normal_direction_average[3] + + # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) + # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2,3}, 0, 0, 0, v_{1,2,3}) + f = SVector(0, + B1_ll * B_dot_n_rr, + B2_ll * B_dot_n_rr, + B3_ll * B_dot_n_rr, + v_dot_B_ll * B_dot_n_rr + v_dot_n_ll * psi_ll * psi_rr, + v1_ll * B_dot_n_rr, + v2_ll * B_dot_n_rr, + v3_ll * B_dot_n_rr, + v_dot_n_ll * psi_rr) + + return f end - - """ flux_derigs_etal(u_ll, u_rr, orientation, equations::IdealGlmMhdEquations3D) @@ -323,90 +330,98 @@ Entropy conserving two-point flux by divergence diminishing ideal magnetohydrodynamics equations [DOI: 10.1016/j.jcp.2018.03.002](https://doi.org/10.1016/j.jcp.2018.03.002) """ -function flux_derigs_etal(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations3D) - # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta) - rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, equations) - - vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 - vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 - mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 - mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 - beta_ll = 0.5 * rho_ll / p_ll - beta_rr = 0.5 * rho_rr / p_rr - # for convenience store v⋅B - vel_dot_mag_ll = v1_ll*B1_ll + v2_ll*B2_ll + v3_ll*B3_ll - vel_dot_mag_rr = v1_rr*B1_rr + v2_rr*B2_rr + v3_rr*B3_rr - - # Compute the necessary mean values needed for either direction - rho_avg = 0.5 * (rho_ll + rho_rr) - rho_mean = ln_mean(rho_ll, rho_rr) - beta_mean = ln_mean(beta_ll, beta_rr) - beta_avg = 0.5 * (beta_ll + beta_rr) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_mean = 0.5 * rho_avg / beta_avg - B1_avg = 0.5 * (B1_ll + B1_rr) - B2_avg = 0.5 * (B2_ll + B2_rr) - B3_avg = 0.5 * (B3_ll + B3_rr) - psi_avg = 0.5 * (psi_ll + psi_rr) - vel_norm_avg = 0.5 * (vel_norm_ll + vel_norm_rr) 
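The `ln_mean` calls in this flux are the logarithmic mean L(a, b) = (b - a) / (log(b) - log(a)), the key ingredient for entropy conservation; a naive reference version (Trixi's actual `ln_mean` treats the a ≈ b limit with a series expansion instead of this branch):

```julia
using Trixi

# naive logarithmic mean; the exact limit L(a, a) = a is handled by the branch
ln_mean_naive(a, b) = a == b ? a : (b - a) / (log(b) - log(a))

ln_mean_naive(1.0, 2.0)   # ≈ 1.4427 (= 1 / log(2))
Trixi.ln_mean(1.0, 2.0)   # should agree to round-off
```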
- mag_norm_avg = 0.5 * (mag_norm_ll + mag_norm_rr) - vel_dot_mag_avg = 0.5 * (vel_dot_mag_ll + vel_dot_mag_rr) - - # Calculate fluxes depending on orientation with specific direction averages - if orientation == 1 - f1 = rho_mean*v1_avg - f2 = f1*v1_avg + p_mean + 0.5*mag_norm_avg - B1_avg*B1_avg - f3 = f1*v2_avg - B1_avg*B2_avg - f4 = f1*v3_avg - B1_avg*B3_avg - f6 = equations.c_h*psi_avg - f7 = v1_avg*B2_avg - v2_avg*B1_avg - f8 = v1_avg*B3_avg - v3_avg*B1_avg - f9 = equations.c_h*B1_avg - # total energy flux is complicated and involves the previous eight components - psi_B1_avg = 0.5*(B1_ll*psi_ll + B1_rr*psi_rr) - v1_mag_avg = 0.5*(v1_ll*mag_norm_ll + v1_rr*mag_norm_rr) - f5 = (f1*0.5*(1/(equations.gamma-1)/beta_mean - vel_norm_avg) + f2*v1_avg + f3*v2_avg + - f4*v3_avg + f6*B1_avg + f7*B2_avg + f8*B3_avg + f9*psi_avg - 0.5*v1_mag_avg + - B1_avg*vel_dot_mag_avg - equations.c_h*psi_B1_avg) - elseif orientation == 2 - f1 = rho_mean*v2_avg - f2 = f1*v1_avg - B2_avg*B1_avg - f3 = f1*v2_avg + p_mean + 0.5*mag_norm_avg - B2_avg*B2_avg - f4 = f1*v3_avg - B2_avg*B3_avg - f6 = v2_avg*B1_avg - v1_avg*B2_avg - f7 = equations.c_h*psi_avg - f8 = v2_avg*B3_avg - v3_avg*B2_avg - f9 = equations.c_h*B2_avg - # total energy flux is complicated and involves the previous eight components - psi_B2_avg = 0.5*(B2_ll*psi_ll + B2_rr*psi_rr) - v2_mag_avg = 0.5*(v2_ll*mag_norm_ll + v2_rr*mag_norm_rr) - f5 = (f1*0.5*(1/(equations.gamma-1)/beta_mean - vel_norm_avg) + f2*v1_avg + f3*v2_avg + - f4*v3_avg + f6*B1_avg + f7*B2_avg + f8*B3_avg + f9*psi_avg - 0.5*v2_mag_avg + - B2_avg*vel_dot_mag_avg - equations.c_h*psi_B2_avg) - else - f1 = rho_mean*v3_avg - f2 = f1*v1_avg - B3_avg*B1_avg - f3 = f1*v2_avg - B3_avg*B2_avg - f4 = f1*v3_avg + p_mean + 0.5*mag_norm_avg - B3_avg*B3_avg - f6 = v3_avg*B1_avg - v1_avg*B3_avg - f7 = v3_avg*B2_avg - v2_avg*B3_avg - f8 = equations.c_h*psi_avg - f9 = equations.c_h*B3_avg - # total energy flux is complicated and involves the previous eight components - psi_B3_avg = 0.5*(B3_ll*psi_ll + B3_rr*psi_rr) - v3_mag_avg = 0.5*(v3_ll*mag_norm_ll + v3_rr*mag_norm_rr) - f5 = (f1*0.5*(1/(equations.gamma-1)/beta_mean - vel_norm_avg) + f2*v1_avg + f3*v2_avg + - f4*v3_avg + f6*B1_avg + f7*B2_avg + f8*B3_avg + f9*psi_avg - 0.5*v3_mag_avg + - B3_avg*vel_dot_mag_avg - equations.c_h*psi_B3_avg) - end - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) +function flux_derigs_etal(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations3D) + # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta) + rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, + equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, + equations) + + vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 + vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 + mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 + mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 + beta_ll = 0.5 * rho_ll / p_ll + beta_rr = 0.5 * rho_rr / p_rr + # for convenience store v⋅B + vel_dot_mag_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + vel_dot_mag_rr = v1_rr * B1_rr + v2_rr * B2_rr + v3_rr * B3_rr + + # Compute the necessary mean values needed for either direction + rho_avg = 0.5 * (rho_ll + rho_rr) + rho_mean = ln_mean(rho_ll, rho_rr) + beta_mean = ln_mean(beta_ll, beta_rr) + beta_avg = 0.5 * (beta_ll + beta_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_mean = 0.5 * rho_avg / beta_avg + B1_avg = 0.5 * (B1_ll 
+ B1_rr) + B2_avg = 0.5 * (B2_ll + B2_rr) + B3_avg = 0.5 * (B3_ll + B3_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + vel_norm_avg = 0.5 * (vel_norm_ll + vel_norm_rr) + mag_norm_avg = 0.5 * (mag_norm_ll + mag_norm_rr) + vel_dot_mag_avg = 0.5 * (vel_dot_mag_ll + vel_dot_mag_rr) + + # Calculate fluxes depending on orientation with specific direction averages + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_mean + 0.5 * mag_norm_avg - B1_avg * B1_avg + f3 = f1 * v2_avg - B1_avg * B2_avg + f4 = f1 * v3_avg - B1_avg * B3_avg + f6 = equations.c_h * psi_avg + f7 = v1_avg * B2_avg - v2_avg * B1_avg + f8 = v1_avg * B3_avg - v3_avg * B1_avg + f9 = equations.c_h * B1_avg + # total energy flux is complicated and involves the previous eight components + psi_B1_avg = 0.5 * (B1_ll * psi_ll + B1_rr * psi_rr) + v1_mag_avg = 0.5 * (v1_ll * mag_norm_ll + v1_rr * mag_norm_rr) + f5 = (f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - vel_norm_avg) + + f2 * v1_avg + f3 * v2_avg + + f4 * v3_avg + f6 * B1_avg + f7 * B2_avg + f8 * B3_avg + f9 * psi_avg - + 0.5 * v1_mag_avg + + B1_avg * vel_dot_mag_avg - equations.c_h * psi_B1_avg) + elseif orientation == 2 + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg - B2_avg * B1_avg + f3 = f1 * v2_avg + p_mean + 0.5 * mag_norm_avg - B2_avg * B2_avg + f4 = f1 * v3_avg - B2_avg * B3_avg + f6 = v2_avg * B1_avg - v1_avg * B2_avg + f7 = equations.c_h * psi_avg + f8 = v2_avg * B3_avg - v3_avg * B2_avg + f9 = equations.c_h * B2_avg + # total energy flux is complicated and involves the previous eight components + psi_B2_avg = 0.5 * (B2_ll * psi_ll + B2_rr * psi_rr) + v2_mag_avg = 0.5 * (v2_ll * mag_norm_ll + v2_rr * mag_norm_rr) + f5 = (f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - vel_norm_avg) + + f2 * v1_avg + f3 * v2_avg + + f4 * v3_avg + f6 * B1_avg + f7 * B2_avg + f8 * B3_avg + f9 * psi_avg - + 0.5 * v2_mag_avg + + B2_avg * vel_dot_mag_avg - equations.c_h * psi_B2_avg) + else + f1 = rho_mean * v3_avg + f2 = f1 * v1_avg - B3_avg * B1_avg + f3 = f1 * v2_avg - B3_avg * B2_avg + f4 = f1 * v3_avg + p_mean + 0.5 * mag_norm_avg - B3_avg * B3_avg + f6 = v3_avg * B1_avg - v1_avg * B3_avg + f7 = v3_avg * B2_avg - v2_avg * B3_avg + f8 = equations.c_h * psi_avg + f9 = equations.c_h * B3_avg + # total energy flux is complicated and involves the previous eight components + psi_B3_avg = 0.5 * (B3_ll * psi_ll + B3_rr * psi_rr) + v3_mag_avg = 0.5 * (v3_ll * mag_norm_ll + v3_rr * mag_norm_rr) + f5 = (f1 * 0.5 * (1 / (equations.gamma - 1) / beta_mean - vel_norm_avg) + + f2 * v1_avg + f3 * v2_avg + + f4 * v3_avg + f6 * B1_avg + f7 * B2_avg + f8 * B3_avg + f9 * psi_avg - + 0.5 * v3_mag_avg + + B3_avg * vel_dot_mag_avg - equations.c_h * psi_B3_avg) + end + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end - """ flux_hindenlang_gassner(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdEquations3D) @@ -429,198 +444,232 @@ Hindenlang and Gassner (2019), extending [`flux_ranocha`](@ref) to the MHD equat the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations3D) - # Unpack left and right states - rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, equations) - - # Compute the necessary mean values needed for either direction - rho_mean = ln_mean(rho_ll, 
rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - psi_avg = 0.5 * (psi_ll + psi_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) - - # Calculate fluxes depending on orientation with specific direction averages - if orientation == 1 - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg + magnetic_square_avg - 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) - f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) - f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) - #f5 below - f6 = equations.c_h * psi_avg - f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) - f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) - f9 = equations.c_h * 0.5 * (B1_ll + B1_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v1_rr + p_rr * v1_ll - + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll) - + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) - - (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) - - (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll) - + equations.c_h * (B1_ll * psi_rr + B1_rr * psi_ll) ) ) - elseif orientation == 2 - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - 0.5 * (B2_ll * B1_rr + B2_rr * B1_ll) - f3 = f1 * v2_avg + p_avg + magnetic_square_avg - 0.5 * (B2_ll * B2_rr + B2_rr * B2_ll) - f4 = f1 * v3_avg - 0.5 * (B2_ll * B3_rr + B2_rr * B3_ll) - #f5 below - f6 = 0.5 * (v2_ll * B1_ll - v1_ll * B2_ll + v2_rr * B1_rr - v1_rr * B2_rr) - f7 = equations.c_h * psi_avg - f8 = 0.5 * (v2_ll * B3_ll - v3_ll * B2_ll + v2_rr * B3_rr - v3_rr * B2_rr) - f9 = equations.c_h * 0.5 * (B2_ll + B2_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v2_rr + p_rr * v2_ll - + (v2_ll * B1_ll * B1_rr + v2_rr * B1_rr * B1_ll) - + (v2_ll * B3_ll * B3_rr + v2_rr * B3_rr * B3_ll) - - (v1_ll * B2_ll * B1_rr + v1_rr * B2_rr * B1_ll) - - (v3_ll * B2_ll * B3_rr + v3_rr * B2_rr * B3_ll) - + equations.c_h * (B2_ll * psi_rr + B2_rr * psi_ll) ) ) - else # orientation == 3 - f1 = rho_mean * v3_avg - f2 = f1 * v1_avg - 0.5 * (B3_ll * B1_rr + B3_rr * B1_ll) - f3 = f1 * v2_avg - 0.5 * (B3_ll * B2_rr + B3_rr * B2_ll) - f4 = f1 * v3_avg + p_avg + magnetic_square_avg - 0.5 * (B3_ll * B3_rr + B3_rr * B3_ll) - #f5 below - f6 = 0.5 * (v3_ll * B1_ll - v1_ll * B3_ll + v3_rr * B1_rr - v1_rr * B3_rr) - f7 = 0.5 * (v3_ll * B2_ll - v2_ll * B3_ll + v3_rr * B2_rr - v2_rr * B3_rr) - f8 = equations.c_h * psi_avg - f9 = equations.c_h * 0.5 * (B3_ll + B3_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v3_rr + p_rr * v3_ll - + (v3_ll * B1_ll * B1_rr + v3_rr * B1_rr * B1_ll) - + (v3_ll * B2_ll * B2_rr + v3_rr * B2_rr * B2_ll) - - (v1_ll * B3_ll * B1_rr + v1_rr * B3_rr * B1_ll) - - (v2_ll * B3_ll * B2_rr + v2_rr * B3_rr * B2_ll) - + 
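The algebraic identity quoted in the comment above is easy to confirm numerically; a small check, assuming the helper is reachable as `Trixi.inv_ln_mean` (the reciprocal of the logarithmic mean):

```julia
using Trixi

rho_ll, p_ll = 1.0, 1.2
rho_rr, p_rr = 1.4, 0.9

lhs = Trixi.inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)
rhs = p_ll * p_rr * Trixi.inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll)
lhs ≈ rhs  # the second form avoids the two divisions per evaluation
```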
equations.c_h * (B3_ll * psi_rr + B3_rr * psi_ll) ) ) - end - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) +@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations3D) + # Unpack left and right states + rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, + equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, + equations) + + # Compute the necessary mean values needed for either direction + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) + + # Calculate fluxes depending on orientation with specific direction averages + if orientation == 1 + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + magnetic_square_avg - + 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) + f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) + f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) + #f5 below + f6 = equations.c_h * psi_avg + f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) + f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) + f9 = equations.c_h * 0.5 * (B1_ll + B1_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v1_rr + p_rr * v1_ll + + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll) + + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) + - + (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) + - + (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll) + + + equations.c_h * (B1_ll * psi_rr + B1_rr * psi_ll))) + elseif orientation == 2 + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg - 0.5 * (B2_ll * B1_rr + B2_rr * B1_ll) + f3 = f1 * v2_avg + p_avg + magnetic_square_avg - + 0.5 * (B2_ll * B2_rr + B2_rr * B2_ll) + f4 = f1 * v3_avg - 0.5 * (B2_ll * B3_rr + B2_rr * B3_ll) + #f5 below + f6 = 0.5 * (v2_ll * B1_ll - v1_ll * B2_ll + v2_rr * B1_rr - v1_rr * B2_rr) + f7 = equations.c_h * psi_avg + f8 = 0.5 * (v2_ll * B3_ll - v3_ll * B2_ll + v2_rr * B3_rr - v3_rr * B2_rr) + f9 = equations.c_h * 0.5 * (B2_ll + B2_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v2_rr + p_rr * v2_ll + + (v2_ll * B1_ll * B1_rr + v2_rr * B1_rr * B1_ll) + + (v2_ll * B3_ll * B3_rr + v2_rr * B3_rr * B3_ll) + - + (v1_ll * B2_ll * B1_rr + v1_rr * B2_rr * B1_ll) + - + (v3_ll * B2_ll * B3_rr + v3_rr * B2_rr * B3_ll) + + + equations.c_h * (B2_ll * psi_rr + B2_rr * psi_ll))) + else # orientation == 3 + f1 = rho_mean * v3_avg + f2 = f1 * v1_avg - 0.5 * (B3_ll * B1_rr + B3_rr * B1_ll) + f3 = f1 * v2_avg - 0.5 * (B3_ll * B2_rr + B3_rr * B2_ll) + f4 = f1 * v3_avg + p_avg + magnetic_square_avg - + 0.5 * (B3_ll * B3_rr + B3_rr * B3_ll) + #f5 below + f6 = 0.5 * (v3_ll * B1_ll - v1_ll * B3_ll + v3_rr * B1_rr - v1_rr * B3_rr) + f7 = 0.5 * (v3_ll * 
B2_ll - v2_ll * B3_ll + v3_rr * B2_rr - v2_rr * B3_rr) + f8 = equations.c_h * psi_avg + f9 = equations.c_h * 0.5 * (B3_ll + B3_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v3_rr + p_rr * v3_ll + + (v3_ll * B1_ll * B1_rr + v3_rr * B1_rr * B1_ll) + + (v3_ll * B2_ll * B2_rr + v3_rr * B2_rr * B2_ll) + - + (v1_ll * B3_ll * B1_rr + v1_rr * B3_rr * B1_ll) + - + (v2_ll * B3_ll * B2_rr + v2_rr * B3_rr * B2_ll) + + + equations.c_h * (B3_ll * psi_rr + B3_rr * psi_ll))) + end + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end -@inline function flux_hindenlang_gassner(u_ll, u_rr, normal_direction::AbstractVector, equations::IdealGlmMhdEquations3D) - # Unpack left and right states - rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, equations) - rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, equations) - v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v3_ll * normal_direction[3] - v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + v3_rr * normal_direction[3] - B_dot_n_ll = B1_ll * normal_direction[1] + B2_ll * normal_direction[2] + B3_ll * normal_direction[3] - B_dot_n_rr = B1_rr * normal_direction[1] + B2_rr * normal_direction[2] + B3_rr * normal_direction[3] - - # Compute the necessary mean values needed for either direction - rho_mean = ln_mean(rho_ll, rho_rr) - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - psi_avg = 0.5 * (psi_ll + psi_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = ( f1 * v1_avg + (p_avg + magnetic_square_avg) * normal_direction[1] - - 0.5 * (B_dot_n_ll * B1_rr + B_dot_n_rr * B1_ll) ) - f3 = ( f1 * v2_avg + (p_avg + magnetic_square_avg) * normal_direction[2] - - 0.5 * (B_dot_n_ll * B2_rr + B_dot_n_rr * B2_ll) ) - f4 = ( f1 * v3_avg + (p_avg + magnetic_square_avg) * normal_direction[3] - - 0.5 * (B_dot_n_ll * B3_rr + B_dot_n_rr * B3_ll) ) - #f5 below - f6 = ( equations.c_h * psi_avg * normal_direction[1] - + 0.5 * (v_dot_n_ll * B1_ll - v1_ll * B_dot_n_ll + - v_dot_n_rr * B1_rr - v1_rr * B_dot_n_rr) ) - f7 = ( equations.c_h * psi_avg * normal_direction[2] - + 0.5 * (v_dot_n_ll * B2_ll - v2_ll * B_dot_n_ll + - v_dot_n_rr * B2_rr - v2_rr * B_dot_n_rr) ) - f8 = ( equations.c_h * psi_avg * normal_direction[3] - + 0.5 * (v_dot_n_ll * B3_ll - v3_ll * B_dot_n_ll + - v_dot_n_rr * B3_rr - v3_rr * B_dot_n_rr) ) - f9 = equations.c_h * 0.5 * (B_dot_n_ll + B_dot_n_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v_dot_n_rr + p_rr * v_dot_n_ll - + (v_dot_n_ll * B1_ll * B1_rr + v_dot_n_rr * B1_rr * B1_ll) - + (v_dot_n_ll * B2_ll * B2_rr + v_dot_n_rr * B2_rr * B2_ll) - + (v_dot_n_ll * B3_ll * B3_rr + v_dot_n_rr * B3_rr * B3_ll) - - 
(v1_ll * B_dot_n_ll * B1_rr + v1_rr * B_dot_n_rr * B1_ll) - - (v2_ll * B_dot_n_ll * B2_rr + v2_rr * B_dot_n_rr * B2_ll) - - (v3_ll * B_dot_n_ll * B3_rr + v3_rr * B_dot_n_rr * B3_ll) - + equations.c_h * (B_dot_n_ll * psi_rr + B_dot_n_rr * psi_ll) ) ) - - return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) +@inline function flux_hindenlang_gassner(u_ll, u_rr, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations3D) + # Unpack left and right states + rho_ll, v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, + equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, + equations) + v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3] + v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + v3_rr * normal_direction[3] + B_dot_n_ll = B1_ll * normal_direction[1] + B2_ll * normal_direction[2] + + B3_ll * normal_direction[3] + B_dot_n_rr = B1_rr * normal_direction[1] + B2_rr * normal_direction[2] + + B3_rr * normal_direction[3] + + # Compute the necessary mean values needed for either direction + rho_mean = ln_mean(rho_ll, rho_rr) + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = (f1 * v1_avg + (p_avg + magnetic_square_avg) * normal_direction[1] + - + 0.5 * (B_dot_n_ll * B1_rr + B_dot_n_rr * B1_ll)) + f3 = (f1 * v2_avg + (p_avg + magnetic_square_avg) * normal_direction[2] + - + 0.5 * (B_dot_n_ll * B2_rr + B_dot_n_rr * B2_ll)) + f4 = (f1 * v3_avg + (p_avg + magnetic_square_avg) * normal_direction[3] + - + 0.5 * (B_dot_n_ll * B3_rr + B_dot_n_rr * B3_ll)) + #f5 below + f6 = (equations.c_h * psi_avg * normal_direction[1] + + + 0.5 * (v_dot_n_ll * B1_ll - v1_ll * B_dot_n_ll + + v_dot_n_rr * B1_rr - v1_rr * B_dot_n_rr)) + f7 = (equations.c_h * psi_avg * normal_direction[2] + + + 0.5 * (v_dot_n_ll * B2_ll - v2_ll * B_dot_n_ll + + v_dot_n_rr * B2_rr - v2_rr * B_dot_n_rr)) + f8 = (equations.c_h * psi_avg * normal_direction[3] + + + 0.5 * (v_dot_n_ll * B3_ll - v3_ll * B_dot_n_ll + + v_dot_n_rr * B3_rr - v3_rr * B_dot_n_rr)) + f9 = equations.c_h * 0.5 * (B_dot_n_ll + B_dot_n_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (+p_ll * v_dot_n_rr + p_rr * v_dot_n_ll + + (v_dot_n_ll * B1_ll * B1_rr + v_dot_n_rr * B1_rr * B1_ll) + + (v_dot_n_ll * B2_ll * B2_rr + v_dot_n_rr * B2_rr * B2_ll) + + (v_dot_n_ll * B3_ll * B3_rr + v_dot_n_rr * B3_rr * B3_ll) + - + (v1_ll * B_dot_n_ll * B1_rr + v1_rr * B_dot_n_rr * B1_ll) + - + (v2_ll * B_dot_n_ll * B2_rr + v2_rr * B_dot_n_rr * B2_ll) + - + (v3_ll * B_dot_n_ll * B3_rr + v3_rr * B_dot_n_rr * B3_ll) + + + equations.c_h * (B_dot_n_ll * psi_rr + B_dot_n_rr * psi_ll))) + + return SVector(f1, f2, f3, f4, f5, f6, f7, f8, f9) end - # Calculate maximum wave speed for local 
Lax-Friedrichs-type dissipation
-@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations3D)
-  rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll
-  rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr
-
-  # Calculate the left/right velocities and fast magnetoacoustic wave speeds
-  if orientation == 1
-    v_ll = rho_v1_ll / rho_ll
-    v_rr = rho_v1_rr / rho_rr
-  elseif orientation == 2
-    v_ll = rho_v2_ll / rho_ll
-    v_rr = rho_v2_rr / rho_rr
-  else # orientation == 3
-    v_ll = rho_v3_ll / rho_ll
-    v_rr = rho_v3_rr / rho_rr
-  end
-  cf_ll = calc_fast_wavespeed(u_ll, orientation, equations)
-  cf_rr = calc_fast_wavespeed(u_rr, orientation, equations)
-
-  return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr)
+@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer,
+                                     equations::IdealGlmMhdEquations3D)
+    rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll
+    rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr
+
+    # Calculate the left/right velocities and fast magnetoacoustic wave speeds
+    if orientation == 1
+        v_ll = rho_v1_ll / rho_ll
+        v_rr = rho_v1_rr / rho_rr
+    elseif orientation == 2
+        v_ll = rho_v2_ll / rho_ll
+        v_rr = rho_v2_rr / rho_rr
+    else # orientation == 3
+        v_ll = rho_v3_ll / rho_ll
+        v_rr = rho_v3_rr / rho_rr
+    end
+    cf_ll = calc_fast_wavespeed(u_ll, orientation, equations)
+    cf_rr = calc_fast_wavespeed(u_rr, orientation, equations)
+
+    return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr)
 end

 @inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector,
                                      equations::IdealGlmMhdEquations3D)
-  rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll
-  rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr
-
-  # Calculate normal velocities and fast magnetoacoustic wave speeds
-  # left
-  v1_ll = rho_v1_ll / rho_ll
-  v2_ll = rho_v2_ll / rho_ll
-  v3_ll = rho_v3_ll / rho_ll
-  v_ll = ( v1_ll * normal_direction[1]
-         + v2_ll * normal_direction[2]
-         + v3_ll * normal_direction[3] )
-  cf_ll = calc_fast_wavespeed(u_ll, normal_direction, equations)
-  # right
-  v1_rr = rho_v1_rr / rho_rr
-  v2_rr = rho_v2_rr / rho_rr
-  v3_rr = rho_v3_rr / rho_rr
-  v_rr = ( v1_rr * normal_direction[1]
-         + v2_rr * normal_direction[2]
-         + v3_rr * normal_direction[3] )
-  cf_rr = calc_fast_wavespeed(u_rr, normal_direction, equations)
-
-  # wave speeds already scaled by norm(normal_direction) in [`calc_fast_wavespeed`](@ref)
-  return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr)
+    rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll
+    rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr
+
+    # Calculate normal velocities and fast magnetoacoustic wave speeds
+    # left
+    v1_ll = rho_v1_ll / rho_ll
+    v2_ll = rho_v2_ll / rho_ll
+    v3_ll = rho_v3_ll / rho_ll
+    v_ll = (v1_ll * normal_direction[1]
+            + v2_ll * normal_direction[2]
+            + v3_ll * normal_direction[3])
+    cf_ll = calc_fast_wavespeed(u_ll, normal_direction, equations)
+    # right
+    v1_rr = rho_v1_rr / rho_rr
+    v2_rr = rho_v2_rr / rho_rr
+    v3_rr = rho_v3_rr / rho_rr
+    v_rr = (v1_rr * normal_direction[1]
+            + v2_rr * normal_direction[2]
+            + v3_rr * normal_direction[3])
+    cf_rr = calc_fast_wavespeed(u_rr, normal_direction, equations)
+
+    # wave speeds already scaled by norm(normal_direction) in [`calc_fast_wavespeed`](@ref)
+    return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr)
 end
-
"""
    min_max_speed_naive(u_ll, u_rr, orientation_or_normal_direction,
                        equations::IdealGlmMhdEquations3D)

Calculate minimum and maximum wave speeds for HLL-type fluxes as in
- Li (2005)
  An HLLC Riemann solver for magneto-hydrodynamics
  [DOI:
10.1016/j.jcp.2004.08.020](https://doi.org/10.1016/j.jcp.2004.08.020) """ -@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations3D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr - - # Calculate primitive variables and speed of sound - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v3_rr = rho_v3_rr / rho_rr - - # Approximate the left-most and right-most eigenvalues in the Riemann fan - if orientation == 1 # x-direction - c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) - c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) - vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) - λ_min = min(v1_ll - c_f_ll, vel_roe - c_f_roe) - λ_max = max(v1_rr + c_f_rr, vel_roe + c_f_roe) - elseif orientation == 2 # y-direction - c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) - c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) - vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) - λ_min = min(v2_ll - c_f_ll, vel_roe - c_f_roe) - λ_max = max(v2_rr + c_f_rr, vel_roe + c_f_roe) - else # z-direction - c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) - c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) - vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) - λ_min = min(v3_ll - c_f_ll, vel_roe - c_f_roe) - λ_max = max(v3_rr + c_f_rr, vel_roe + c_f_roe) - end - - return λ_min, λ_max +@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations3D) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr + + # Calculate primitive variables and speed of sound + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + + # Approximate the left-most and right-most eigenvalues in the Riemann fan + if orientation == 1 # x-direction + c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) + c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) + vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) + λ_min = min(v1_ll - c_f_ll, vel_roe - c_f_roe) + λ_max = max(v1_rr + c_f_rr, vel_roe + c_f_roe) + elseif orientation == 2 # y-direction + c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) + c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) + vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) + λ_min = min(v2_ll - c_f_ll, vel_roe - c_f_roe) + λ_max = max(v2_rr + c_f_rr, vel_roe + c_f_roe) + else # z-direction + c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) + c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) + vel_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, orientation, equations) + λ_min = min(v3_ll - c_f_ll, vel_roe - c_f_roe) + λ_max = max(v3_rr + c_f_rr, vel_roe + c_f_roe) + end + + return λ_min, λ_max end @inline function min_max_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::IdealGlmMhdEquations3D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr - - # Calculate primitive velocity variables - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - - v1_rr = 
rho_v1_rr / rho_rr
-  v2_rr = rho_v2_rr / rho_rr
-  v3_rr = rho_v3_rr / rho_rr
-
-  v_normal_ll = (v1_ll * normal_direction[1] +
-                 v2_ll * normal_direction[2] +
-                 v3_ll * normal_direction[3])
-  v_normal_rr = (v1_rr * normal_direction[1] +
-                 v2_rr * normal_direction[2] +
-                 v3_rr * normal_direction[3])
-
-  c_f_ll = calc_fast_wavespeed(u_ll, normal_direction, equations)
-  c_f_rr = calc_fast_wavespeed(u_rr, normal_direction, equations)
-  v_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction, equations)
-
-  # Estimate the min/max eigenvalues in the normal direction
-  λ_min = min(v_normal_ll - c_f_ll, v_roe - c_f_roe)
-  λ_max = max(v_normal_rr + c_f_rr, v_roe + c_f_roe)
-
-  return λ_min, λ_max
+    rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll
+    rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr
+
+    # Calculate primitive velocity variables
+    v1_ll = rho_v1_ll / rho_ll
+    v2_ll = rho_v2_ll / rho_ll
+    v3_ll = rho_v3_ll / rho_ll
+
+    v1_rr = rho_v1_rr / rho_rr
+    v2_rr = rho_v2_rr / rho_rr
+    v3_rr = rho_v3_rr / rho_rr
+
+    v_normal_ll = (v1_ll * normal_direction[1] +
+                   v2_ll * normal_direction[2] +
+                   v3_ll * normal_direction[3])
+    v_normal_rr = (v1_rr * normal_direction[1] +
+                   v2_rr * normal_direction[2] +
+                   v3_rr * normal_direction[3])
+
+    c_f_ll = calc_fast_wavespeed(u_ll, normal_direction, equations)
+    c_f_rr = calc_fast_wavespeed(u_rr, normal_direction, equations)
+    v_roe, c_f_roe = calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction, equations)
+
+    # Estimate the min/max eigenvalues in the normal direction
+    λ_min = min(v_normal_ll - c_f_ll, v_roe - c_f_roe)
+    λ_max = max(v_normal_rr + c_f_rr, v_roe + c_f_roe)
+
+    return λ_min, λ_max
 end
-
# Rotate normal vector to x-axis; normal, tangent1 and tangent2 need to be orthonormal
# Called inside `FluxRotated` in `numerical_fluxes.jl` so the directions
# have been normalized prior to this rotation of the state vector
# Note, for ideal GLM-MHD only the velocities and magnetic field variables rotate
-@inline function rotate_to_x(u, normal_vector, tangent1, tangent2, equations::IdealGlmMhdEquations3D)
-  # Multiply with [ 1 0 0 0 0 0 0 0 0;
-  #                 0 ― normal_vector ― 0 0 0 0 0;
-  #                 0 ― tangent1 ― 0 0 0 0 0;
-  #                 0 ― tangent2 ― 0 0 0 0 0;
-  #                 0 0 0 0 1 0 0 0 0;
-  #                 0 0 0 0 0 ― normal_vector ― 0;
-  #                 0 0 0 0 0 ― tangent1 ― 0;
-  #                 0 0 0 0 0 ― tangent2 ― 0;
-  #                 0 0 0 0 0 0 0 0 1 ]
-  return SVector(u[1],
-                 normal_vector[1] * u[2] + normal_vector[2] * u[3] + normal_vector[3] * u[4],
-                 tangent1[1] * u[2] + tangent1[2] * u[3] + tangent1[3] * u[4],
-                 tangent2[1] * u[2] + tangent2[2] * u[3] + tangent2[3] * u[4],
-                 u[5],
-                 normal_vector[1] * u[6] + normal_vector[2] * u[7] + normal_vector[3] * u[8],
-                 tangent1[1] * u[6] + tangent1[2] * u[7] + tangent1[3] * u[8],
-                 tangent2[1] * u[6] + tangent2[2] * u[7] + tangent2[3] * u[8],
-                 u[9])
-
+@inline function rotate_to_x(u, normal_vector, tangent1, tangent2,
+                             equations::IdealGlmMhdEquations3D)
+    # Multiply with [ 1 0 0 0 0 0 0 0 0;
+    #                 0 ― normal_vector ― 0 0 0 0 0;
+    #                 0 ― tangent1 ― 0 0 0 0 0;
+    #                 0 ― tangent2 ― 0 0 0 0 0;
+    #                 0 0 0 0 1 0 0 0 0;
+    #                 0 0 0 0 0 ― normal_vector ― 0;
+    #                 0 0 0 0 0 ― tangent1 ― 0;
+    #                 0 0 0 0 0 ― tangent2 ― 0;
+    #                 0 0 0 0 0 0 0 0 1 ]
+    return SVector(u[1],
+                   normal_vector[1] * u[2] + normal_vector[2] * u[3] +
+                   normal_vector[3] * u[4],
+                   tangent1[1] * u[2] + tangent1[2] * u[3] + tangent1[3] * u[4],
+                   tangent2[1] * u[2] + tangent2[2] * u[3] + tangent2[3] * u[4],
+                   u[5],
+                   normal_vector[1] * u[6] + normal_vector[2] * u[7] +
+                   normal_vector[3] * u[8],
+                   tangent1[1] * u[6] + tangent1[2] * u[7] + tangent1[3] * u[8],
+                   tangent2[1] * u[6] + tangent2[2] * u[7] + tangent2[3] * u[8],
+                   u[9])
 end
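Editor's note: `rotate_to_x` above and `rotate_from_x` below are exact inverses whenever
`normal_vector`, `tangent1`, and `tangent2` form an orthonormal triad, so a round-trip
check is a cheap guard against transcription errors in the two matrices. A minimal
sketch (not part of the patch; the triad and state values are arbitrary illustration
choices):

```julia
using Trixi, StaticArrays, LinearAlgebra

equations = IdealGlmMhdEquations3D(5 / 3)
normal = SVector(1.0, 1.0, 1.0) / sqrt(3.0)
tangent1 = SVector(1.0, -1.0, 0.0) / sqrt(2.0)
tangent2 = cross(normal, tangent1)  # completes the orthonormal triad

u = prim2cons(SVector(1.1, 0.2, -0.3, 0.4, 2.0, 0.5, -0.25, 0.125, 0.05), equations)
u_rotated = Trixi.rotate_to_x(u, normal, tangent1, tangent2, equations)
u_back = Trixi.rotate_from_x(u_rotated, normal, tangent1, tangent2, equations)
maximum(abs, u_back - u)  # expected to be ≈ 0 up to roundoff
```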
-
# Rotate x-axis to normal vector; normal, tangent1 and tangent2 need to be orthonormal
# Called inside `FluxRotated` in `numerical_fluxes.jl` so the directions
# have been normalized prior to this back-rotation of the state vector
# Note, for ideal GLM-MHD only the velocities and magnetic field variables back-rotate
-@inline function rotate_from_x(u, normal_vector, tangent1, tangent2, equations::IdealGlmMhdEquations3D)
-  # Multiply with [ 1 0 0 0 0 0 0 0 0;
-  #                 0 | | | 0 0 0 0 0;
-  #                 0 normal_vector tangent1 tangent2 0 0 0 0 0;
-  #                 0 | | | 0 0 0 0 0;
-  #                 0 0 0 0 1 0 0 0 0;
-  #                 0 0 0 0 0 | | | 0;
-  #                 0 0 0 0 0 normal_vector tangent1 tangent2 0;
-  #                 0 0 0 0 0 | | | 0;
-  #                 0 0 0 0 0 0 0 0 1 ]
-  return SVector(u[1],
-                 normal_vector[1] * u[2] + tangent1[1] * u[3] + tangent2[1] * u[4],
-                 normal_vector[2] * u[2] + tangent1[2] * u[3] + tangent2[2] * u[4],
-                 normal_vector[3] * u[2] + tangent1[3] * u[3] + tangent2[3] * u[4],
-                 u[5],
-                 normal_vector[1] * u[6] + tangent1[1] * u[7] + tangent2[1] * u[8],
-                 normal_vector[2] * u[6] + tangent1[2] * u[7] + tangent2[2] * u[8],
-                 normal_vector[3] * u[6] + tangent1[3] * u[7] + tangent2[3] * u[8],
-                 u[9])
+@inline function rotate_from_x(u, normal_vector, tangent1, tangent2,
+                               equations::IdealGlmMhdEquations3D)
+    # Multiply with [ 1 0 0 0 0 0 0 0 0;
+    #                 0 | | | 0 0 0 0 0;
+    #                 0 normal_vector tangent1 tangent2 0 0 0 0 0;
+    #                 0 | | | 0 0 0 0 0;
+    #                 0 0 0 0 1 0 0 0 0;
+    #                 0 0 0 0 0 | | | 0;
+    #                 0 0 0 0 0 normal_vector tangent1 tangent2 0;
+    #                 0 0 0 0 0 | | | 0;
+    #                 0 0 0 0 0 0 0 0 1 ]
+    return SVector(u[1],
+                   normal_vector[1] * u[2] + tangent1[1] * u[3] + tangent2[1] * u[4],
+                   normal_vector[2] * u[2] + tangent1[2] * u[3] + tangent2[2] * u[4],
+                   normal_vector[3] * u[2] + tangent1[3] * u[3] + tangent2[3] * u[4],
+                   u[5],
+                   normal_vector[1] * u[6] + tangent1[1] * u[7] + tangent2[1] * u[8],
+                   normal_vector[2] * u[6] + tangent1[2] * u[7] + tangent2[2] * u[8],
+                   normal_vector[3] * u[6] + tangent1[3] * u[7] + tangent2[3] * u[8],
+                   u[9])
 end

-
@inline function max_abs_speeds(u, equations::IdealGlmMhdEquations3D)
-  rho, rho_v1, rho_v2, rho_v3, _ = u
-  v1 = rho_v1 / rho
-  v2 = rho_v2 / rho
-  v3 = rho_v3 / rho
-  cf_x_direction = calc_fast_wavespeed(u, 1, equations)
-  cf_y_direction = calc_fast_wavespeed(u, 2, equations)
-  cf_z_direction = calc_fast_wavespeed(u, 3, equations)
-
-  return abs(v1) + cf_x_direction, abs(v2) + cf_y_direction, abs(v3) + cf_z_direction
+    rho, rho_v1, rho_v2, rho_v3, _ = u
+    v1 = rho_v1 / rho
+    v2 = rho_v2 / rho
+    v3 = rho_v3 / rho
+    cf_x_direction = calc_fast_wavespeed(u, 1, equations)
+    cf_y_direction = calc_fast_wavespeed(u, 2, equations)
+    cf_z_direction = calc_fast_wavespeed(u, 3, equations)
+
+    return abs(v1) + cf_x_direction, abs(v2) + cf_y_direction, abs(v3) + cf_z_direction
 end

-
# Convert conservative variables to primitive
@inline function cons2prim(u, equations::IdealGlmMhdEquations3D)
-  rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u
+    rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u

-  v1 = rho_v1 / rho
-  v2 = rho_v2 / rho
-  v3 = rho_v3 / rho
-  p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3
-                                              + B1 * B1 + B2 * B2 + B3 * B3
-                                              + psi * psi))
+    v1 = rho_v1 / rho
+    v2 = rho_v2 / rho
+    v3 = rho_v3 / rho
+    p = (equations.gamma - 1) * (rho_e -
+                                 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3
+                                        + B1 * B1 + B2 * B2 + B3 * B3
+                                        + psi * psi))

-  return SVector(rho, v1, v2, v3, p, B1, B2, B3, psi)
+    
return SVector(rho, v1, v2, v3, p, B1, B2, B3, psi) end - # Convert conservative variables to entropy @inline function cons2entropy(u, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_square = v1^2 + v2^2 + v3^2 - p = (equations.gamma - 1) * (rho_e - 0.5*rho*v_square - 0.5*(B1^2 + B2^2 + B3^2) - 0.5*psi^2) - s = log(p) - equations.gamma*log(rho) - rho_p = rho / p - - w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square - w2 = rho_p * v1 - w3 = rho_p * v2 - w4 = rho_p * v3 - w5 = -rho_p - w6 = rho_p * B1 - w7 = rho_p * B2 - w8 = rho_p * B3 - w9 = rho_p * psi - - return SVector(w1, w2, w3, w4, w5, w6, w7, w8, w9) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_square = v1^2 + v2^2 + v3^2 + p = (equations.gamma - 1) * + (rho_e - 0.5 * rho * v_square - 0.5 * (B1^2 + B2^2 + B3^2) - 0.5 * psi^2) + s = log(p) - equations.gamma * log(rho) + rho_p = rho / p + + w1 = (equations.gamma - s) * equations.inv_gamma_minus_one - 0.5 * rho_p * v_square + w2 = rho_p * v1 + w3 = rho_p * v2 + w4 = rho_p * v3 + w5 = -rho_p + w6 = rho_p * B1 + w7 = rho_p * B2 + w8 = rho_p * B3 + w9 = rho_p * psi + + return SVector(w1, w2, w3, w4, w5, w6, w7, w8, w9) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::IdealGlmMhdEquations3D) - rho, v1, v2, v3, p, B1, B2, B3, psi = prim + rho, v1, v2, v3, p, B1, B2, B3, psi = prim - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_v3 = rho * v3 - rho_e = p * equations.inv_gamma_minus_one + 0.5 * (rho_v1*v1 + rho_v2*v2 + rho_v3*v3) + - 0.5 * (B1^2 + B2^2 + B3^2) + 0.5 * psi^2 + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_v3 = rho * v3 + rho_e = p * equations.inv_gamma_minus_one + + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + + 0.5 * (B1^2 + B2^2 + B3^2) + 0.5 * psi^2 - return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) + return SVector(rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) end - @inline function density(u, equations::IdealGlmMhdEquations3D) - return u[1] + return u[1] end @inline function pressure(u, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - p = (equations.gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2) - - 0.5 * psi^2) - return p + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2) + - + 0.5 * psi^2) + return p end @inline function density_pressure(u, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - p = (equations.gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2) - - 0.5 * psi^2) - return rho * p + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2) + - + 0.5 * psi^2) + return rho * p end - # Compute the fastest wave speed for ideal MHD equations: c_f, the fast magnetoacoustic eigenvalue -@inline function calc_fast_wavespeed(cons, orientation::Integer, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - 
mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) - a_square = equations.gamma * p / rho - sqrt_rho = sqrt(rho) - b1 = B1 / sqrt_rho - b2 = B2 / sqrt_rho - b3 = B3 / sqrt_rho - b_square = b1 * b1 + b2 * b2 + b3 * b3 - if orientation == 1 # x-direction - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b1^2)) - elseif orientation == 2 # y-direction - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b2^2)) - else # z-direction - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b3^2)) - end - return c_f +@inline function calc_fast_wavespeed(cons, orientation::Integer, + equations::IdealGlmMhdEquations3D) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) + a_square = equations.gamma * p / rho + sqrt_rho = sqrt(rho) + b1 = B1 / sqrt_rho + b2 = B2 / sqrt_rho + b3 = B3 / sqrt_rho + b_square = b1 * b1 + b2 * b2 + b3 * b3 + if orientation == 1 # x-direction + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b1^2)) + elseif orientation == 2 # y-direction + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b2^2)) + else # z-direction + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b3^2)) + end + return c_f end -@inline function calc_fast_wavespeed(cons, normal_direction::AbstractVector, equations::IdealGlmMhdEquations3D) - rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) - mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) - p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) - a_square = equations.gamma * p / rho - sqrt_rho = sqrt(rho) - b1 = B1 / sqrt_rho - b2 = B2 / sqrt_rho - b3 = B3 / sqrt_rho - b_square = b1 * b1 + b2 * b2 + b3 * b3 - norm_squared = (normal_direction[1] * normal_direction[1] + - normal_direction[2] * normal_direction[2] + - normal_direction[3] * normal_direction[3]) - b_dot_n_squared = (b1 * normal_direction[1] + - b2 * normal_direction[2] + - b3 * normal_direction[3])^2 / norm_squared - - c_f = sqrt( - (0.5 * (a_square + b_square) + - 0.5 * sqrt((a_square + b_square)^2 - 4 * a_square * b_dot_n_squared)) * norm_squared) - return c_f +@inline function calc_fast_wavespeed(cons, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations3D) + rho, rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + kin_en = 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + mag_en = 0.5 * (B1 * B1 + B2 * B2 + B3 * B3) + p = (equations.gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2) + a_square = equations.gamma * p / rho + sqrt_rho = sqrt(rho) + b1 = B1 / sqrt_rho + b2 = B2 / sqrt_rho + b3 = B3 / sqrt_rho + b_square = b1 * b1 + b2 * b2 + b3 * b3 + norm_squared = (normal_direction[1] * normal_direction[1] + + normal_direction[2] * normal_direction[2] + + normal_direction[3] * normal_direction[3]) + b_dot_n_squared = (b1 * normal_direction[1] + + b2 * normal_direction[2] + + b3 * normal_direction[3])^2 / norm_squared + + c_f = 
sqrt((0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4 * a_square * b_dot_n_squared)) * + norm_squared) + return c_f end - """ calc_fast_wavespeed_roe(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdEquations3D) @@ -902,201 +958,211 @@ Compute the fast magnetoacoustic wave speed using Roe averages as given by of Roe Matrices for Systems of Conservation Laws [DOI: 10.1006/jcph.1997.5773](https://doi.org/10.1006/jcph.1997.5773) """ -@inline function calc_fast_wavespeed_roe(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations3D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - # Calculate primitive variables - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll) - mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll - p_ll = (equations.gamma - 1)*(rho_e_ll - kin_en_ll - 0.5*mag_norm_ll - 0.5*psi_ll^2) - - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v3_rr = rho_v3_rr / rho_rr - kin_en_rr = 0.5 * (rho_v1_rr * v1_rr + rho_v2_rr * v2_rr + rho_v3_rr * v3_rr) - mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr - p_rr = (equations.gamma - 1)*(rho_e_rr - kin_en_rr - 0.5*mag_norm_rr - 0.5*psi_rr^2) - - # compute total pressure which is thermal + magnetic pressures - p_total_ll = p_ll + 0.5 * mag_norm_ll - p_total_rr = p_rr + 0.5 * mag_norm_rr - - # compute the Roe density averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr) - inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr) - rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add - rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add - # Roe averages - # velocities and magnetic fields - v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe - v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe - v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe - B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe - B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe - B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe - # enthalpy - H_ll = (rho_e_ll + p_total_ll) / rho_ll - H_rr = (rho_e_rr + p_total_rr) / rho_rr - H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe - # temporary variable see equation (4.12) in Cargo and Gallice - X = 0.5 * ( (B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2 ) * inv_sqrt_rho_add^2 - # averaged components needed to compute c_f, the fast magnetoacoustic wave speed - b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnectic sum - a_square_roe = ((2.0 - equations.gamma) * X + - (equations.gamma -1.0) * (H_roe - 0.5*(v1_roe^2 + v2_roe^2 + v3_roe^2) - - b_square_roe)) # acoustic speed - # finally compute the average wave speed and set the output velocity (depends on orientation) - if orientation == 1 # x-direction - c_a_roe = B1_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed - a_star_roe = sqrt( (a_square_roe + b_square_roe)^2 - 4.0 * a_square_roe * c_a_roe ) - c_f_roe = sqrt( 0.5 * (a_square_roe + b_square_roe + a_star_roe) ) - vel_out_roe = v1_roe - elseif orientation == 2 # y-direction - c_a_roe = B2_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed - a_star_roe = sqrt( (a_square_roe + b_square_roe)^2 - 4.0 * a_square_roe * c_a_roe ) - c_f_roe = sqrt( 0.5 * (a_square_roe + b_square_roe + a_star_roe) ) - vel_out_roe = v2_roe - else # 
z-direction
-    c_a_roe = B3_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed
-    a_star_roe = sqrt( (a_square_roe + b_square_roe)^2 - 4.0 * a_square_roe * c_a_roe )
-    c_f_roe = sqrt( 0.5 * (a_square_roe + b_square_roe + a_star_roe) )
-    vel_out_roe = v3_roe
-  end
-
-  return vel_out_roe, c_f_roe
+@inline function calc_fast_wavespeed_roe(u_ll, u_rr, orientation::Integer,
+                                         equations::IdealGlmMhdEquations3D)
+    rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll
+    rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr
+
+    # Calculate primitive variables
+    v1_ll = rho_v1_ll / rho_ll
+    v2_ll = rho_v2_ll / rho_ll
+    v3_ll = rho_v3_ll / rho_ll
+    kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll)
+    mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll
+    p_ll = (equations.gamma - 1) *
+           (rho_e_ll - kin_en_ll - 0.5 * mag_norm_ll - 0.5 * psi_ll^2)
+
+    v1_rr = rho_v1_rr / rho_rr
+    v2_rr = rho_v2_rr / rho_rr
+    v3_rr = rho_v3_rr / rho_rr
+    kin_en_rr = 0.5 * (rho_v1_rr * v1_rr + rho_v2_rr * v2_rr + rho_v3_rr * v3_rr)
+    mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr
+    p_rr = (equations.gamma - 1) *
+           (rho_e_rr - kin_en_rr - 0.5 * mag_norm_rr - 0.5 * psi_rr^2)
+
+    # compute total pressure which is thermal + magnetic pressures
+    p_total_ll = p_ll + 0.5 * mag_norm_ll
+    p_total_rr = p_rr + 0.5 * mag_norm_rr
+
+    # compute the Roe density averages
+    sqrt_rho_ll = sqrt(rho_ll)
+    sqrt_rho_rr = sqrt(rho_rr)
+    inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr)
+    inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr)
+    rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add
+    rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add
+    # Roe averages
+    # velocities and magnetic fields
+    v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe
+    v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe
+    v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe
+    B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe
+    B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe
+    B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe
+    # enthalpy
+    H_ll = (rho_e_ll + p_total_ll) / rho_ll
+    H_rr = (rho_e_rr + p_total_rr) / rho_rr
+    H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe
+    # temporary variable see equation (4.12) in Cargo and Gallice
+    X = 0.5 * ((B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2) *
+        inv_sqrt_rho_add^2
+    # averaged components needed to compute c_f, the fast magnetoacoustic wave speed
+    b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnetic sum
+    a_square_roe = ((2.0 - equations.gamma) * X +
+                    (equations.gamma - 1.0) *
+                    (H_roe - 0.5 * (v1_roe^2 + v2_roe^2 + v3_roe^2) -
+                     b_square_roe)) # acoustic speed
+    # finally compute the average wave speed and set the output velocity (depends on orientation)
+    if orientation == 1 # x-direction
+        c_a_roe = B1_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed
+        a_star_roe = sqrt((a_square_roe + b_square_roe)^2 -
+                          4.0 * a_square_roe * c_a_roe)
+        c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe))
+        vel_out_roe = v1_roe
+    elseif orientation == 2 # y-direction
+        c_a_roe = B2_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed
+        a_star_roe = sqrt((a_square_roe + b_square_roe)^2 -
+                          4.0 * a_square_roe * c_a_roe)
+        c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe))
+        vel_out_roe = v2_roe
+    else # z-direction
+        c_a_roe = B3_roe^2 * inv_sqrt_rho_prod # (squared) Alfvén wave speed
+        a_star_roe = sqrt((a_square_roe + b_square_roe)^2 -
+                          4.0 * a_square_roe
* c_a_roe) + c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe)) + vel_out_roe = v3_roe + end + + return vel_out_roe, c_f_roe end -@inline function calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction::AbstractVector, equations::IdealGlmMhdEquations3D) - rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - # Calculate primitive variables - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll) - mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll - p_ll = (equations.gamma - 1)*(rho_e_ll - kin_en_ll - 0.5*mag_norm_ll - 0.5*psi_ll^2) - - v1_rr = rho_v1_rr / rho_rr - v2_rr = rho_v2_rr / rho_rr - v3_rr = rho_v3_rr / rho_rr - kin_en_rr = 0.5 * (rho_v1_rr * v1_rr + rho_v2_rr * v2_rr + rho_v3_rr * v3_rr) - mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr - p_rr = (equations.gamma - 1)*(rho_e_rr - kin_en_rr - 0.5*mag_norm_rr - 0.5*psi_rr^2) - - # compute total pressure which is thermal + magnetic pressures - p_total_ll = p_ll + 0.5 * mag_norm_ll - p_total_rr = p_rr + 0.5 * mag_norm_rr - - # compute the Roe density averages - sqrt_rho_ll = sqrt(rho_ll) - sqrt_rho_rr = sqrt(rho_rr) - inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr) - inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr) - rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add - rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add - # Roe averages - # velocities and magnetic fields - v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe - v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe - v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe - B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe - B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe - B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe - # enthalpy - H_ll = (rho_e_ll + p_total_ll) / rho_ll - H_rr = (rho_e_rr + p_total_rr) / rho_rr - H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe - # temporary variable see equation (4.12) in Cargo and Gallice - X = 0.5 * ( (B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2 ) * inv_sqrt_rho_add^2 - # averaged components needed to compute c_f, the fast magnetoacoustic wave speed - b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnectic sum - a_square_roe = ((2.0 - equations.gamma) * X + - (equations.gamma -1.0) * (H_roe - 0.5*(v1_roe^2 + v2_roe^2 + v3_roe^2) - - b_square_roe)) # acoustic speed - - # finally compute the average wave speed and set the output velocity (depends on orientation) - norm_squared = (normal_direction[1] * normal_direction[1] + - normal_direction[2] * normal_direction[2] + - normal_direction[3] * normal_direction[3]) - B_roe_dot_n_squared = (B1_roe * normal_direction[1] + - B2_roe * normal_direction[2] + - B3_roe * normal_direction[3])^2 / norm_squared - - c_a_roe = B_roe_dot_n_squared * inv_sqrt_rho_prod # (squared) Alfvén wave speed - a_star_roe = sqrt((a_square_roe + b_square_roe)^2 - 4 * a_square_roe * c_a_roe) - c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe) * norm_squared) - vel_out_roe = (v1_roe * normal_direction[1] + - v2_roe * normal_direction[2] + - v3_roe * normal_direction[3]) - - return vel_out_roe, c_f_roe +@inline function calc_fast_wavespeed_roe(u_ll, u_rr, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations3D) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + 
rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr
+
+    # Calculate primitive variables
+    v1_ll = rho_v1_ll / rho_ll
+    v2_ll = rho_v2_ll / rho_ll
+    v3_ll = rho_v3_ll / rho_ll
+    kin_en_ll = 0.5 * (rho_v1_ll * v1_ll + rho_v2_ll * v2_ll + rho_v3_ll * v3_ll)
+    mag_norm_ll = B1_ll * B1_ll + B2_ll * B2_ll + B3_ll * B3_ll
+    p_ll = (equations.gamma - 1) *
+           (rho_e_ll - kin_en_ll - 0.5 * mag_norm_ll - 0.5 * psi_ll^2)
+
+    v1_rr = rho_v1_rr / rho_rr
+    v2_rr = rho_v2_rr / rho_rr
+    v3_rr = rho_v3_rr / rho_rr
+    kin_en_rr = 0.5 * (rho_v1_rr * v1_rr + rho_v2_rr * v2_rr + rho_v3_rr * v3_rr)
+    mag_norm_rr = B1_rr * B1_rr + B2_rr * B2_rr + B3_rr * B3_rr
+    p_rr = (equations.gamma - 1) *
+           (rho_e_rr - kin_en_rr - 0.5 * mag_norm_rr - 0.5 * psi_rr^2)
+
+    # compute total pressure which is thermal + magnetic pressures
+    p_total_ll = p_ll + 0.5 * mag_norm_ll
+    p_total_rr = p_rr + 0.5 * mag_norm_rr
+
+    # compute the Roe density averages
+    sqrt_rho_ll = sqrt(rho_ll)
+    sqrt_rho_rr = sqrt(rho_rr)
+    inv_sqrt_rho_add = 1.0 / (sqrt_rho_ll + sqrt_rho_rr)
+    inv_sqrt_rho_prod = 1.0 / (sqrt_rho_ll * sqrt_rho_rr)
+    rho_ll_roe = sqrt_rho_ll * inv_sqrt_rho_add
+    rho_rr_roe = sqrt_rho_rr * inv_sqrt_rho_add
+    # Roe averages
+    # velocities and magnetic fields
+    v1_roe = v1_ll * rho_ll_roe + v1_rr * rho_rr_roe
+    v2_roe = v2_ll * rho_ll_roe + v2_rr * rho_rr_roe
+    v3_roe = v3_ll * rho_ll_roe + v3_rr * rho_rr_roe
+    B1_roe = B1_ll * rho_ll_roe + B1_rr * rho_rr_roe
+    B2_roe = B2_ll * rho_ll_roe + B2_rr * rho_rr_roe
+    B3_roe = B3_ll * rho_ll_roe + B3_rr * rho_rr_roe
+    # enthalpy
+    H_ll = (rho_e_ll + p_total_ll) / rho_ll
+    H_rr = (rho_e_rr + p_total_rr) / rho_rr
+    H_roe = H_ll * rho_ll_roe + H_rr * rho_rr_roe
+    # temporary variable see equation (4.12) in Cargo and Gallice
+    X = 0.5 * ((B1_ll - B1_rr)^2 + (B2_ll - B2_rr)^2 + (B3_ll - B3_rr)^2) *
+        inv_sqrt_rho_add^2
+    # averaged components needed to compute c_f, the fast magnetoacoustic wave speed
+    b_square_roe = (B1_roe^2 + B2_roe^2 + B3_roe^2) * inv_sqrt_rho_prod # scaled magnetic sum
+    a_square_roe = ((2.0 - equations.gamma) * X +
+                    (equations.gamma - 1.0) *
+                    (H_roe - 0.5 * (v1_roe^2 + v2_roe^2 + v3_roe^2) -
+                     b_square_roe)) # acoustic speed
+
+    # finally compute the average wave speed and set the output velocity (depends on orientation)
+    norm_squared = (normal_direction[1] * normal_direction[1] +
+                    normal_direction[2] * normal_direction[2] +
+                    normal_direction[3] * normal_direction[3])
+    B_roe_dot_n_squared = (B1_roe * normal_direction[1] +
+                           B2_roe * normal_direction[2] +
+                           B3_roe * normal_direction[3])^2 / norm_squared
+
+    c_a_roe = B_roe_dot_n_squared * inv_sqrt_rho_prod # (squared) Alfvén wave speed
+    a_star_roe = sqrt((a_square_roe + b_square_roe)^2 - 4 * a_square_roe * c_a_roe)
+    c_f_roe = sqrt(0.5 * (a_square_roe + b_square_roe + a_star_roe) * norm_squared)
+    vel_out_roe = (v1_roe * normal_direction[1] +
+                   v2_roe * normal_direction[2] +
+                   v3_roe * normal_direction[3])
+
+    return vel_out_roe, c_f_roe
 end

-
# Calculate thermodynamic entropy for a conservative state `cons`
@inline function entropy_thermodynamic(cons, equations::IdealGlmMhdEquations3D)
-  # Pressure
-  p = (equations.gamma - 1) * (cons[5] - 1/2 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1]
-                                       - 1/2 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
-                                       - 1/2 * cons[9]^2)
-
-  # Thermodynamic entropy
-  s = log(p) - equations.gamma*log(cons[1])
-
-  return s
+    # Pressure
+    p = (equations.gamma - 1) *
+        (cons[5] - 1 / 2 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1]
+         -
+         1 / 2 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
+         -
+         1 / 2 * cons[9]^2)
+
+    # Thermodynamic entropy
+    s = log(p) - equations.gamma * log(cons[1])
+
+    return s
 end
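Editor's note: the entropy variables returned by `cons2entropy` earlier in this hunk
should equal the gradient of the mathematical entropy `entropy_math` defined just
below. A sketch of this consistency check (not part of the patch; it assumes
`ForwardDiff` from the test dependencies and uses an arbitrary admissible state):

```julia
using Trixi, StaticArrays
using ForwardDiff

equations = IdealGlmMhdEquations3D(5 / 3)
u = prim2cons(SVector(1.2, 0.1, -0.2, 0.3, 1.5, 0.4, -0.3, 0.2, 0.05), equations)

w = cons2entropy(u, equations)
# entropy_math is not exported, hence the module prefix
grad_S = ForwardDiff.gradient(v -> Trixi.entropy_math(v, equations), u)
maximum(abs, grad_S - w)  # expected to be ≈ 0 up to roundoff
```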
-
# Calculate mathematical entropy for a conservative state `cons`
@inline function entropy_math(cons, equations::IdealGlmMhdEquations3D)
-  S = -entropy_thermodynamic(cons, equations) * cons[1] * equations.inv_gamma_minus_one
+    S = -entropy_thermodynamic(cons, equations) * cons[1] *
+        equations.inv_gamma_minus_one

-  return S
+    return S
 end

-
# Default entropy is the mathematical entropy
@inline entropy(cons, equations::IdealGlmMhdEquations3D) = entropy_math(cons, equations)

-
# Calculate total energy for a conservative state `cons`
@inline energy_total(cons, ::IdealGlmMhdEquations3D) = cons[5]

-
# Calculate kinetic energy for a conservative state `cons`
@inline function energy_kinetic(cons, equations::IdealGlmMhdEquations3D)
-  return 0.5 * (cons[2]^2 + cons[3]^2 + cons[4]^2)/cons[1]
+    return 0.5 * (cons[2]^2 + cons[3]^2 + cons[4]^2) / cons[1]
 end

-
# Calculate the magnetic energy for a conservative state `cons`.
# OBS! For non-dimensional form of the ideal MHD magnetic pressure ≡ magnetic energy
@inline function energy_magnetic(cons, ::IdealGlmMhdEquations3D)
-  return 0.5 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
+    return 0.5 * (cons[6]^2 + cons[7]^2 + cons[8]^2)
 end

-
# Calculate internal energy for a conservative state `cons`
@inline function energy_internal(cons, equations::IdealGlmMhdEquations3D)
-  return (energy_total(cons, equations)
-          - energy_kinetic(cons, equations)
-          - energy_magnetic(cons, equations)
-          - cons[9]^2 / 2)
+    return (energy_total(cons, equations)
+            -
+            energy_kinetic(cons, equations)
+            -
+            energy_magnetic(cons, equations)
+            -
+            cons[9]^2 / 2)
 end

-
# Calculate the cross helicity (\vec{v}⋅\vec{B}) for a conservative state `cons`
@inline function cross_helicity(cons, ::IdealGlmMhdEquations3D)
-  return (cons[2]*cons[6] + cons[3]*cons[7] + cons[4]*cons[8]) / cons[1]
+    return (cons[2] * cons[6] + cons[3] * cons[7] + cons[4] * cons[8]) / cons[1]
 end
-
-
 end # @muladd
diff --git a/src/equations/ideal_glm_mhd_multicomponent_1d.jl b/src/equations/ideal_glm_mhd_multicomponent_1d.jl
index 59cb9bdfad8..0efa6426448 100644
--- a/src/equations/ideal_glm_mhd_multicomponent_1d.jl
+++ b/src/equations/ideal_glm_mhd_multicomponent_1d.jl
@@ -3,88 +3,107 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#! format: noindent

 @doc raw"""
     IdealGlmMhdMulticomponentEquations1D

The ideal compressible multicomponent GLM-MHD equations in one space dimension.
""" -mutable struct IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT<:Real} <: AbstractIdealGlmMhdMulticomponentEquations{1, NVARS, NCOMP} - gammas ::SVector{NCOMP, RealT} - gas_constants ::SVector{NCOMP, RealT} - cv ::SVector{NCOMP, RealT} - cp ::SVector{NCOMP, RealT} - - function IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT}(gammas ::SVector{NCOMP, RealT}, - gas_constants::SVector{NCOMP, RealT}) where {NVARS, NCOMP, RealT<:Real} - - NCOMP >= 1 || throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) - - cv = gas_constants ./ (gammas .- 1) - cp = gas_constants + gas_constants ./ (gammas .- 1) - - new(gammas, gas_constants, cv, cp) - end +mutable struct IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT <: Real} <: + AbstractIdealGlmMhdMulticomponentEquations{1, NVARS, NCOMP} + gammas::SVector{NCOMP, RealT} + gas_constants::SVector{NCOMP, RealT} + cv::SVector{NCOMP, RealT} + cp::SVector{NCOMP, RealT} + + function IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT}(gammas::SVector{ + NCOMP, + RealT + }, + gas_constants::SVector{ + NCOMP, + RealT + }) where { + NVARS, + NCOMP, + RealT <: + Real + } + NCOMP >= 1 || + throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) + + cv = gas_constants ./ (gammas .- 1) + cp = gas_constants + gas_constants ./ (gammas .- 1) + + new(gammas, gas_constants, cv, cp) + end end function IdealGlmMhdMulticomponentEquations1D(; gammas, gas_constants) + _gammas = promote(gammas...) + _gas_constants = promote(gas_constants...) + RealT = promote_type(eltype(_gammas), eltype(_gas_constants)) - _gammas = promote(gammas...) - _gas_constants = promote(gas_constants...) - RealT = promote_type(eltype(_gammas), eltype(_gas_constants)) - - NVARS = length(_gammas) + 7 - NCOMP = length(_gammas) + NVARS = length(_gammas) + 7 + NCOMP = length(_gammas) - __gammas = SVector(map(RealT, _gammas)) - __gas_constants = SVector(map(RealT, _gas_constants)) + __gammas = SVector(map(RealT, _gammas)) + __gas_constants = SVector(map(RealT, _gas_constants)) - return IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT}(__gammas, __gas_constants) + return IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT}(__gammas, + __gas_constants) end -@inline Base.real(::IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT}) where {NVARS, NCOMP, RealT} = RealT +@inline function Base.real(::IdealGlmMhdMulticomponentEquations1D{NVARS, NCOMP, RealT}) where { + NVARS, + NCOMP, + RealT + } + RealT +end have_nonconservative_terms(::IdealGlmMhdMulticomponentEquations1D) = False() function varnames(::typeof(cons2cons), equations::IdealGlmMhdMulticomponentEquations1D) - - cons = ("rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (cons..., rhos...) + cons = ("rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (cons..., rhos...) end function varnames(::typeof(cons2prim), equations::IdealGlmMhdMulticomponentEquations1D) - - prim = ("v1", "v2", "v3", "p", "B1", "B2", "B3") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (prim..., rhos...) + prim = ("v1", "v2", "v3", "p", "B1", "B2", "B3") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (prim..., rhos...) 
end

-
"""
    initial_condition_convergence_test(x, t, equations::IdealGlmMhdMulticomponentEquations1D)

An Alfvén wave as a smooth initial condition used for convergence tests.
"""
-function initial_condition_convergence_test(x, t, equations::IdealGlmMhdMulticomponentEquations1D)
-  # smooth Alfvén wave test from Derigs et al. FLASH (2016)
-  # domain must be set to [0, 1], γ = 5/3
-
-  rho = 1.0
-  prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i-1) * (1-2)/(1-2^ncomponents(equations)) * rho for i in eachcomponent(equations))
-  v1 = 0.0
-  si, co = sincos(2 * pi * x[1])
-  v2 = 0.1 * si
-  v3 = 0.1 * co
-  p = 0.1
-  B1 = 1.0
-  B2 = v2
-  B3 = v3
-  prim_other = SVector{7, real(equations)}(v1, v2, v3, p, B1, B2, B3)
-  return prim2cons(vcat(prim_other, prim_rho), equations)
-end
+function initial_condition_convergence_test(x, t,
+                                            equations::IdealGlmMhdMulticomponentEquations1D)
+    # smooth Alfvén wave test from Derigs et al. FLASH (2016)
+    # domain must be set to [0, 1], γ = 5/3
+    rho = 1.0
+    prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i - 1) * (1 - 2) /
+                                                                (1 -
+                                                                 2^ncomponents(equations)) *
+                                                                rho
+                                                                for i in eachcomponent(equations))
+    v1 = 0.0
+    si, co = sincos(2 * pi * x[1])
+    v2 = 0.1 * si
+    v3 = 0.1 * co
+    p = 0.1
+    B1 = 1.0
+    B2 = v2
+    B3 = v3
+    prim_other = SVector{7, real(equations)}(v1, v2, v3, p, B1, B2, B3)
+    return prim2cons(vcat(prim_other, prim_rho), equations)
+end

"""
    initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdMulticomponentEquations1D)

A weak blast wave adapted from
- Sebastian Hennemann, Gregor J. Gassner (2020)
  A provably entropy stable subcell shock capturing approach for high order split form DG
  [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044)
"""
-function initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdMulticomponentEquations1D)
-  # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3)
-  # Same discontinuity in the velocities but with magnetic fields
-  # Set up polar coordinates
-  inicenter = (0)
-  x_norm = x[1] - inicenter[1]
-  r = sqrt(x_norm^2)
-  phi = atan(x_norm)
-
-  # Calculate primitive variables
-  if r > 0.5
-    rho = 1.0
-    prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i-1) * (1-2)/(1-2^ncomponents(equations)) * rho for i in eachcomponent(equations))
-  else
-    rho = 1.1691
-    prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i-1) * (1-2)/(1-2^ncomponents(equations)) * rho for i in eachcomponent(equations))
-  end
-  v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi)
-  p = r > 0.5 ? 1.0 : 1.245
-
-  prim_other = SVector{7, real(equations)}(v1, 0.0, 0.0, p, 1.0, 1.0, 1.0)
-
-  return prim2cons(vcat(prim_other, prim_rho), equations)
+function initial_condition_weak_blast_wave(x, t,
+                                           equations::IdealGlmMhdMulticomponentEquations1D)
+    # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3)
+    # Same discontinuity in the velocities but with magnetic fields
+    # Set up polar coordinates
+    inicenter = (0)
+    x_norm = x[1] - inicenter[1]
+    r = sqrt(x_norm^2)
+    phi = atan(x_norm)
+
+    # Calculate primitive variables
+    if r > 0.5
+        rho = 1.0
+        prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i - 1) *
+                                                                    (1 - 2) / (1 -
+                                                                     2^ncomponents(equations)) *
+                                                                    rho
+                                                                    for i in eachcomponent(equations))
+    else
+        rho = 1.1691
+        prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i - 1) *
+                                                                    (1 - 2) / (1 -
+                                                                     2^ncomponents(equations)) *
+                                                                    rho
+                                                                    for i in eachcomponent(equations))
+    end
+    v1 = r > 0.5 ? 0.0 : 0.1882 * cos(phi)
+    p = r > 0.5 ? 1.0 : 1.245
+
+    prim_other = SVector{7, real(equations)}(v1, 0.0, 0.0, p, 1.0, 1.0, 1.0)

+    return prim2cons(vcat(prim_other, prim_rho), equations)
end
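Editor's note: both initial conditions above can be sampled directly once the
equations object exists. A short usage sketch (not part of the patch; the
two-component `gammas`/`gas_constants` values are arbitrary illustration choices,
with γ = 5/3 so the Alfvén wave test setup applies):

```julia
using Trixi, StaticArrays

equations = IdealGlmMhdMulticomponentEquations1D(gammas = (5 / 3, 5 / 3),
                                                 gas_constants = (0.4, 0.4))
x = SVector(0.25)
u_alfven = initial_condition_convergence_test(x, 0.0, equations)
u_blast = initial_condition_weak_blast_wave(x, 0.0, equations)
length(u_alfven)  # 7 + ncomponents(equations) == 9 conservative variables
```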
-
# Calculate 1D flux for a single point
-@inline function flux(u, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations1D)
-  rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u
-
-  rho = density(u, equations)
-
-  v1 = rho_v1/rho
-  v2 = rho_v2/rho
-  v3 = rho_v3/rho
-  kin_en = 0.5 * rho * (v1^2 + v2^2 + v3^2)
-  mag_en = 0.5*(B1^2 + B2^2 + B3^2)
-  gamma = totalgamma(u, equations)
-  p = (gamma - 1) * (rho_e - kin_en - mag_en)
-
-
-  f_rho = densities(u, v1, equations)
-  f1 = rho_v1*v1 + p + mag_en - B1^2
-  f2 = rho_v1*v2 - B1*B2
-  f3 = rho_v1*v3 - B1*B3
-  f4 = (kin_en + gamma*p/(gamma - 1) + 2*mag_en)*v1 - B1*(v1*B1 + v2*B2 + v3*B3)
-  f5 = 0.0
-  f6 = v1*B2 - v2*B1
-  f7 = v1*B3 - v3*B1
-
-
-  f_other = SVector{7, real(equations)}(f1, f2, f3, f4, f5, f6, f7)
-
-  return vcat(f_other, f_rho)
+@inline function flux(u, orientation::Integer,
+                      equations::IdealGlmMhdMulticomponentEquations1D)
+    rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u
+
+    rho = density(u, equations)
+
+    v1 = rho_v1 / rho
+    v2 = rho_v2 / rho
+    v3 = rho_v3 / rho
+    kin_en = 0.5 * rho * (v1^2 + v2^2 + v3^2)
+    mag_en = 0.5 * (B1^2 + B2^2 + B3^2)
+    gamma = totalgamma(u, equations)
+    p = (gamma - 1) * (rho_e - kin_en - mag_en)
+
+    f_rho = densities(u, v1, equations)
+    f1 = rho_v1 * v1 + p + mag_en - B1^2
+    f2 = rho_v1 * v2 - B1 * B2
+    f3 = rho_v1 * v3 - B1 * B3
+    f4 = (kin_en + gamma * p / (gamma - 1) + 2 * mag_en) * v1 -
+         B1 * (v1 * B1 + v2 * B2 + v3 * B3)
+    f5 = 0.0
+    f6 = v1 * B2 - v2 * B1
+    f7 = v1 * B3 - v3 * B1
+
+    f_other = SVector{7, real(equations)}(f1, f2, f3, f4, f5, f6, f7)
+
+    return vcat(f_other, f_rho)
 end

-
"""
    flux_derigs_etal(u_ll, u_rr, orientation, equations::IdealGlmMhdMulticomponentEquations1D)

Entropy conserving two-point flux adapted by
- Derigs et al. (2018)
  Ideal GLM-MHD: About the entropy consistent nine-wave magnetic field
  divergence diminishing ideal magnetohydrodynamics equations for multicomponent
  [DOI: 10.1016/j.jcp.2018.03.002](https://doi.org/10.1016/j.jcp.2018.03.002)
"""
-function flux_derigs_etal(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations1D)
-  # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta)
-  rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll = u_ll
-  rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr = u_rr
-  @unpack gammas, gas_constants, cv = equations
-
-  rho_ll = density(u_ll, equations)
-  rho_rr = density(u_rr, equations)
-
-  gamma_ll = totalgamma(u_ll, equations)
-  gamma_rr = totalgamma(u_rr, equations)
-
-  rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+7], u_rr[i+7]) for i in eachcomponent(equations))
-  rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+7] + u_rr[i+7]) for i in eachcomponent(equations))
-
-  v1_ll = rho_v1_ll/rho_ll
-  v2_ll = rho_v2_ll/rho_ll
-  v3_ll = rho_v3_ll/rho_ll
-  v1_rr = rho_v1_rr/rho_rr
-  v2_rr = rho_v2_rr/rho_rr
-  v3_rr = rho_v3_rr/rho_rr
-  vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2
-  vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2
-  mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2
-  mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2
-  # for convenience store v⋅B
-  vel_dot_mag_ll = v1_ll*B1_ll + v2_ll*B2_ll + v3_ll*B3_ll
-  vel_dot_mag_rr = v1_rr*B1_rr + v2_rr*B2_rr + v3_rr*B3_rr
-
-  # Compute the necessary mean values needed for either direction
-  v1_avg = 0.5*(v1_ll+v1_rr)
-  v2_avg = 0.5*(v2_ll+v2_rr)
-  v3_avg = 0.5*(v3_ll+v3_rr)
-  v_sum = v1_avg +
v2_avg + v3_avg - B1_avg = 0.5*(B1_ll+B1_rr) - B2_avg = 0.5*(B2_ll+B2_rr) - B3_avg = 0.5*(B3_ll+B3_rr) - vel_norm_avg = 0.5*(vel_norm_ll+vel_norm_rr) - mag_norm_avg = 0.5*(mag_norm_ll+mag_norm_rr) - vel_dot_mag_avg = 0.5*(vel_dot_mag_ll+vel_dot_mag_rr) - - enth = zero(v_sum) - help1_ll = zero(v1_ll) - help1_rr = zero(v1_rr) - - for i in eachcomponent(equations) - enth += rhok_avg[i] * gas_constants[i] - help1_ll += u_ll[i+7] * cv[i] - help1_rr += u_rr[i+7] * cv[i] - end - - T_ll = (rho_e_ll - 0.5*rho_ll * (vel_norm_ll) - 0.5*mag_norm_ll) / help1_ll - T_rr = (rho_e_rr - 0.5*rho_rr * (vel_norm_rr) - 0.5*mag_norm_rr) / help1_rr - T = 0.5 * (1.0/T_ll + 1.0/T_rr) - T_log = ln_mean(1.0/T_ll, 1.0/T_rr) - - # Calculate fluxes depending on orientation with specific direction averages - help1 = zero(T_ll) - help2 = zero(T_rr) - - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) - for i in eachcomponent(equations) - help1 += f_rho[i] * cv[i] - help2 += f_rho[i] - end - f1 = help2 * v1_avg + enth/T + 0.5 * mag_norm_avg - B1_avg*B1_avg - f2 = help2 * v2_avg - B1_avg*B2_avg - f3 = help2 * v3_avg - B1_avg*B3_avg - f5 = 0.0 - f6 = v1_avg*B2_avg - v2_avg*B1_avg - f7 = v1_avg*B3_avg - v3_avg*B1_avg - - # total energy flux is complicated and involves the previous eight components - v1_mag_avg = 0.5*(v1_ll*mag_norm_ll + v1_rr*mag_norm_rr) - - f4 = (help1/T_log) - 0.5 * (vel_norm_avg) * (help2) + f1 * v1_avg + f2 * v2_avg + f3 * v3_avg + - f5 * B1_avg + f6 * B2_avg + f7 * B3_avg - 0.5*v1_mag_avg + - B1_avg * vel_dot_mag_avg - - - f_other = SVector{7, real(equations)}(f1, f2, f3, f4, f5, f6, f7) - - return vcat(f_other, f_rho) +function flux_derigs_etal(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdMulticomponentEquations1D) + # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta) + rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll = u_ll + rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr = u_rr + @unpack gammas, gas_constants, cv = equations + + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + gamma_ll = totalgamma(u_ll, equations) + gamma_rr = totalgamma(u_rr, equations) + + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 7], + u_rr[i + 7]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i + 7] + + u_rr[i + 7]) + for i in eachcomponent(equations)) + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 + vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 + mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 + mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 + # for convenience store v⋅B + vel_dot_mag_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + vel_dot_mag_rr = v1_rr * B1_rr + v2_rr * B2_rr + v3_rr * B3_rr + + # Compute the necessary mean values needed for either direction + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + v_sum = v1_avg + v2_avg + v3_avg + B1_avg = 0.5 * (B1_ll + B1_rr) + B2_avg = 0.5 * (B2_ll + B2_rr) + B3_avg = 0.5 * (B3_ll + B3_rr) + vel_norm_avg = 0.5 * (vel_norm_ll + vel_norm_rr) + mag_norm_avg = 0.5 * (mag_norm_ll + mag_norm_rr) + vel_dot_mag_avg = 0.5 * (vel_dot_mag_ll + vel_dot_mag_rr) + + enth = zero(v_sum) + help1_ll = zero(v1_ll) 
+ help1_rr = zero(v1_rr) + + for i in eachcomponent(equations) + enth += rhok_avg[i] * gas_constants[i] + help1_ll += u_ll[i + 7] * cv[i] + help1_rr += u_rr[i + 7] * cv[i] + end + + T_ll = (rho_e_ll - 0.5 * rho_ll * (vel_norm_ll) - 0.5 * mag_norm_ll) / help1_ll + T_rr = (rho_e_rr - 0.5 * rho_rr * (vel_norm_rr) - 0.5 * mag_norm_rr) / help1_rr + T = 0.5 * (1.0 / T_ll + 1.0 / T_rr) + T_log = ln_mean(1.0 / T_ll, 1.0 / T_rr) + + # Calculate fluxes depending on orientation with specific direction averages + help1 = zero(T_ll) + help2 = zero(T_rr) + + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + help1 += f_rho[i] * cv[i] + help2 += f_rho[i] + end + f1 = help2 * v1_avg + enth / T + 0.5 * mag_norm_avg - B1_avg * B1_avg + f2 = help2 * v2_avg - B1_avg * B2_avg + f3 = help2 * v3_avg - B1_avg * B3_avg + f5 = 0.0 + f6 = v1_avg * B2_avg - v2_avg * B1_avg + f7 = v1_avg * B3_avg - v3_avg * B1_avg + + # total energy flux is complicated and involves the previous eight components + v1_mag_avg = 0.5 * (v1_ll * mag_norm_ll + v1_rr * mag_norm_rr) + + f4 = (help1 / T_log) - 0.5 * (vel_norm_avg) * (help2) + f1 * v1_avg + f2 * v2_avg + + f3 * v3_avg + + f5 * B1_avg + f6 * B2_avg + f7 * B3_avg - 0.5 * v1_mag_avg + + B1_avg * vel_dot_mag_avg + + f_other = SVector{7, real(equations)}(f1, f2, f3, f4, f5, f6, f7) + + return vcat(f_other, f_rho) end - """ flux_hindenlang_gassner(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdMulticomponentEquations1D) @@ -267,239 +298,250 @@ Hindenlang (2019), extending [`flux_ranocha`](@ref) to the MHD equations. the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations1D) - # Unpack left and right states - v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll = cons2prim(u_ll, equations) - v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr = cons2prim(u_rr, equations) - - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - # Compute the necessary mean values needed for either direction - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) - - inv_gamma_minus_one = 1 / (totalgamma(0.5 * (u_ll + u_rr), equations) - 1) - - rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+7], u_rr[i+7]) for i in eachcomponent(equations)) - rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+7] + u_rr[i+7]) for i in eachcomponent(equations)) - - f1 = zero(rho_ll) - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) - for i in eachcomponent(equations) - f1 += f_rho[i] - end - - # Calculate fluxes depending on orientation with specific direction averages - f2 = f1 * v1_avg + p_avg + magnetic_square_avg - 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) - f3 = f1 * v2_avg - 
0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) - f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) - #f5 below - f6 = 0.0 - f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) - f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v1_rr + p_rr * v1_ll - + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll) - + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) - - (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) - - (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll) ) ) - - f_other = SVector{7, real(equations)}(f2, f3, f4, f5, f6, f7, f8) - - return vcat(f_other, f_rho) +@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdMulticomponentEquations1D) + # Unpack left and right states + v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll = cons2prim(u_ll, equations) + v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr = cons2prim(u_rr, equations) + + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + # Compute the necessary mean values needed for either direction + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) + + inv_gamma_minus_one = 1 / (totalgamma(0.5 * (u_ll + u_rr), equations) - 1) + + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 7], + u_rr[i + 7]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i + 7] + + u_rr[i + 7]) + for i in eachcomponent(equations)) + + f1 = zero(rho_ll) + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + f1 += f_rho[i] + end + + # Calculate fluxes depending on orientation with specific direction averages + f2 = f1 * v1_avg + p_avg + magnetic_square_avg - + 0.5 * (B1_ll * B1_rr + B1_rr * B1_ll) + f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll) + f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll) + #f5 below + f6 = 0.0 + f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr) + f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr) + # total energy flux is complicated and involves the previous components + f5 = (f1 * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one) + + + 0.5 * (+p_ll * v1_rr + p_rr * v1_ll + + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll) + + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll) + - + (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll) + - + (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll))) + + f_other = SVector{7, real(equations)}(f2, f3, f4, f5, f6, f7, f8) + + return vcat(f_other, f_rho) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations1D) - rho_v1_ll, _ = u_ll - 
rho_v1_rr, _ = u_rr - - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - # Calculate velocities (ignore orientation since it is always "1" in 1D) - # and fast magnetoacoustic wave speeds - # left - v_ll = rho_v1_ll / rho_ll - cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) - # right - v_rr = rho_v1_rr / rho_rr - cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) - - λ_max = max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdMulticomponentEquations1D) + rho_v1_ll, _ = u_ll + rho_v1_rr, _ = u_rr + + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + # Calculate velocities (ignore orientation since it is always "1" in 1D) + # and fast magnetoacoustic wave speeds + # left + v_ll = rho_v1_ll / rho_ll + cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) + # right + v_rr = rho_v1_rr / rho_rr + cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) + + λ_max = max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) end - @inline function max_abs_speeds(u, equations::IdealGlmMhdMulticomponentEquations1D) - rho_v1, _ = u + rho_v1, _ = u - rho = density(u, equations) + rho = density(u, equations) - v1 = rho_v1 / rho + v1 = rho_v1 / rho - cf_x_direction = calc_fast_wavespeed(u, 1, equations) + cf_x_direction = calc_fast_wavespeed(u, 1, equations) - return (abs(v1) + cf_x_direction, ) + return (abs(v1) + cf_x_direction,) end - # Convert conservative variables to primitive function cons2prim(u, equations::IdealGlmMhdMulticomponentEquations1D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - prim_rho = SVector{ncomponents(equations), real(equations)}(u[i+7] for i in eachcomponent(equations)) - rho = density(u, equations) + prim_rho = SVector{ncomponents(equations), real(equations)}(u[i + 7] + for i in eachcomponent(equations)) + rho = density(u, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho - gamma = totalgamma(u, equations) + gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5*rho*(v1^2 + v2^2 + v3^2) - 0.5*(B1^2 + B2^2 + B3^2)) - prim_other = SVector{7, real(equations)}(v1, v2, v3, p, B1, B2, B3) - return vcat(prim_other, prim_rho) + p = (gamma - 1) * + (rho_e - 0.5 * rho * (v1^2 + v2^2 + v3^2) - 0.5 * (B1^2 + B2^2 + B3^2)) + prim_other = SVector{7, real(equations)}(v1, v2, v3, p, B1, B2, B3) + return vcat(prim_other, prim_rho) end # Convert conservative variables to entropy @inline function cons2entropy(u, equations::IdealGlmMhdMulticomponentEquations1D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - @unpack cv, gammas, gas_constants = equations - - rho = density(u, equations) - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_square = v1^2 + v2^2 + v3^2 - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5*rho*v_square - 0.5*(B1^2 + B2^2 + B3^2)) - s = log(p) - gamma*log(rho) - rho_p = rho / p - - # Multicomponent stuff - help1 = zero(v1) - - for i in eachcomponent(equations) - help1 += u[i+7] * cv[i] - end - - T = (rho_e - 0.5 * rho * v_square - 0.5*(B1^2 + B2^2 + B3^2)) / (help1) - - entrop_rho = SVector{ncomponents(equations), real(equations)}( -1.0 * (cv[i] * log(T) - gas_constants[i] * log(u[i+7])) + gas_constants[i] + cv[i] - (v_square / (2*T)) for i in eachcomponent(equations)) - - w1 = v1 / T - w2 = v2 / T - w3 = v3 / T - w4 = -1.0 / T - w5 = B1 / T - w6 = B2 / T 
- w7 = B3 / T - - entrop_other = SVector{7, real(equations)}(w1, w2, w3, w4, w5, w6, w7) - - return vcat(entrop_other, entrop_rho) + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + @unpack cv, gammas, gas_constants = equations + + rho = density(u, equations) + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_square = v1^2 + v2^2 + v3^2 + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 0.5 * rho * v_square - 0.5 * (B1^2 + B2^2 + B3^2)) + s = log(p) - gamma * log(rho) + rho_p = rho / p + + # Multicomponent stuff + help1 = zero(v1) + + for i in eachcomponent(equations) + help1 += u[i + 7] * cv[i] + end + + T = (rho_e - 0.5 * rho * v_square - 0.5 * (B1^2 + B2^2 + B3^2)) / (help1) + + entrop_rho = SVector{ncomponents(equations), real(equations)}(-1.0 * + (cv[i] * log(T) - + gas_constants[i] * + log(u[i + 7])) + + gas_constants[i] + + cv[i] - + (v_square / (2 * T)) + for i in eachcomponent(equations)) + + w1 = v1 / T + w2 = v2 / T + w3 = v3 / T + w4 = -1.0 / T + w5 = B1 / T + w6 = B2 / T + w7 = B3 / T + + entrop_other = SVector{7, real(equations)}(w1, w2, w3, w4, w5, w6, w7) + + return vcat(entrop_other, entrop_rho) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::IdealGlmMhdMulticomponentEquations1D) - v1, v2, v3, p, B1, B2, B3 = prim + v1, v2, v3, p, B1, B2, B3 = prim - cons_rho = SVector{ncomponents(equations), real(equations)}(prim[i+7] for i in eachcomponent(equations)) - rho = density(prim, equations) + cons_rho = SVector{ncomponents(equations), real(equations)}(prim[i + 7] + for i in eachcomponent(equations)) + rho = density(prim, equations) - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_v3 = rho * v3 + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_v3 = rho * v3 - gamma = totalgamma(prim, equations) - rho_e = p/(gamma-1) + 0.5 * (rho_v1*v1 + rho_v2*v2 + rho_v3*v3) + - 0.5 * (B1^2 + B2^2 + B3^2) + gamma = totalgamma(prim, equations) + rho_e = p / (gamma - 1) + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + + 0.5 * (B1^2 + B2^2 + B3^2) - cons_other = SVector{7, real(equations)}(rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3) + cons_other = SVector{7, real(equations)}(rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3) - return vcat(cons_other, cons_rho) + return vcat(cons_other, cons_rho) end - @inline function density_pressure(u, equations::IdealGlmMhdMulticomponentEquations1D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u - rho = density(u, equations) - gamma = totalgamma(u, equations) - p = (gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2) - ) - return rho * p + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = u + rho = density(u, equations) + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2)) + return rho * p end - # Compute the fastest wave speed for ideal MHD equations: c_f, the fast magnetoacoustic eigenvalue -@inline function calc_fast_wavespeed(cons, direction, equations::IdealGlmMhdMulticomponentEquations1D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = cons - rho = density(cons, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_mag = sqrt(v1^2 + v2^2 + v3^2) - gamma = totalgamma(cons, equations) - p = (gamma - 1)*(rho_e - 0.5*rho*v_mag^2 - 0.5*(B1^2 + B2^2 + B3^2)) - a_square = gamma * p / rho - sqrt_rho = sqrt(rho) - b1 = B1 / sqrt_rho - b2 = B2 / sqrt_rho - b3 = B3 / sqrt_rho - b_square = b1^2 + b2^2 + b3^2 - - c_f = sqrt(0.5*(a_square + b_square) + 
0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b1^2)) - - return c_f +@inline function calc_fast_wavespeed(cons, direction, + equations::IdealGlmMhdMulticomponentEquations1D) + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3 = cons + rho = density(cons, equations) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_mag = sqrt(v1^2 + v2^2 + v3^2) + gamma = totalgamma(cons, equations) + p = (gamma - 1) * (rho_e - 0.5 * rho * v_mag^2 - 0.5 * (B1^2 + B2^2 + B3^2)) + a_square = gamma * p / rho + sqrt_rho = sqrt(rho) + b1 = B1 / sqrt_rho + b2 = B2 / sqrt_rho + b3 = B3 / sqrt_rho + b_square = b1^2 + b2^2 + b3^2 + + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b1^2)) + + return c_f end - @inline function density(u, equations::IdealGlmMhdMulticomponentEquations1D) - rho = zero(u[1]) - - for i in eachcomponent(equations) - rho += u[i+7] - end + rho = zero(u[1]) - return rho - end + for i in eachcomponent(equations) + rho += u[i + 7] + end + return rho +end - @inline function totalgamma(u, equations::IdealGlmMhdMulticomponentEquations1D) - @unpack cv, gammas = equations +@inline function totalgamma(u, equations::IdealGlmMhdMulticomponentEquations1D) + @unpack cv, gammas = equations - help1 = zero(u[1]) - help2 = zero(u[1]) + help1 = zero(u[1]) + help2 = zero(u[1]) - for i in eachcomponent(equations) - help1 += u[i+7] * cv[i] * gammas[i] - help2 += u[i+7] * cv[i] - end + for i in eachcomponent(equations) + help1 += u[i + 7] * cv[i] * gammas[i] + help2 += u[i + 7] * cv[i] + end - return help1/help2 + return help1 / help2 end - @inline function densities(u, v, equations::IdealGlmMhdMulticomponentEquations1D) - - return SVector{ncomponents(equations), real(equations)}(u[i+7]*v for i in eachcomponent(equations)) - end - - + return SVector{ncomponents(equations), real(equations)}(u[i + 7] * v + for i in eachcomponent(equations)) +end end # @muladd diff --git a/src/equations/ideal_glm_mhd_multicomponent_2d.jl b/src/equations/ideal_glm_mhd_multicomponent_2d.jl index 3403341b47e..9b0eeb411e8 100644 --- a/src/equations/ideal_glm_mhd_multicomponent_2d.jl +++ b/src/equations/ideal_glm_mhd_multicomponent_2d.jl @@ -3,95 +3,116 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" IdealGlmMhdMulticomponentEquations2D The ideal compressible multicomponent GLM-MHD equations in two space dimensions. 
""" -mutable struct IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT<:Real} <: AbstractIdealGlmMhdMulticomponentEquations{2, NVARS, NCOMP} - gammas ::SVector{NCOMP, RealT} - gas_constants ::SVector{NCOMP, RealT} - cv ::SVector{NCOMP, RealT} - cp ::SVector{NCOMP, RealT} - c_h ::RealT # GLM cleaning speed - - function IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT}(gammas ::SVector{NCOMP, RealT}, - gas_constants::SVector{NCOMP, RealT}) where {NVARS, NCOMP, RealT<:Real} - - NCOMP >= 1 || throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) - - cv = gas_constants ./ (gammas .- 1) - cp = gas_constants + gas_constants ./ (gammas .- 1) - c_h = convert(eltype(gammas), NaN) - - new(gammas, gas_constants, cv, cp, c_h) - end +mutable struct IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT <: Real} <: + AbstractIdealGlmMhdMulticomponentEquations{2, NVARS, NCOMP} + gammas::SVector{NCOMP, RealT} + gas_constants::SVector{NCOMP, RealT} + cv::SVector{NCOMP, RealT} + cp::SVector{NCOMP, RealT} + c_h::RealT # GLM cleaning speed + + function IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT}(gammas::SVector{ + NCOMP, + RealT + }, + gas_constants::SVector{ + NCOMP, + RealT + }) where { + NVARS, + NCOMP, + RealT <: + Real + } + NCOMP >= 1 || + throw(DimensionMismatch("`gammas` and `gas_constants` have to be filled with at least one value")) + + cv = gas_constants ./ (gammas .- 1) + cp = gas_constants + gas_constants ./ (gammas .- 1) + c_h = convert(eltype(gammas), NaN) + + new(gammas, gas_constants, cv, cp, c_h) + end end function IdealGlmMhdMulticomponentEquations2D(; gammas, gas_constants) + _gammas = promote(gammas...) + _gas_constants = promote(gas_constants...) + RealT = promote_type(eltype(_gammas), eltype(_gas_constants)) - _gammas = promote(gammas...) - _gas_constants = promote(gas_constants...) - RealT = promote_type(eltype(_gammas), eltype(_gas_constants)) + NVARS = length(_gammas) + 8 + NCOMP = length(_gammas) - NVARS = length(_gammas) + 8 - NCOMP = length(_gammas) + __gammas = SVector(map(RealT, _gammas)) + __gas_constants = SVector(map(RealT, _gas_constants)) - __gammas = SVector(map(RealT, _gammas)) - __gas_constants = SVector(map(RealT, _gas_constants)) - - return IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT}(__gammas, __gas_constants) + return IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT}(__gammas, + __gas_constants) end -@inline Base.real(::IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT}) where {NVARS, NCOMP, RealT} = RealT +@inline function Base.real(::IdealGlmMhdMulticomponentEquations2D{NVARS, NCOMP, RealT}) where { + NVARS, + NCOMP, + RealT + } + RealT +end have_nonconservative_terms(::IdealGlmMhdMulticomponentEquations2D) = True() function varnames(::typeof(cons2cons), equations::IdealGlmMhdMulticomponentEquations2D) - - cons = ("rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3", "psi") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (cons..., rhos...) + cons = ("rho_v1", "rho_v2", "rho_v3", "rho_e", "B1", "B2", "B3", "psi") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (cons..., rhos...) end function varnames(::typeof(cons2prim), equations::IdealGlmMhdMulticomponentEquations2D) - - prim = ("v1", "v2", "v3", "p", "B1", "B2", "B3", "psi") - rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) - return (prim..., rhos...) 
+ prim = ("v1", "v2", "v3", "p", "B1", "B2", "B3", "psi") + rhos = ntuple(n -> "rho" * string(n), Val(ncomponents(equations))) + return (prim..., rhos...) end -default_analysis_integrals(::IdealGlmMhdMulticomponentEquations2D) = (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) - +function default_analysis_integrals(::IdealGlmMhdMulticomponentEquations2D) + (entropy_timederivative, Val(:l2_divb), Val(:linf_divb)) +end """ initial_condition_convergence_test(x, t, equations::IdealGlmMhdMulticomponentEquations2D) An Alfvén wave as smooth initial condition used for convergence tests. """ -function initial_condition_convergence_test(x, t, equations::IdealGlmMhdMulticomponentEquations2D) - # smooth Alfvén wave test from Derigs et al. FLASH (2016) - # domain must be set to [0, 1/cos(α)] x [0, 1/sin(α)], γ = 5/3 - alpha = 0.25*pi - x_perp = x[1]*cos(alpha) + x[2]*sin(alpha) - B_perp = 0.1*sin(2.0*pi*x_perp) - rho = 1 - prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i-1) * (1-2)/(1-2^ncomponents(equations)) * rho for i in eachcomponent(equations)) - v1 = -B_perp*sin(alpha) - v2 = B_perp*cos(alpha) - v3 = 0.1*cos(2.0*pi*x_perp) - p = 0.1 - B1 = cos(alpha) + v1 - B2 = sin(alpha) + v2 - B3 = v3 - psi = 0.0 - prim_other = SVector{8, real(equations)}(v1, v2, v3, p, B1, B2, B3, psi) - return prim2cons(vcat(prim_other, prim_rho), equations) +function initial_condition_convergence_test(x, t, + equations::IdealGlmMhdMulticomponentEquations2D) + # smooth Alfvén wave test from Derigs et al. FLASH (2016) + # domain must be set to [0, 1/cos(α)] x [0, 1/sin(α)], γ = 5/3 + alpha = 0.25 * pi + x_perp = x[1] * cos(alpha) + x[2] * sin(alpha) + B_perp = 0.1 * sin(2.0 * pi * x_perp) + rho = 1 + prim_rho = SVector{ncomponents(equations), real(equations)}(2^(i - 1) * (1 - 2) / + (1 - + 2^ncomponents(equations)) * + rho + for i in eachcomponent(equations)) + v1 = -B_perp * sin(alpha) + v2 = B_perp * cos(alpha) + v3 = 0.1 * cos(2.0 * pi * x_perp) + p = 0.1 + B1 = cos(alpha) + v1 + B2 = sin(alpha) + v2 + B3 = v3 + psi = 0.0 + prim_other = SVector{8, real(equations)}(v1, v2, v3, p, B1, B2, B3, psi) + return prim2cons(vcat(prim_other, prim_rho), equations) end - """ initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdMulticomponentEquations2D) @@ -100,72 +121,82 @@ A weak blast wave adapted from A provably entropy stable subcell shock capturing approach for high order split form DG [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -function initial_condition_weak_blast_wave(x, t, equations::IdealGlmMhdMulticomponentEquations2D) - # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3) - # Same discontinuity in the velocities but with magnetic fields - # Set up polar coordinates - inicenter = SVector(0.0, 0.0) - x_norm = x[1] - inicenter[1] - y_norm = x[2] - inicenter[2] - r = sqrt(x_norm^2 + y_norm^2) - phi = atan(y_norm, x_norm) - sin_phi, cos_phi = sincos(phi) - - prim_rho = SVector{ncomponents(equations), real(equations)}(r > 0.5 ? 2^(i-1) * (1-2)/(1-2^ncomponents(equations))*1.0 : 2^(i-1) * (1-2)/(1-2^ncomponents(equations))*1.1691 for i in eachcomponent(equations)) - - v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi - v2 = r > 0.5 ? 0.0 : 0.1882 * sin_phi - p = r > 0.5 ? 
1.0 : 1.245
-
-  prim_other = SVector{8, real(equations)}(v1, v2, 0.0, p, 1.0, 1.0, 1.0, 0.0)
-
-  return prim2cons(vcat(prim_other, prim_rho),equations)
+function initial_condition_weak_blast_wave(x, t,
+                                           equations::IdealGlmMhdMulticomponentEquations2D)
+    # Adapted MHD version of the weak blast wave from Hennemann & Gassner JCP paper 2020 (Sec. 6.3)
+    # Same discontinuity in the velocities but with magnetic fields
+    # Set up polar coordinates
+    inicenter = SVector(0.0, 0.0)
+    x_norm = x[1] - inicenter[1]
+    y_norm = x[2] - inicenter[2]
+    r = sqrt(x_norm^2 + y_norm^2)
+    phi = atan(y_norm, x_norm)
+    sin_phi, cos_phi = sincos(phi)
+
+    prim_rho = SVector{ncomponents(equations), real(equations)}(r > 0.5 ?
+                                                                2^(i - 1) * (1 - 2) /
+                                                                (1 -
+                                                                 2^ncomponents(equations)) *
+                                                                1.0 :
+                                                                2^(i - 1) * (1 - 2) /
+                                                                (1 -
+                                                                 2^ncomponents(equations)) *
+                                                                1.1691
+                                                                for i in eachcomponent(equations))
+
+    v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi
+    v2 = r > 0.5 ? 0.0 : 0.1882 * sin_phi
+    p = r > 0.5 ? 1.0 : 1.245
+
+    prim_other = SVector{8, real(equations)}(v1, v2, 0.0, p, 1.0, 1.0, 1.0, 0.0)
+
+    return prim2cons(vcat(prim_other, prim_rho), equations)
 end
 
-
 # Calculate 1D flux for a single point
-@inline function flux(u, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations2D)
-  rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u
-  @unpack c_h = equations
-
-  rho = density(u, equations)
-
-  v1 = rho_v1/rho
-  v2 = rho_v2/rho
-  v3 = rho_v3/rho
-  kin_en = 0.5 * rho * (v1^2 + v2^2 + v3^2)
-  mag_en = 0.5*(B1^2 + B2^2 + B3^2)
-  gamma = totalgamma(u, equations)
-  p = (gamma - 1) * (rho_e - kin_en - mag_en - 0.5*psi^2)
-
-  if orientation == 1
-    f_rho = densities(u, v1, equations)
-    f1 = rho_v1*v1 + p + mag_en - B1^2
-    f2 = rho_v1*v2 - B1*B2
-    f3 = rho_v1*v3 - B1*B3
-    f4 = (kin_en + gamma*p/(gamma - 1) + 2*mag_en)*v1 - B1*(v1*B1 + v2*B2 + v3*B3) + c_h*psi*B1
-    f5 = c_h*psi
-    f6 = v1*B2 - v2*B1
-    f7 = v1*B3 - v3*B1
-    f8 = c_h*B1
-  else # orientation == 2
-    f_rho = densities(u, v2, equations)
-    f1 = rho_v2*v1 - B1*B2
-    f2 = rho_v2*v2 + p + mag_en - B2^2
-    f3 = rho_v2*v3 - B2*B3
-    f4 = (kin_en + gamma*p/(gamma - 1) + 2*mag_en)*v2 - B2*(v1*B1 + v2*B2 + v3*B3) + c_h*psi*B2
-    f5 = v2*B1 - v1*B2
-    f6 = c_h*psi
-    f7 = v2*B3 - v3*B2
-    f8 = c_h*B2
-  end
-
-  f_other = SVector{8, real(equations)}(f1, f2, f3, f4, f5, f6, f7, f8)
-
-  return vcat(f_other, f_rho)
-end
+@inline function flux(u, orientation::Integer,
+                      equations::IdealGlmMhdMulticomponentEquations2D)
+    rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u
+    @unpack c_h = equations
+
+    rho = density(u, equations)
+
+    v1 = rho_v1 / rho
+    v2 = rho_v2 / rho
+    v3 = rho_v3 / rho
+    kin_en = 0.5 * rho * (v1^2 + v2^2 + v3^2)
+    mag_en = 0.5 * (B1^2 + B2^2 + B3^2)
+    gamma = totalgamma(u, equations)
+    p = (gamma - 1) * (rho_e - kin_en - mag_en - 0.5 * psi^2)
+
+    if orientation == 1
+        f_rho = densities(u, v1, equations)
+        f1 = rho_v1 * v1 + p + mag_en - B1^2
+        f2 = rho_v1 * v2 - B1 * B2
+        f3 = rho_v1 * v3 - B1 * B3
+        f4 = (kin_en + gamma * p / (gamma - 1) + 2 * mag_en) * v1 -
+             B1 * (v1 * B1 + v2 * B2 + v3 * B3) + c_h * psi * B1
+        f5 = c_h * psi
+        f6 = v1 * B2 - v2 * B1
+        f7 = v1 * B3 - v3 * B1
+        f8 = c_h * B1
+    else # orientation == 2
+        f_rho = densities(u, v2, equations)
+        f1 = rho_v2 * v1 - B1 * B2
+        f2 = rho_v2 * v2 + p + mag_en - B2^2
+        f3 = rho_v2 * v3 - B2 * B3
+        f4 = (kin_en + gamma * p / (gamma - 1) + 2 * mag_en) * v2 -
+             B2 * (v1 * B1 + v2 * B2 + v3 * B3) + c_h * psi * B2
+        f5 = v2 * B1 - v1 * B2
+        f6 = c_h * psi
+        f7 = v2 * B3 - v3 * B2
+        f8 = c_h * B2
+    end
+
+    f_other = 
SVector{8, real(equations)}(f1, f2, f3, f4, f5, f6, f7, f8) + return vcat(f_other, f_rho) +end """ flux_nonconservative_powell(u_ll, u_rr, orientation::Integer, @@ -184,47 +215,46 @@ of the [`IdealGlmMhdMulticomponentEquations2D`](@ref). """ @inline function flux_nonconservative_powell(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations2D) - rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - - rho_ll = density(u_ll, equations) - - v1_ll = rho_v1_ll / rho_ll - v2_ll = rho_v2_ll / rho_ll - v3_ll = rho_v3_ll / rho_ll - v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll - - # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) - # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2}, 0, 0, 0, v_{1,2}) - # Note that the order of conserved variables is changed compared to the - # standard GLM MHD equations, i.e., the densities are moved to the end - # Here, we compute the non-density components at first and append zero density - # components afterwards - zero_densities = SVector{ncomponents(equations), real(equations)}( - ntuple(_ -> zero(real(equations)), Val(ncomponents(equations)))) - if orientation == 1 - f = SVector(B1_ll * B1_rr, - B2_ll * B1_rr, - B3_ll * B1_rr, - v_dot_B_ll * B1_rr + v1_ll * psi_ll * psi_rr, - v1_ll * B1_rr, - v2_ll * B1_rr, - v3_ll * B1_rr, - v1_ll * psi_rr) - else # orientation == 2 - f = SVector(B1_ll * B2_rr, - B2_ll * B2_rr, - B3_ll * B2_rr, - v_dot_B_ll * B2_rr + v2_ll * psi_ll * psi_rr, - v1_ll * B2_rr, - v2_ll * B2_rr, - v3_ll * B2_rr, - v2_ll * psi_rr) - end - - return vcat(f, zero_densities) -end + rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + + rho_ll = density(u_ll, equations) + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v_dot_B_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + + # Powell nonconservative term: (0, B_1, B_2, B_3, v⋅B, v_1, v_2, v_3, 0) + # Galilean nonconservative term: (0, 0, 0, 0, ψ v_{1,2}, 0, 0, 0, v_{1,2}) + # Note that the order of conserved variables is changed compared to the + # standard GLM MHD equations, i.e., the densities are moved to the end + # Here, we compute the non-density components at first and append zero density + # components afterwards + zero_densities = SVector{ncomponents(equations), real(equations)}(ntuple(_ -> zero(real(equations)), + Val(ncomponents(equations)))) + if orientation == 1 + f = SVector(B1_ll * B1_rr, + B2_ll * B1_rr, + B3_ll * B1_rr, + v_dot_B_ll * B1_rr + v1_ll * psi_ll * psi_rr, + v1_ll * B1_rr, + v2_ll * B1_rr, + v3_ll * B1_rr, + v1_ll * psi_rr) + else # orientation == 2 + f = SVector(B1_ll * B2_rr, + B2_ll * B2_rr, + B3_ll * B2_rr, + v_dot_B_ll * B2_rr + v2_ll * psi_ll * psi_rr, + v1_ll * B2_rr, + v2_ll * B2_rr, + v3_ll * B2_rr, + v2_ll * psi_rr) + end + return vcat(f, zero_densities) +end """ flux_derigs_etal(u_ll, u_rr, orientation, equations::IdealGlmMhdMulticomponentEquations2D) @@ -235,125 +265,134 @@ Entropy conserving two-point flux adapted by divergence diminishing ideal magnetohydrodynamics equations for multicomponent [DOI: 10.1016/j.jcp.2018.03.002](https://doi.org/10.1016/j.jcp.2018.03.002) """ -function flux_derigs_etal(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations2D) - # Unpack left and right states to get velocities, pressure, 
and inverse temperature (called beta) - rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll - rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr - @unpack gammas, gas_constants, cv, c_h = equations - - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - gamma_ll = totalgamma(u_ll, equations) - gamma_rr = totalgamma(u_rr, equations) - - rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+8], u_rr[i+8]) for i in eachcomponent(equations)) - rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+8] + u_rr[i+8]) for i in eachcomponent(equations)) - - v1_ll = rho_v1_ll/rho_ll - v2_ll = rho_v2_ll/rho_ll - v3_ll = rho_v3_ll/rho_ll - v1_rr = rho_v1_rr/rho_rr - v2_rr = rho_v2_rr/rho_rr - v3_rr = rho_v3_rr/rho_rr - v1_sq = 0.5 * (v1_ll^2 + v1_rr^2) - v2_sq = 0.5 * (v2_ll^2 + v2_rr^2) - v3_sq = 0.5 * (v3_ll^2 + v3_rr^2) - v_sq = v1_sq + v2_sq + v3_sq - B1_sq = 0.5 * (B1_ll^2 + B1_rr^2) - B2_sq = 0.5 * (B2_ll^2 + B2_rr^2) - B3_sq = 0.5 * (B3_ll^2 + B3_rr^2) - B_sq = B1_sq + B2_sq + B3_sq - vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 - vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 - mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 - mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 - # for convenience store v⋅B - vel_dot_mag_ll = v1_ll*B1_ll + v2_ll*B2_ll + v3_ll*B3_ll - vel_dot_mag_rr = v1_rr*B1_rr + v2_rr*B2_rr + v3_rr*B3_rr - - # Compute the necessary mean values needed for either direction - v1_avg = 0.5*(v1_ll+v1_rr) - v2_avg = 0.5*(v2_ll+v2_rr) - v3_avg = 0.5*(v3_ll+v3_rr) - v_sum = v1_avg + v2_avg + v3_avg - B1_avg = 0.5*(B1_ll+B1_rr) - B2_avg = 0.5*(B2_ll+B2_rr) - B3_avg = 0.5*(B3_ll+B3_rr) - psi_avg = 0.5*(psi_ll+psi_rr) - vel_norm_avg = 0.5*(vel_norm_ll+vel_norm_rr) - mag_norm_avg = 0.5*(mag_norm_ll+mag_norm_rr) - vel_dot_mag_avg = 0.5*(vel_dot_mag_ll+vel_dot_mag_rr) - - enth = zero(v_sum) - help1_ll = zero(v1_ll) - help1_rr = zero(v1_rr) - - for i in eachcomponent(equations) - enth += rhok_avg[i] * gas_constants[i] - help1_ll += u_ll[i+8] * cv[i] - help1_rr += u_rr[i+8] * cv[i] - end - - T_ll = (rho_e_ll - 0.5*rho_ll * (vel_norm_ll) - 0.5*mag_norm_ll - 0.5*psi_ll^2) / help1_ll - T_rr = (rho_e_rr - 0.5*rho_rr * (vel_norm_rr) - 0.5*mag_norm_rr - 0.5*psi_rr^2) / help1_rr - T = 0.5 * (1.0/T_ll + 1.0/T_rr) - T_log = ln_mean(1.0/T_ll, 1.0/T_rr) - - # Calculate fluxes depending on orientation with specific direction averages - help1 = zero(T_ll) - help2 = zero(T_rr) - if orientation == 1 - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) - for i in eachcomponent(equations) - help1 += f_rho[i] * cv[i] - help2 += f_rho[i] - end - f1 = help2 * v1_avg + enth/T + 0.5 * mag_norm_avg - B1_avg*B1_avg - f2 = help2 * v2_avg - B1_avg*B2_avg - f3 = help2 * v3_avg - B1_avg*B3_avg - f5 = c_h*psi_avg - f6 = v1_avg*B2_avg - v2_avg*B1_avg - f7 = v1_avg*B3_avg - v3_avg*B1_avg - f8 = c_h*B1_avg - # total energy flux is complicated and involves the previous eight components - psi_B1_avg = 0.5*(B1_ll*psi_ll + B1_rr*psi_rr) - v1_mag_avg = 0.5*(v1_ll*mag_norm_ll + v1_rr*mag_norm_rr) - - f4 = (help1/T_log) - 0.5 * (vel_norm_avg) * (help2) + f1 * v1_avg + f2 * v2_avg + f3 * v3_avg + - f5 * B1_avg + f6 * B2_avg + f7 * B3_avg + f8 * psi_avg - 0.5*v1_mag_avg + - B1_avg * vel_dot_mag_avg - c_h * psi_B1_avg - - else - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v2_avg for i in eachcomponent(equations)) +function flux_derigs_etal(u_ll, u_rr, 
orientation::Integer, + equations::IdealGlmMhdMulticomponentEquations2D) + # Unpack left and right states to get velocities, pressure, and inverse temperature (called beta) + rho_v1_ll, rho_v2_ll, rho_v3_ll, rho_e_ll, B1_ll, B2_ll, B3_ll, psi_ll = u_ll + rho_v1_rr, rho_v2_rr, rho_v3_rr, rho_e_rr, B1_rr, B2_rr, B3_rr, psi_rr = u_rr + @unpack gammas, gas_constants, cv, c_h = equations + + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + gamma_ll = totalgamma(u_ll, equations) + gamma_rr = totalgamma(u_rr, equations) + + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 8], + u_rr[i + 8]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i + 8] + + u_rr[i + 8]) + for i in eachcomponent(equations)) + + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + v1_sq = 0.5 * (v1_ll^2 + v1_rr^2) + v2_sq = 0.5 * (v2_ll^2 + v2_rr^2) + v3_sq = 0.5 * (v3_ll^2 + v3_rr^2) + v_sq = v1_sq + v2_sq + v3_sq + B1_sq = 0.5 * (B1_ll^2 + B1_rr^2) + B2_sq = 0.5 * (B2_ll^2 + B2_rr^2) + B3_sq = 0.5 * (B3_ll^2 + B3_rr^2) + B_sq = B1_sq + B2_sq + B3_sq + vel_norm_ll = v1_ll^2 + v2_ll^2 + v3_ll^2 + vel_norm_rr = v1_rr^2 + v2_rr^2 + v3_rr^2 + mag_norm_ll = B1_ll^2 + B2_ll^2 + B3_ll^2 + mag_norm_rr = B1_rr^2 + B2_rr^2 + B3_rr^2 + # for convenience store v⋅B + vel_dot_mag_ll = v1_ll * B1_ll + v2_ll * B2_ll + v3_ll * B3_ll + vel_dot_mag_rr = v1_rr * B1_rr + v2_rr * B2_rr + v3_rr * B3_rr + + # Compute the necessary mean values needed for either direction + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + v_sum = v1_avg + v2_avg + v3_avg + B1_avg = 0.5 * (B1_ll + B1_rr) + B2_avg = 0.5 * (B2_ll + B2_rr) + B3_avg = 0.5 * (B3_ll + B3_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + vel_norm_avg = 0.5 * (vel_norm_ll + vel_norm_rr) + mag_norm_avg = 0.5 * (mag_norm_ll + mag_norm_rr) + vel_dot_mag_avg = 0.5 * (vel_dot_mag_ll + vel_dot_mag_rr) + + enth = zero(v_sum) + help1_ll = zero(v1_ll) + help1_rr = zero(v1_rr) + for i in eachcomponent(equations) - help1 += f_rho[i] * cv[i] - help2 += f_rho[i] + enth += rhok_avg[i] * gas_constants[i] + help1_ll += u_ll[i + 8] * cv[i] + help1_rr += u_rr[i + 8] * cv[i] end - f1 = help2 * v1_avg - B1_avg*B2_avg - f2 = help2 * v2_avg + enth/T + 0.5 * mag_norm_avg - B2_avg*B2_avg - f3 = help2 * v3_avg - B2_avg*B3_avg - f5 = v2_avg*B1_avg - v1_avg*B2_avg - f6 = c_h*psi_avg - f7 = v2_avg*B3_avg - v3_avg*B2_avg - f8 = c_h*B2_avg - # total energy flux is complicated and involves the previous eight components - psi_B2_avg = 0.5*(B2_ll*psi_ll + B2_rr*psi_rr) - v2_mag_avg = 0.5*(v2_ll*mag_norm_ll + v2_rr*mag_norm_rr) + T_ll = (rho_e_ll - 0.5 * rho_ll * (vel_norm_ll) - 0.5 * mag_norm_ll - + 0.5 * psi_ll^2) / help1_ll + T_rr = (rho_e_rr - 0.5 * rho_rr * (vel_norm_rr) - 0.5 * mag_norm_rr - + 0.5 * psi_rr^2) / help1_rr + T = 0.5 * (1.0 / T_ll + 1.0 / T_rr) + T_log = ln_mean(1.0 / T_ll, 1.0 / T_rr) - f4 = (help1/T_log) - 0.5 * (vel_norm_avg) * (help2) + f1 * v1_avg + f2 * v2_avg + f3 * v3_avg + - f5 * B1_avg + f6 * B2_avg + f7 * B3_avg + f8 * psi_avg - 0.5*v2_mag_avg + - B2_avg * vel_dot_mag_avg - c_h * psi_B2_avg - - end + # Calculate fluxes depending on orientation with specific direction averages + help1 = zero(T_ll) + help2 = zero(T_rr) + if orientation == 1 + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + 
for i in eachcomponent(equations)) + for i in eachcomponent(equations) + help1 += f_rho[i] * cv[i] + help2 += f_rho[i] + end + f1 = help2 * v1_avg + enth / T + 0.5 * mag_norm_avg - B1_avg * B1_avg + f2 = help2 * v2_avg - B1_avg * B2_avg + f3 = help2 * v3_avg - B1_avg * B3_avg + f5 = c_h * psi_avg + f6 = v1_avg * B2_avg - v2_avg * B1_avg + f7 = v1_avg * B3_avg - v3_avg * B1_avg + f8 = c_h * B1_avg + # total energy flux is complicated and involves the previous eight components + psi_B1_avg = 0.5 * (B1_ll * psi_ll + B1_rr * psi_rr) + v1_mag_avg = 0.5 * (v1_ll * mag_norm_ll + v1_rr * mag_norm_rr) + + f4 = (help1 / T_log) - 0.5 * (vel_norm_avg) * (help2) + f1 * v1_avg + + f2 * v2_avg + f3 * v3_avg + + f5 * B1_avg + f6 * B2_avg + f7 * B3_avg + f8 * psi_avg - 0.5 * v1_mag_avg + + B1_avg * vel_dot_mag_avg - c_h * psi_B1_avg + + else + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v2_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + help1 += f_rho[i] * cv[i] + help2 += f_rho[i] + end + f1 = help2 * v1_avg - B1_avg * B2_avg + f2 = help2 * v2_avg + enth / T + 0.5 * mag_norm_avg - B2_avg * B2_avg + f3 = help2 * v3_avg - B2_avg * B3_avg + f5 = v2_avg * B1_avg - v1_avg * B2_avg + f6 = c_h * psi_avg + f7 = v2_avg * B3_avg - v3_avg * B2_avg + f8 = c_h * B2_avg + + # total energy flux is complicated and involves the previous eight components + psi_B2_avg = 0.5 * (B2_ll * psi_ll + B2_rr * psi_rr) + v2_mag_avg = 0.5 * (v2_ll * mag_norm_ll + v2_rr * mag_norm_rr) + + f4 = (help1 / T_log) - 0.5 * (vel_norm_avg) * (help2) + f1 * v1_avg + + f2 * v2_avg + f3 * v3_avg + + f5 * B1_avg + f6 * B2_avg + f7 * B3_avg + f8 * psi_avg - 0.5 * v2_mag_avg + + B2_avg * vel_dot_mag_avg - c_h * psi_B2_avg + end - f_other = SVector{8, real(equations)}(f1, f2, f3, f4, f5, f6, f7, f8) + f_other = SVector{8, real(equations)}(f1, f2, f3, f4, f5, f6, f7, f8) - return vcat(f_other, f_rho) + return vcat(f_other, f_rho) end - """ flux_hindenlang_gassner(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdMulticomponentEquations2D) @@ -375,277 +414,301 @@ Hindenlang (2019), extending [`flux_ranocha`](@ref) to the MHD equations. 
the Euler Equations Using Summation-by-Parts Operators [Proceedings of ICOSAHOM 2018](https://doi.org/10.1007/978-3-030-39647-3_42) """ -@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations2D) - # Unpack left and right states - v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, equations) - v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, equations) - - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - # Compute the necessary mean values needed for either direction - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - psi_avg = 0.5 * (psi_ll + psi_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) - - inv_gamma_minus_one = 1 / (totalgamma(0.5 * (u_ll + u_rr), equations) - 1) - - rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i+8], u_rr[i+8]) for i in eachcomponent(equations)) - rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i+8] + u_rr[i+8]) for i in eachcomponent(equations)) - - - if orientation == 1 - f1 = zero(rho_ll) - f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i]*v1_avg for i in eachcomponent(equations)) - for i in eachcomponent(equations) - f1 += f_rho[i] +@inline function flux_hindenlang_gassner(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdMulticomponentEquations2D) + # Unpack left and right states + v1_ll, v2_ll, v3_ll, p_ll, B1_ll, B2_ll, B3_ll, psi_ll = cons2prim(u_ll, equations) + v1_rr, v2_rr, v3_rr, p_rr, B1_rr, B2_rr, B3_rr, psi_rr = cons2prim(u_rr, equations) + + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + # Compute the necessary mean values needed for either direction + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + psi_avg = 0.5 * (psi_ll + psi_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + magnetic_square_avg = 0.5 * (B1_ll * B1_rr + B2_ll * B2_rr + B3_ll * B3_rr) + + inv_gamma_minus_one = 1 / (totalgamma(0.5 * (u_ll + u_rr), equations) - 1) + + rhok_mean = SVector{ncomponents(equations), real(equations)}(ln_mean(u_ll[i + 8], + u_rr[i + 8]) + for i in eachcomponent(equations)) + rhok_avg = SVector{ncomponents(equations), real(equations)}(0.5 * (u_ll[i + 8] + + u_rr[i + 8]) + for i in eachcomponent(equations)) + + if orientation == 1 + f1 = zero(rho_ll) + f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v1_avg + for i in eachcomponent(equations)) + for i in eachcomponent(equations) + f1 += f_rho[i] + end + + # Calculate fluxes depending on orientation with specific direction averages + f2 = f1 * v1_avg + p_avg + magnetic_square_avg - + 0.5 * (B1_ll 
* B1_rr + B1_rr * B1_ll)
+        f3 = f1 * v2_avg - 0.5 * (B1_ll * B2_rr + B1_rr * B2_ll)
+        f4 = f1 * v3_avg - 0.5 * (B1_ll * B3_rr + B1_rr * B3_ll)
+        #f5 below
+        f6 = equations.c_h * psi_avg
+        f7 = 0.5 * (v1_ll * B2_ll - v2_ll * B1_ll + v1_rr * B2_rr - v2_rr * B1_rr)
+        f8 = 0.5 * (v1_ll * B3_ll - v3_ll * B1_ll + v1_rr * B3_rr - v3_rr * B1_rr)
+        f9 = equations.c_h * 0.5 * (B1_ll + B1_rr)
+        # total energy flux is complicated and involves the previous components
+        f5 = (f1 * (velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one)
+              +
+              0.5 * (+p_ll * v1_rr + p_rr * v1_ll
+                     + (v1_ll * B2_ll * B2_rr + v1_rr * B2_rr * B2_ll)
+                     + (v1_ll * B3_ll * B3_rr + v1_rr * B3_rr * B3_ll)
+                     -
+                     (v2_ll * B1_ll * B2_rr + v2_rr * B1_rr * B2_ll)
+                     -
+                     (v3_ll * B1_ll * B3_rr + v3_rr * B1_rr * B3_ll)
+                     +
+                     equations.c_h * (B1_ll * psi_rr + B1_rr * psi_ll)))
+    else
+        f1 = zero(rho_ll)
+        f_rho = SVector{ncomponents(equations), real(equations)}(rhok_mean[i] * v2_avg
+                                                                 for i in eachcomponent(equations))
+        for i in eachcomponent(equations)
+            f1 += f_rho[i]
+        end
+
+        # Calculate fluxes depending on orientation with specific direction averages
+        f2 = f1 * v1_avg - 0.5 * (B2_ll * B1_rr + B2_rr * B1_ll)
+        f3 = f1 * v2_avg + p_avg + magnetic_square_avg -
+             0.5 * (B2_ll * B2_rr + B2_rr * B2_ll)
+        f4 = f1 * 
v3_avg - 0.5 * (B2_ll * B3_rr + B2_rr * B3_ll) - #f5 below - f6 = 0.5 * (v2_ll * B1_ll - v1_ll * B2_ll + v2_rr * B1_rr - v1_rr * B2_rr) - f7 = equations.c_h * psi_avg - f8 = 0.5 * (v2_ll * B3_ll - v3_ll * B2_ll + v2_rr * B3_rr - v3_rr * B2_rr) - f9 = equations.c_h * 0.5 * (B2_ll + B2_rr) - # total energy flux is complicated and involves the previous components - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * inv_gamma_minus_one ) - + 0.5 * ( - + p_ll * v2_rr + p_rr * v2_ll - + (v2_ll * B1_ll * B1_rr + v2_rr * B1_rr * B1_ll) - + (v2_ll * B3_ll * B3_rr + v2_rr * B3_rr * B3_ll) - - (v1_ll * B2_ll * B1_rr + v1_rr * B2_rr * B1_ll) - - (v3_ll * B2_ll * B3_rr + v3_rr * B2_rr * B3_ll) - + equations.c_h * (B2_ll * psi_rr + B2_rr * psi_ll) ) ) - end - - f_other = SVector{8, real(equations)}(f2, f3, f4, f5, f6, f7, f8, f9) - - return vcat(f_other, f_rho) + return vcat(f_other, f_rho) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdMulticomponentEquations2D) - rho_v1_ll, rho_v2_ll, _ = u_ll - rho_v1_rr, rho_v2_rr, _ = u_rr - - rho_ll = density(u_ll, equations) - rho_rr = density(u_rr, equations) - - # Calculate velocities and fast magnetoacoustic wave speeds - if orientation == 1 - v_ll = rho_v1_ll / rho_ll - v_rr = rho_v1_rr / rho_rr - else # orientation == 2 - v_ll = rho_v2_ll / rho_ll - v_rr = rho_v2_rr / rho_rr - end - cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) - cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) - - λ_max = max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) -end +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdMulticomponentEquations2D) + rho_v1_ll, rho_v2_ll, _ = u_ll + rho_v1_rr, rho_v2_rr, _ = u_rr + + rho_ll = density(u_ll, equations) + rho_rr = density(u_rr, equations) + + # Calculate velocities and fast magnetoacoustic wave speeds + if orientation == 1 + v_ll = rho_v1_ll / rho_ll + v_rr = rho_v1_rr / rho_rr + else # orientation == 2 + v_ll = rho_v2_ll / rho_ll + v_rr = rho_v2_rr / rho_rr + end + cf_ll = calc_fast_wavespeed(u_ll, orientation, equations) + cf_rr = calc_fast_wavespeed(u_rr, orientation, equations) + λ_max = max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) +end @inline function max_abs_speeds(u, equations::IdealGlmMhdMulticomponentEquations2D) - rho_v1, rho_v2, _ = u + rho_v1, rho_v2, _ = u - rho = density(u, equations) + rho = density(u, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho + v1 = rho_v1 / rho + v2 = rho_v2 / rho - cf_x_direction = calc_fast_wavespeed(u, 1, equations) - cf_y_direction = calc_fast_wavespeed(u, 2, equations) + cf_x_direction = calc_fast_wavespeed(u, 1, equations) + cf_y_direction = calc_fast_wavespeed(u, 2, equations) - return (abs(v1) + cf_x_direction, abs(v2) + cf_y_direction, ) + return (abs(v1) + cf_x_direction, abs(v2) + cf_y_direction) end - @inline function density_pressure(u, equations::IdealGlmMhdMulticomponentEquations2D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - rho = density(u, equations) - gamma = totalgamma(u, equations) - p = (gamma - 1)*(rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho - - 0.5 * (B1^2 + B2^2 + B3^2) - - 0.5 * psi^2) - return rho * p + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + rho = density(u, equations) + gamma = totalgamma(u, equations) + p = (gamma - 1) * (rho_e - 0.5 * (rho_v1^2 + rho_v2^2 + rho_v3^2) / rho + - + 0.5 * (B1^2 + B2^2 + B3^2) + - + 0.5 * psi^2) + return rho * p end - 
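For orientation when reading these hunks: `density_pressure` above and the `totalgamma` helper near the end of this file implement the same mixture rule, a `cv`-weighted average over the components. The short sketch below spells that rule out in plain Julia; it deliberately avoids Trixi.jl, and every numerical value in it (`rhos`, `gammas`, `gas_constants`) is a made-up illustration rather than anything taken from this patch.

```julia
# Standalone sketch of the mixture rule implemented by `totalgamma` above:
# gamma_total = sum(rho_k * cv_k * gamma_k) / sum(rho_k * cv_k).
# All input values are hypothetical examples, not taken from the patch.
rhos = [1.0, 0.5]                   # hypothetical partial densities rho_k
gammas = [1.4, 5 / 3]               # hypothetical heat capacity ratios gamma_k
gas_constants = [0.287, 2.077]      # hypothetical specific gas constants R_k
cv = gas_constants ./ (gammas .- 1) # same relation as in the struct constructor

help1 = sum(rhos .* cv .* gammas)   # matches `help1 += u[i + 8] * cv[i] * gammas[i]`
help2 = sum(rhos .* cv)             # matches `help2 += u[i + 8] * cv[i]`
gamma_total = help1 / help2

# The mixture value is always bracketed by the component values.
@assert minimum(gammas) <= gamma_total <= maximum(gammas)
```

Because `cv = R / (gamma - 1)` holds componentwise, the mixture gamma always stays between the smallest and largest component gamma, which the final assertion checks.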
# Convert conservative variables to primitive function cons2prim(u, equations::IdealGlmMhdMulticomponentEquations2D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - prim_rho = SVector{ncomponents(equations), real(equations)}(u[i+8] for i in eachcomponent(equations)) - rho = density(u, equations) + prim_rho = SVector{ncomponents(equations), real(equations)}(u[i + 8] + for i in eachcomponent(equations)) + rho = density(u, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho - gamma = totalgamma(u, equations) + gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5*rho*(v1^2 + v2^2 + v3^2) - 0.5*(B1^2 + B2^2 + B3^2) - 0.5*psi^2) - prim_other = SVector{8, real(equations)}(v1, v2, v3, p, B1, B2, B3, psi) - return vcat(prim_other, prim_rho) + p = (gamma - 1) * + (rho_e - 0.5 * rho * (v1^2 + v2^2 + v3^2) - 0.5 * (B1^2 + B2^2 + B3^2) - + 0.5 * psi^2) + prim_other = SVector{8, real(equations)}(v1, v2, v3, p, B1, B2, B3, psi) + return vcat(prim_other, prim_rho) end # Convert conservative variables to entropy @inline function cons2entropy(u, equations::IdealGlmMhdMulticomponentEquations2D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u - @unpack cv, gammas, gas_constants = equations + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = u + @unpack cv, gammas, gas_constants = equations - rho = density(u, equations) + rho = density(u, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_square = v1^2 + v2^2 + v3^2 - gamma = totalgamma(u, equations) - p = (gamma - 1) * (rho_e - 0.5*rho*v_square - 0.5*(B1^2 + B2^2 + B3^2) - 0.5*psi^2) - s = log(p) - gamma*log(rho) - rho_p = rho / p + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_square = v1^2 + v2^2 + v3^2 + gamma = totalgamma(u, equations) + p = (gamma - 1) * + (rho_e - 0.5 * rho * v_square - 0.5 * (B1^2 + B2^2 + B3^2) - 0.5 * psi^2) + s = log(p) - gamma * log(rho) + rho_p = rho / p - # Multicomponent stuff - help1 = zero(v1) + # Multicomponent stuff + help1 = zero(v1) - for i in eachcomponent(equations) - help1 += u[i+8] * cv[i] - end - - T = (rho_e - 0.5 * rho * v_square - 0.5*(B1^2 + B2^2 + B3^2) - 0.5*psi^2) / (help1) - - entrop_rho = SVector{ncomponents(equations), real(equations)}( -1.0 * (cv[i] * log(T) - gas_constants[i] * log(u[i+8])) + gas_constants[i] + cv[i] - (v_square / (2*T)) for i in eachcomponent(equations)) - - w1 = v1 / T - w2 = v2 / T - w3 = v3 / T - w4 = -1.0 / T - w5 = B1 / T - w6 = B2 / T - w7 = B3 / T - w8 = psi / T - - entrop_other = SVector{8, real(equations)}(w1, w2, w3, w4, w5, w6, w7, w8) + for i in eachcomponent(equations) + help1 += u[i + 8] * cv[i] + end - return vcat(entrop_other, entrop_rho) + T = (rho_e - 0.5 * rho * v_square - 0.5 * (B1^2 + B2^2 + B3^2) - 0.5 * psi^2) / + (help1) + + entrop_rho = SVector{ncomponents(equations), real(equations)}(-1.0 * + (cv[i] * log(T) - + gas_constants[i] * + log(u[i + 8])) + + gas_constants[i] + + cv[i] - + (v_square / (2 * T)) + for i in eachcomponent(equations)) + + w1 = v1 / T + w2 = v2 / T + w3 = v3 / T + w4 = -1.0 / T + w5 = B1 / T + w6 = B2 / T + w7 = B3 / T + w8 = psi / T + + entrop_other = SVector{8, real(equations)}(w1, w2, w3, w4, w5, w6, w7, w8) + + return vcat(entrop_other, entrop_rho) end - # Convert primitive to conservative variables @inline function prim2cons(prim, equations::IdealGlmMhdMulticomponentEquations2D) - v1, v2, v3, p, B1, B2, B3, psi = prim + v1, v2, v3, p, B1, B2, B3, 
psi = prim - cons_rho = SVector{ncomponents(equations), real(equations)}(prim[i+8] for i in eachcomponent(equations)) - rho = density(prim, equations) + cons_rho = SVector{ncomponents(equations), real(equations)}(prim[i + 8] + for i in eachcomponent(equations)) + rho = density(prim, equations) - rho_v1 = rho * v1 - rho_v2 = rho * v2 - rho_v3 = rho * v3 + rho_v1 = rho * v1 + rho_v2 = rho * v2 + rho_v3 = rho * v3 - gamma = totalgamma(prim, equations) - rho_e = p/(gamma-1) + 0.5 * (rho_v1*v1 + rho_v2*v2 + rho_v3*v3) + - 0.5 * (B1^2 + B2^2 + B3^2) + 0.5 * psi^2 + gamma = totalgamma(prim, equations) + rho_e = p / (gamma - 1) + 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3) + + 0.5 * (B1^2 + B2^2 + B3^2) + 0.5 * psi^2 - cons_other = SVector{8, real(equations)}(rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi) + cons_other = SVector{8, real(equations)}(rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, + psi) - return vcat(cons_other, cons_rho) + return vcat(cons_other, cons_rho) end - # Compute the fastest wave speed for ideal MHD equations: c_f, the fast magnetoacoustic eigenvalue -@inline function calc_fast_wavespeed(cons, direction, equations::IdealGlmMhdMulticomponentEquations2D) - rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons - rho = density(cons, equations) - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_mag = sqrt(v1^2 + v2^2 + v3^2) - gamma = totalgamma(cons, equations) - p = (gamma - 1)*(rho_e - 0.5*rho*v_mag^2 - 0.5*(B1^2 + B2^2 + B3^2) - 0.5*psi^2) - a_square = gamma * p / rho - sqrt_rho = sqrt(rho) - b1 = B1 / sqrt_rho - b2 = B2 / sqrt_rho - b3 = B3 / sqrt_rho - b_square = b1^2 + b2^2 + b3^2 - if direction == 1 # x-direction - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b1^2)) - else - c_f = sqrt(0.5*(a_square + b_square) + 0.5*sqrt((a_square + b_square)^2 - 4.0*a_square*b2^2)) - end - return c_f +@inline function calc_fast_wavespeed(cons, direction, + equations::IdealGlmMhdMulticomponentEquations2D) + rho_v1, rho_v2, rho_v3, rho_e, B1, B2, B3, psi = cons + rho = density(cons, equations) + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_mag = sqrt(v1^2 + v2^2 + v3^2) + gamma = totalgamma(cons, equations) + p = (gamma - 1) * + (rho_e - 0.5 * rho * v_mag^2 - 0.5 * (B1^2 + B2^2 + B3^2) - 0.5 * psi^2) + a_square = gamma * p / rho + sqrt_rho = sqrt(rho) + b1 = B1 / sqrt_rho + b2 = B2 / sqrt_rho + b3 = B3 / sqrt_rho + b_square = b1^2 + b2^2 + b3^2 + if direction == 1 # x-direction + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b1^2)) + else + c_f = sqrt(0.5 * (a_square + b_square) + + 0.5 * sqrt((a_square + b_square)^2 - 4.0 * a_square * b2^2)) + end + return c_f end - @inline function density(u, equations::IdealGlmMhdMulticomponentEquations2D) - rho = zero(u[1]) - - for i in eachcomponent(equations) - rho += u[i+8] - end + rho = zero(u[1]) - return rho - end + for i in eachcomponent(equations) + rho += u[i + 8] + end + return rho +end - @inline function totalgamma(u, equations::IdealGlmMhdMulticomponentEquations2D) - @unpack cv, gammas = equations +@inline function totalgamma(u, equations::IdealGlmMhdMulticomponentEquations2D) + @unpack cv, gammas = equations - help1 = zero(u[1]) - help2 = zero(u[1]) + help1 = zero(u[1]) + help2 = zero(u[1]) - for i in eachcomponent(equations) - help1 += u[i+8] * cv[i] * gammas[i] - help2 += u[i+8] * cv[i] - end + for i in eachcomponent(equations) + help1 += u[i + 8] * cv[i] * gammas[i] + help2 += u[i + 8] * cv[i] + end - return 
help1/help2 + return help1 / help2 end - @inline function densities(u, v, equations::IdealGlmMhdMulticomponentEquations2D) - - return SVector{ncomponents(equations), real(equations)}(u[i+8]*v for i in eachcomponent(equations)) - end - - + return SVector{ncomponents(equations), real(equations)}(u[i + 8] * v + for i in eachcomponent(equations)) +end end # @muladd diff --git a/src/equations/inviscid_burgers_1d.jl b/src/equations/inviscid_burgers_1d.jl index 18e2ed4600b..8d4410b6ffe 100644 --- a/src/equations/inviscid_burgers_1d.jl +++ b/src/equations/inviscid_burgers_1d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" InviscidBurgersEquation1D @@ -16,10 +16,8 @@ in one space dimension. """ struct InviscidBurgersEquation1D <: AbstractInviscidBurgersEquation{1, 1} end - -varnames(::typeof(cons2cons), ::InviscidBurgersEquation1D) = ("scalar", ) -varnames(::typeof(cons2prim), ::InviscidBurgersEquation1D) = ("scalar", ) - +varnames(::typeof(cons2cons), ::InviscidBurgersEquation1D) = ("scalar",) +varnames(::typeof(cons2prim), ::InviscidBurgersEquation1D) = ("scalar",) # Set initial conditions at physical location `x` for time `t` """ @@ -28,109 +26,104 @@ varnames(::typeof(cons2prim), ::InviscidBurgersEquation1D) = ("scalar", ) A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equation::InviscidBurgersEquation1D) - return SVector(2.0) + return SVector(2.0) end - """ initial_condition_convergence_test(x, t, equations::InviscidBurgersEquation1D) A smooth initial condition used for convergence tests. """ function initial_condition_convergence_test(x, t, equation::InviscidBurgersEquation1D) - c = 2.0 - A = 1.0 - L = 1 - f = 1/L - omega = 2 * pi * f - scalar = c + A * sin(omega * (x[1] - t)) - - return SVector(scalar) + c = 2.0 + A = 1.0 + L = 1 + f = 1 / L + omega = 2 * pi * f + scalar = c + A * sin(omega * (x[1] - t)) + + return SVector(scalar) end - """ source_terms_convergence_test(u, x, t, equations::InviscidBurgersEquation1D) Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref). 
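For the manufactured solution ``u(x, t) = c + A sin(ω (x - t))`` with the hardcoded
settings ``c = 2``, ``A = 1``, and ``ω = 2π``, inserting ``u`` into ``u_t + (u^2/2)_x``
yields ``s(x, t) = ω A cos(ω (x - t)) (c - 1 + A sin(ω (x - t)))``, which is exactly
the expression returned below.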
""" -@inline function source_terms_convergence_test(u, x, t, equations::InviscidBurgersEquation1D) - # Same settings as in `initial_condition` - c = 2.0 - A = 1.0 - L = 1 - f = 1/L - omega = 2 * pi * f - du = omega * A * cos(omega * (x[1] - t)) * (c - 1 + A * sin(omega * (x[1] - t))) - - return SVector(du) +@inline function source_terms_convergence_test(u, x, t, + equations::InviscidBurgersEquation1D) + # Same settings as in `initial_condition` + c = 2.0 + A = 1.0 + L = 1 + f = 1 / L + omega = 2 * pi * f + du = omega * A * cos(omega * (x[1] - t)) * (c - 1 + A * sin(omega * (x[1] - t))) + + return SVector(du) end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equations::InviscidBurgersEquation1D) - # Calculate 1D flux in for a single point @inline function flux(u, orientation::Integer, equation::InviscidBurgersEquation1D) - return SVector(0.5 * u[1]^2) + return SVector(0.5 * u[1]^2) end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::InviscidBurgersEquation1D) - u_L = u_ll[1] - u_R = u_rr[1] +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::InviscidBurgersEquation1D) + u_L = u_ll[1] + u_R = u_rr[1] - λ_max = max(abs(u_L), abs(u_R)) + λ_max = max(abs(u_L), abs(u_R)) end # Calculate minimum and maximum wave speeds for HLL-type fluxes -@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::InviscidBurgersEquation1D) - u_L = u_ll[1] - u_R = u_rr[1] +@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, + equations::InviscidBurgersEquation1D) + u_L = u_ll[1] + u_R = u_rr[1] - λ_min = min(u_L, u_R) - λ_max = max(u_L, u_R) + λ_min = min(u_L, u_R) + λ_max = max(u_L, u_R) - return λ_min, λ_max + return λ_min, λ_max end @inline function max_abs_speeds(u, equation::InviscidBurgersEquation1D) - return (abs(u[1]),) + return (abs(u[1]),) end - # (Symmetric) Entropy Conserving flux function flux_ec(u_ll, u_rr, orientation, equation::InviscidBurgersEquation1D) - u_L = u_ll[1] - u_R = u_rr[1] + u_L = u_ll[1] + u_R = u_rr[1] - return SVector((u_L^2 + u_L * u_R + u_R^2) / 6) + return SVector((u_L^2 + u_L * u_R + u_R^2) / 6) end - # See https://metaphor.ethz.ch/x/2019/hs/401-4671-00L/literature/mishra_hyperbolic_pdes.pdf , # section 4.1.5 and especially equation (4.16). function flux_godunov(u_ll, u_rr, orientation, equation::InviscidBurgersEquation1D) - u_L = u_ll[1] - u_R = u_rr[1] + u_L = u_ll[1] + u_R = u_rr[1] - return SVector(0.5 * max(max(u_L, zero(u_L))^2, min(u_R, zero(u_R))^2)) + return SVector(0.5 * max(max(u_L, zero(u_L))^2, min(u_R, zero(u_R))^2)) end - # See https://metaphor.ethz.ch/x/2019/hs/401-4671-00L/literature/mishra_hyperbolic_pdes.pdf , # section 4.2.5 and especially equation (4.34). -function flux_engquist_osher(u_ll, u_rr, orientation, equation::InviscidBurgersEquation1D) - u_L = u_ll[1] - u_R = u_rr[1] +function flux_engquist_osher(u_ll, u_rr, orientation, + equation::InviscidBurgersEquation1D) + u_L = u_ll[1] + u_R = u_rr[1] - return SVector(0.5 * (max(u_L, zero(u_L))^2 + min(u_R, zero(u_R))^2)) + return SVector(0.5 * (max(u_L, zero(u_L))^2 + min(u_R, zero(u_R))^2)) end - """ splitting_lax_friedrichs(u, orientation::Integer, equations::InviscidBurgersEquation1D) @@ -151,41 +144,38 @@ function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. 
""" @inline function splitting_lax_friedrichs(u, orientation::Integer, equations::InviscidBurgersEquation1D) - fm = splitting_lax_friedrichs(u, Val{:minus}(), orientation, equations) - fp = splitting_lax_friedrichs(u, Val{:plus}(), orientation, equations) - return fm, fp + fm = splitting_lax_friedrichs(u, Val{:minus}(), orientation, equations) + fp = splitting_lax_friedrichs(u, Val{:plus}(), orientation, equations) + return fm, fp end @inline function splitting_lax_friedrichs(u, ::Val{:plus}, orientation::Integer, equations::InviscidBurgersEquation1D) - f = 0.5 * u[1]^2 - lambda = abs(u[1]) - return SVector(0.5 * (f + lambda * u[1])) + f = 0.5 * u[1]^2 + lambda = abs(u[1]) + return SVector(0.5 * (f + lambda * u[1])) end @inline function splitting_lax_friedrichs(u, ::Val{:minus}, orientation::Integer, equations::InviscidBurgersEquation1D) - f = 0.5 * u[1]^2 - lambda = abs(u[1]) - return SVector(0.5 * (f - lambda * u[1])) + f = 0.5 * u[1]^2 + lambda = abs(u[1]) + return SVector(0.5 * (f - lambda * u[1])) end - # Convert conservative variables to primitive @inline cons2prim(u, equation::InviscidBurgersEquation1D) = u # Convert conservative variables to entropy variables @inline cons2entropy(u, equation::InviscidBurgersEquation1D) = u - # Calculate entropy for a conservative state `cons` @inline entropy(u::Real, ::InviscidBurgersEquation1D) = 0.5 * u^2 @inline entropy(u, equation::InviscidBurgersEquation1D) = entropy(u[1], equation) - # Calculate total energy for a conservative state `cons` @inline energy_total(u::Real, ::InviscidBurgersEquation1D) = 0.5 * u^2 -@inline energy_total(u, equation::InviscidBurgersEquation1D) = energy_total(u[1], equation) - - +@inline function energy_total(u, equation::InviscidBurgersEquation1D) + energy_total(u[1], equation) +end end # @muladd diff --git a/src/equations/laplace_diffusion_1d.jl b/src/equations/laplace_diffusion_1d.jl index 2573a3d0d04..815b9908c1e 100644 --- a/src/equations/laplace_diffusion_1d.jl +++ b/src/equations/laplace_diffusion_1d.jl @@ -5,44 +5,54 @@ with diffusivity ``\kappa`` applied to each solution component defined by `equations`. 
""" struct LaplaceDiffusion1D{E, N, T} <: AbstractLaplaceDiffusion{1, N} - diffusivity::T - equations_hyperbolic::E + diffusivity::T + equations_hyperbolic::E end -LaplaceDiffusion1D(diffusivity, equations_hyperbolic) = - LaplaceDiffusion1D{typeof(equations_hyperbolic), nvariables(equations_hyperbolic), typeof(diffusivity)}(diffusivity, equations_hyperbolic) +function LaplaceDiffusion1D(diffusivity, equations_hyperbolic) + LaplaceDiffusion1D{typeof(equations_hyperbolic), nvariables(equations_hyperbolic), + typeof(diffusivity)}(diffusivity, equations_hyperbolic) +end -varnames(variable_mapping, equations_parabolic::LaplaceDiffusion1D) = - varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +function varnames(variable_mapping, equations_parabolic::LaplaceDiffusion1D) + varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +end function flux(u, gradients, orientation::Integer, equations_parabolic::LaplaceDiffusion1D) - dudx = gradients - # orientation == 1 - return equations_parabolic.diffusivity * dudx + dudx = gradients + # orientation == 1 + return equations_parabolic.diffusivity * dudx end - # Dirichlet-type boundary condition for use with a parabolic solver in weak form -@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, +@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, equations_parabolic::LaplaceDiffusion1D) - return boundary_condition.boundary_value_function(x, t, equations_parabolic) + return boundary_condition.boundary_value_function(x, t, equations_parabolic) end -@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, +@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, equations_parabolic::LaplaceDiffusion1D) - return flux_inner + return flux_inner end -@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, +@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, equations_parabolic::LaplaceDiffusion1D) - return boundary_condition.boundary_normal_flux_function(x, t, equations_parabolic) + return boundary_condition.boundary_normal_flux_function(x, t, equations_parabolic) end -@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, +@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, equations_parabolic::LaplaceDiffusion1D) - return flux_inner + return flux_inner end diff --git a/src/equations/laplace_diffusion_2d.jl b/src/equations/laplace_diffusion_2d.jl index 3963c616af2..3443e9c097b 100644 --- a/src/equations/laplace_diffusion_2d.jl +++ b/src/equations/laplace_diffusion_2d.jl @@ -5,54 +5,66 @@ with diffusivity ``\kappa`` applied to each solution component defined by `equations`. 
""" struct LaplaceDiffusion2D{E, N, T} <: AbstractLaplaceDiffusion{2, N} - diffusivity::T - equations_hyperbolic::E + diffusivity::T + equations_hyperbolic::E end -LaplaceDiffusion2D(diffusivity, equations_hyperbolic) = - LaplaceDiffusion2D{typeof(equations_hyperbolic), nvariables(equations_hyperbolic), typeof(diffusivity)}(diffusivity, equations_hyperbolic) +function LaplaceDiffusion2D(diffusivity, equations_hyperbolic) + LaplaceDiffusion2D{typeof(equations_hyperbolic), nvariables(equations_hyperbolic), + typeof(diffusivity)}(diffusivity, equations_hyperbolic) +end -varnames(variable_mapping, equations_parabolic::LaplaceDiffusion2D) = - varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +function varnames(variable_mapping, equations_parabolic::LaplaceDiffusion2D) + varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +end # no orientation specified since the flux is vector-valued function flux(u, gradients, orientation::Integer, equations_parabolic::LaplaceDiffusion2D) - dudx, dudy = gradients - if orientation == 1 - return SVector(equations_parabolic.diffusivity * dudx) - else # if orientation == 2 - return SVector(equations_parabolic.diffusivity * dudy) - end + dudx, dudy = gradients + if orientation == 1 + return SVector(equations_parabolic.diffusivity * dudx) + else # if orientation == 2 + return SVector(equations_parabolic.diffusivity * dudy) + end end # TODO: parabolic; should this remain in the equations file, be moved to solvers, or live in the elixir? # The penalization depends on the solver, but also depends explicitly on physical parameters, # and would probably need to be specialized for every different equation. -function penalty(u_outer, u_inner, inv_h, equations_parabolic::LaplaceDiffusion2D, dg::ViscousFormulationLocalDG) - return dg.penalty_parameter * (u_outer - u_inner) * equations_parabolic.diffusivity +function penalty(u_outer, u_inner, inv_h, equations_parabolic::LaplaceDiffusion2D, + dg::ViscousFormulationLocalDG) + return dg.penalty_parameter * (u_outer - u_inner) * equations_parabolic.diffusivity end # Dirichlet-type boundary condition for use with a parabolic solver in weak form -@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, +@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, equations_parabolic::LaplaceDiffusion2D) - return boundary_condition.boundary_value_function(x, t, equations_parabolic) + return boundary_condition.boundary_value_function(x, t, equations_parabolic) end -@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, +@inline function (boundary_condition::BoundaryConditionDirichlet)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, equations_parabolic::LaplaceDiffusion2D) - return flux_inner + return flux_inner end -@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Divergence, +@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Divergence, equations_parabolic::LaplaceDiffusion2D) - return boundary_condition.boundary_normal_flux_function(x, t, equations_parabolic) + return boundary_condition.boundary_normal_flux_function(x, t, 
equations_parabolic) end -@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, normal::AbstractVector, - x, t, operator_type::Gradient, +@inline function (boundary_condition::BoundaryConditionNeumann)(flux_inner, u_inner, + normal::AbstractVector, + x, t, + operator_type::Gradient, equations_parabolic::LaplaceDiffusion2D) - return flux_inner + return flux_inner end diff --git a/src/equations/lattice_boltzmann_2d.jl b/src/equations/lattice_boltzmann_2d.jl index ee64ae591ae..272dd897ce3 100644 --- a/src/equations/lattice_boltzmann_2d.jl +++ b/src/equations/lattice_boltzmann_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" LatticeBoltzmannEquations2D(; Ma, Re, collision_op=collision_bgk, @@ -61,85 +61,90 @@ The main sources for the base implementation were 4. Dieter Krüger et al., **The Lattice Boltzmann Method**, Springer International Publishing, 2017 [doi:10.1007/978-3-319-44649-3](https://doi.org/10.1007/978-3-319-44649-3) """ -struct LatticeBoltzmannEquations2D{RealT<:Real, CollisionOp} <: AbstractLatticeBoltzmannEquations{2, 9} - c::RealT # mean thermal molecular velocity - c_s::RealT # isothermal speed of sound - rho0::RealT # macroscopic reference density +struct LatticeBoltzmannEquations2D{RealT <: Real, CollisionOp} <: + AbstractLatticeBoltzmannEquations{2, 9} + c::RealT # mean thermal molecular velocity + c_s::RealT # isothermal speed of sound + rho0::RealT # macroscopic reference density - Ma::RealT # characteristic Mach number - u0::RealT # macroscopic reference velocity + Ma::RealT # characteristic Mach number + u0::RealT # macroscopic reference velocity - Re::RealT # characteristic Reynolds number - L::RealT # reference length - nu::RealT # kinematic viscosity + Re::RealT # characteristic Reynolds number + L::RealT # reference length + nu::RealT # kinematic viscosity - weights::SVector{9, RealT} # weighting factors for the equilibrium distribution - v_alpha1::SVector{9, RealT} # discrete molecular velocity components in x-direction - v_alpha2::SVector{9, RealT} # discrete molecular velocity components in y-direction + weights::SVector{9, RealT} # weighting factors for the equilibrium distribution + v_alpha1::SVector{9, RealT} # discrete molecular velocity components in x-direction + v_alpha2::SVector{9, RealT} # discrete molecular velocity components in y-direction - collision_op::CollisionOp # collision operator for the collision kernel + collision_op::CollisionOp # collision operator for the collision kernel end -function LatticeBoltzmannEquations2D(; Ma, Re, collision_op=collision_bgk, - c=1, L=1, rho0=1, u0=nothing, nu=nothing) - # Sanity check that exactly one of Ma, u0 is not `nothing` - if isnothing(Ma) && isnothing(u0) - error("Mach number `Ma` and reference speed `u0` may not both be `nothing`") - elseif !isnothing(Ma) && !isnothing(u0) - error("Mach number `Ma` and reference speed `u0` may not both be set") - end - - # Sanity check that exactly one of Re, nu is not `nothing` - if isnothing(Re) && isnothing(nu) - error("Reynolds number `Re` and visocsity `nu` may not both be `nothing`") - elseif !isnothing(Re) && !isnothing(nu) - error("Reynolds number `Re` and visocsity `nu` may not both be set") - end - - # Calculate isothermal speed of sound - # The relation between the isothermal speed of sound `c_s` and the mean thermal molecular velocity - # `c` depends on the used phase space discretization, and is valid for 
D2Q9 (and others). For - # details, see, e.g., [3] in the docstring above. - c_s = c / sqrt(3) - - # Calculate missing quantities - if isnothing(Ma) - Ma = u0 / c_s - elseif isnothing(u0) - u0 = Ma * c_s - end - if isnothing(Re) - Re = u0 * L / nu - elseif isnothing(nu) - nu = u0 * L / Re - end - - # Promote to common data type - Ma, Re, c, L, rho0, u0, nu = promote(Ma, Re, c, L, rho0, u0, nu) - - # Source for weights and speeds: [4] in the docstring above - weights = SVector(1/9, 1/9, 1/9, 1/9, 1/36, 1/36, 1/36, 1/36, 4/9) - v_alpha1 = SVector( c, 0, -c, 0, c, -c, -c, c, 0 ) - v_alpha2 = SVector( 0, c, 0, -c, c, c, -c, -c, 0 ) - - LatticeBoltzmannEquations2D(c, c_s, rho0, Ma, u0, Re, L, nu, - weights, v_alpha1, v_alpha2, - collision_op) +function LatticeBoltzmannEquations2D(; Ma, Re, collision_op = collision_bgk, + c = 1, L = 1, rho0 = 1, u0 = nothing, nu = nothing) + # Sanity check that exactly one of Ma, u0 is not `nothing` + if isnothing(Ma) && isnothing(u0) + error("Mach number `Ma` and reference speed `u0` may not both be `nothing`") + elseif !isnothing(Ma) && !isnothing(u0) + error("Mach number `Ma` and reference speed `u0` may not both be set") + end + + # Sanity check that exactly one of Re, nu is not `nothing` + if isnothing(Re) && isnothing(nu) + error("Reynolds number `Re` and visocsity `nu` may not both be `nothing`") + elseif !isnothing(Re) && !isnothing(nu) + error("Reynolds number `Re` and visocsity `nu` may not both be set") + end + + # Calculate isothermal speed of sound + # The relation between the isothermal speed of sound `c_s` and the mean thermal molecular velocity + # `c` depends on the used phase space discretization, and is valid for D2Q9 (and others). For + # details, see, e.g., [3] in the docstring above. + c_s = c / sqrt(3) + + # Calculate missing quantities + if isnothing(Ma) + Ma = u0 / c_s + elseif isnothing(u0) + u0 = Ma * c_s + end + if isnothing(Re) + Re = u0 * L / nu + elseif isnothing(nu) + nu = u0 * L / Re + end + + # Promote to common data type + Ma, Re, c, L, rho0, u0, nu = promote(Ma, Re, c, L, rho0, u0, nu) + + # Source for weights and speeds: [4] in the docstring above + weights = SVector(1 / 9, 1 / 9, 1 / 9, 1 / 9, 1 / 36, 1 / 36, 1 / 36, 1 / 36, 4 / 9) + v_alpha1 = SVector(c, 0, -c, 0, c, -c, -c, c, 0) + v_alpha2 = SVector(0, c, 0, -c, c, c, -c, -c, 0) + + LatticeBoltzmannEquations2D(c, c_s, rho0, Ma, u0, Re, L, nu, + weights, v_alpha1, v_alpha2, + collision_op) end - -varnames(::typeof(cons2cons), equations::LatticeBoltzmannEquations2D) = ntuple(v -> "pdf"*string(v), nvariables(equations)) -varnames(::typeof(cons2prim), equations::LatticeBoltzmannEquations2D) = varnames(cons2cons, equations) - +function varnames(::typeof(cons2cons), equations::LatticeBoltzmannEquations2D) + ntuple(v -> "pdf" * string(v), nvariables(equations)) +end +function varnames(::typeof(cons2prim), equations::LatticeBoltzmannEquations2D) + varnames(cons2cons, equations) +end # Convert conservative variables to macroscopic @inline function cons2macroscopic(u, equations::LatticeBoltzmannEquations2D) - rho = density(u, equations) - v1, v2 = velocity(u, equations) - p = pressure(u, equations) - return SVector(rho, v1, v2, p) + rho = density(u, equations) + v1, v2 = velocity(u, equations) + p = pressure(u, equations) + return SVector(rho, v1, v2, p) +end +function varnames(::typeof(cons2macroscopic), ::LatticeBoltzmannEquations2D) + ("rho", "v1", "v2", "p") end -varnames(::typeof(cons2macroscopic), ::LatticeBoltzmannEquations2D) = ("rho", "v1", "v2", "p") # Set initial 
conditions at physical location `x` for time `t` """ @@ -148,15 +153,14 @@ varnames(::typeof(cons2macroscopic), ::LatticeBoltzmannEquations2D) = ("rho", "v A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equations::LatticeBoltzmannEquations2D) - @unpack u0 = equations - rho = pi - v1 = u0 - v2 = u0 + @unpack u0 = equations + rho = pi + v1 = u0 + v2 = u0 - return equilibrium_distribution(rho, v1, v2, equations) + return equilibrium_distribution(rho, v1, v2, equations) end - """ boundary_condition_noslip_wall(u_inner, orientation, direction, x, t, surface_flux_function, @@ -167,94 +171,91 @@ No-slip wall boundary condition using the bounce-back approach. @inline function boundary_condition_noslip_wall(u_inner, orientation, direction, x, t, surface_flux_function, equations::LatticeBoltzmannEquations2D) - # For LBM no-slip wall boundary conditions, we set the boundary state to - # - the inner state for outgoing particle distribution functions - # - the *opposite* inner state for all other particle distribution functions - # See the list of (opposite) directions in the docstring of `LatticeBoltzmannEquations2D`. - if direction == 1 # boundary in -x direction - pdf1 = u_inner[3] - pdf2 = u_inner[4] - pdf3 = u_inner[3] # outgoing - pdf4 = u_inner[2] - pdf5 = u_inner[7] - pdf6 = u_inner[6] # outgoing - pdf7 = u_inner[7] # outgoing - pdf8 = u_inner[6] - pdf9 = u_inner[9] - elseif direction == 2 # boundary in +x direction - pdf1 = u_inner[1] # outgoing - pdf2 = u_inner[4] - pdf3 = u_inner[1] - pdf4 = u_inner[2] - pdf5 = u_inner[5] # outgoing - pdf6 = u_inner[8] - pdf7 = u_inner[5] - pdf8 = u_inner[8] # outgoing - pdf9 = u_inner[9] - elseif direction == 3 # boundary in -y direction - pdf1 = u_inner[3] - pdf2 = u_inner[4] - pdf3 = u_inner[1] - pdf4 = u_inner[4] # outgoing - pdf5 = u_inner[7] - pdf6 = u_inner[8] - pdf7 = u_inner[7] # outgoing - pdf8 = u_inner[8] # outgoing - pdf9 = u_inner[9] - else # boundary in +y direction - pdf1 = u_inner[3] - pdf2 = u_inner[2] # outgoing - pdf3 = u_inner[1] - pdf4 = u_inner[2] - pdf5 = u_inner[5] # outgoing - pdf6 = u_inner[6] # outgoing - pdf7 = u_inner[5] - pdf8 = u_inner[6] - pdf9 = u_inner[9] - end - u_boundary = SVector(pdf1, pdf2, pdf3, pdf4, pdf5, pdf6, pdf7, pdf8, pdf9) - - # Calculate boundary flux - if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equations) - end - - return flux + # For LBM no-slip wall boundary conditions, we set the boundary state to + # - the inner state for outgoing particle distribution functions + # - the *opposite* inner state for all other particle distribution functions + # See the list of (opposite) directions in the docstring of `LatticeBoltzmannEquations2D`. 
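+    # For example, at the -x boundary (`direction == 1`) the unknown incoming
+    # distribution pdf1 (discrete velocity +c in x) is assigned the inner value
+    # of its opposite direction pdf3 (discrete velocity -c in x), which realizes
+    # the bounce-back reflection.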
+ if direction == 1 # boundary in -x direction + pdf1 = u_inner[3] + pdf2 = u_inner[4] + pdf3 = u_inner[3] # outgoing + pdf4 = u_inner[2] + pdf5 = u_inner[7] + pdf6 = u_inner[6] # outgoing + pdf7 = u_inner[7] # outgoing + pdf8 = u_inner[6] + pdf9 = u_inner[9] + elseif direction == 2 # boundary in +x direction + pdf1 = u_inner[1] # outgoing + pdf2 = u_inner[4] + pdf3 = u_inner[1] + pdf4 = u_inner[2] + pdf5 = u_inner[5] # outgoing + pdf6 = u_inner[8] + pdf7 = u_inner[5] + pdf8 = u_inner[8] # outgoing + pdf9 = u_inner[9] + elseif direction == 3 # boundary in -y direction + pdf1 = u_inner[3] + pdf2 = u_inner[4] + pdf3 = u_inner[1] + pdf4 = u_inner[4] # outgoing + pdf5 = u_inner[7] + pdf6 = u_inner[8] + pdf7 = u_inner[7] # outgoing + pdf8 = u_inner[8] # outgoing + pdf9 = u_inner[9] + else # boundary in +y direction + pdf1 = u_inner[3] + pdf2 = u_inner[2] # outgoing + pdf3 = u_inner[1] + pdf4 = u_inner[2] + pdf5 = u_inner[5] # outgoing + pdf6 = u_inner[6] # outgoing + pdf7 = u_inner[5] + pdf8 = u_inner[6] + pdf9 = u_inner[9] + end + u_boundary = SVector(pdf1, pdf2, pdf3, pdf4, pdf5, pdf6, pdf7, pdf8, pdf9) + + # Calculate boundary flux + if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equations) + end + + return flux end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equations::LatticeBoltzmannEquations2D) - # Calculate 1D flux in for a single point @inline function flux(u, orientation::Integer, equations::LatticeBoltzmannEquations2D) - if orientation == 1 - v_alpha = equations.v_alpha1 - else - v_alpha = equations.v_alpha2 - end - return v_alpha .* u + if orientation == 1 + v_alpha = equations.v_alpha1 + else + v_alpha = equations.v_alpha2 + end + return v_alpha .* u end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation # @inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::LatticeBoltzmannEquations2D) # λ_max = # end -@inline function flux_godunov(u_ll, u_rr, orientation::Integer, equations::LatticeBoltzmannEquations2D) - if orientation == 1 - v_alpha = equations.v_alpha1 - else - v_alpha = equations.v_alpha2 - end - return 0.5 * ( v_alpha .* (u_ll + u_rr) - abs.(v_alpha) .* (u_rr - u_ll) ) +@inline function flux_godunov(u_ll, u_rr, orientation::Integer, + equations::LatticeBoltzmannEquations2D) + if orientation == 1 + v_alpha = equations.v_alpha1 + else + v_alpha = equations.v_alpha2 + end + return 0.5 * (v_alpha .* (u_ll + u_rr) - abs.(v_alpha) .* (u_rr - u_ll)) end - """ density(p::Real, equations::LatticeBoltzmannEquations2D) density(u, equations::LatticeBoltzmannEquations2D) @@ -264,38 +265,36 @@ Calculate the macroscopic density from the pressure `p` or the particle distribu @inline density(p::Real, equations::LatticeBoltzmannEquations2D) = p / equations.c_s^2 @inline density(u, equations::LatticeBoltzmannEquations2D) = sum(u) - """ velocity(u, orientation, equations::LatticeBoltzmannEquations2D) Calculate the macroscopic velocity for the given `orientation` (1 -> x, 2 -> y) from the particle distribution functions `u`. 
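Internally, this is the first moment of the particle distribution functions,
``v_i = (1/ρ) Σ_α v_{α,i} f_α``, with the discrete velocities ``v_{α,i}`` stored
in the equation struct.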
""" -@inline function velocity(u, orientation::Integer, equations::LatticeBoltzmannEquations2D) - if orientation == 1 - v_alpha = equations.v_alpha1 - else - v_alpha = equations.v_alpha2 - end - - return dot(v_alpha, u)/density(u, equations) +@inline function velocity(u, orientation::Integer, + equations::LatticeBoltzmannEquations2D) + if orientation == 1 + v_alpha = equations.v_alpha1 + else + v_alpha = equations.v_alpha2 + end + + return dot(v_alpha, u) / density(u, equations) end - """ velocity(u, equations::LatticeBoltzmannEquations2D) Calculate the macroscopic velocity vector from the particle distribution functions `u`. """ @inline function velocity(u, equations::LatticeBoltzmannEquations2D) - @unpack v_alpha1, v_alpha2 = equations - rho = density(u, equations) + @unpack v_alpha1, v_alpha2 = equations + rho = density(u, equations) - return SVector(dot(v_alpha1, u)/rho, - dot(v_alpha2, u)/rho) + return SVector(dot(v_alpha1, u) / rho, + dot(v_alpha2, u) / rho) end - """ pressure(rho::Real, equations::LatticeBoltzmannEquations2D) pressure(u, equations::LatticeBoltzmannEquations2D) @@ -303,9 +302,12 @@ end Calculate the macroscopic pressure from the density `rho` or the particle distribution functions `u`. """ -@inline pressure(rho::Real, equations::LatticeBoltzmannEquations2D) = rho * equations.c_s^2 -@inline pressure(u, equations::LatticeBoltzmannEquations2D) = pressure(density(u, equations), equations) - +@inline function pressure(rho::Real, equations::LatticeBoltzmannEquations2D) + rho * equations.c_s^2 +end +@inline function pressure(u, equations::LatticeBoltzmannEquations2D) + pressure(density(u, equations), equations) +end """ equilibrium_distribution(alpha, rho, v1, v2, equations::LatticeBoltzmannEquations2D) @@ -313,67 +315,63 @@ Calculate the macroscopic pressure from the density `rho` or the particle distr Calculate the local equilibrium distribution for the distribution function with index `alpha` and given the macroscopic state defined by `rho`, `v1`, `v2`. 
""" -@inline function equilibrium_distribution(alpha, rho, v1, v2, equations::LatticeBoltzmannEquations2D) - @unpack weights, c_s, v_alpha1, v_alpha2 = equations - - va_v = v_alpha1[alpha]*v1 + v_alpha2[alpha]*v2 - cs_squared = c_s^2 - v_squared = v1^2 + v2^2 - - return weights[alpha] * rho * (1 + va_v/cs_squared - + va_v^2/(2*cs_squared^2) - - v_squared/(2*cs_squared)) +@inline function equilibrium_distribution(alpha, rho, v1, v2, + equations::LatticeBoltzmannEquations2D) + @unpack weights, c_s, v_alpha1, v_alpha2 = equations + + va_v = v_alpha1[alpha] * v1 + v_alpha2[alpha] * v2 + cs_squared = c_s^2 + v_squared = v1^2 + v2^2 + + return weights[alpha] * rho * + (1 + va_v / cs_squared + + va_v^2 / (2 * cs_squared^2) + - + v_squared / (2 * cs_squared)) end - -@inline function equilibrium_distribution(rho, v1, v2, equations::LatticeBoltzmannEquations2D) - return SVector(equilibrium_distribution(1, rho, v1, v2, equations), - equilibrium_distribution(2, rho, v1, v2, equations), - equilibrium_distribution(3, rho, v1, v2, equations), - equilibrium_distribution(4, rho, v1, v2, equations), - equilibrium_distribution(5, rho, v1, v2, equations), - equilibrium_distribution(6, rho, v1, v2, equations), - equilibrium_distribution(7, rho, v1, v2, equations), - equilibrium_distribution(8, rho, v1, v2, equations), - equilibrium_distribution(9, rho, v1, v2, equations)) +@inline function equilibrium_distribution(rho, v1, v2, + equations::LatticeBoltzmannEquations2D) + return SVector(equilibrium_distribution(1, rho, v1, v2, equations), + equilibrium_distribution(2, rho, v1, v2, equations), + equilibrium_distribution(3, rho, v1, v2, equations), + equilibrium_distribution(4, rho, v1, v2, equations), + equilibrium_distribution(5, rho, v1, v2, equations), + equilibrium_distribution(6, rho, v1, v2, equations), + equilibrium_distribution(7, rho, v1, v2, equations), + equilibrium_distribution(8, rho, v1, v2, equations), + equilibrium_distribution(9, rho, v1, v2, equations)) end - function equilibrium_distribution(u, equations::LatticeBoltzmannEquations2D) - rho = density(u, equations) - v1, v2 = velocity(u, equations) + rho = density(u, equations) + v1, v2 = velocity(u, equations) - return equilibrium_distribution(rho, v1, v2, equations) + return equilibrium_distribution(rho, v1, v2, equations) end - """ collision_bgk(u, dt, equations::LatticeBoltzmannEquations2D) Collision operator for the Bhatnagar, Gross, and Krook (BGK) model. """ @inline function collision_bgk(u, dt, equations::LatticeBoltzmannEquations2D) - @unpack c_s, nu = equations - tau = nu / (c_s^2 * dt) - return -(u - equilibrium_distribution(u, equations))/(tau + 1/2) + @unpack c_s, nu = equations + tau = nu / (c_s^2 * dt) + return -(u - equilibrium_distribution(u, equations)) / (tau + 1 / 2) end - - @inline have_constant_speed(::LatticeBoltzmannEquations2D) = True() @inline function max_abs_speeds(equations::LatticeBoltzmannEquations2D) - @unpack c = equations + @unpack c = equations - return c, c + return c, c end - # Convert conservative variables to primitive @inline cons2prim(u, equations::LatticeBoltzmannEquations2D) = u # Convert conservative variables to entropy variables @inline cons2entropy(u, equations::LatticeBoltzmannEquations2D) = u - - end # @muladd diff --git a/src/equations/lattice_boltzmann_3d.jl b/src/equations/lattice_boltzmann_3d.jl index 2e51af2245b..d3eada15f56 100644 --- a/src/equations/lattice_boltzmann_3d.jl +++ b/src/equations/lattice_boltzmann_3d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. 
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" LatticeBoltzmannEquations3D(; Ma, Re, collision_op=collision_bgk, @@ -100,96 +100,103 @@ The main sources for the base implementation were 4. Dieter Krüger et al., **The Lattice Boltzmann Method**, Springer International Publishing, 2017 [doi:10.1007/978-3-319-44649-3](https://doi.org/10.1007/978-3-319-44649-3) """ -struct LatticeBoltzmannEquations3D{RealT<:Real, CollisionOp} <: AbstractLatticeBoltzmannEquations{3, 27} - c::RealT # mean thermal molecular velocity - c_s::RealT # isothermal speed of sound - rho0::RealT # macroscopic reference density +struct LatticeBoltzmannEquations3D{RealT <: Real, CollisionOp} <: + AbstractLatticeBoltzmannEquations{3, 27} + c::RealT # mean thermal molecular velocity + c_s::RealT # isothermal speed of sound + rho0::RealT # macroscopic reference density - Ma::RealT # characteristic Mach number - u0::RealT # macroscopic reference velocity + Ma::RealT # characteristic Mach number + u0::RealT # macroscopic reference velocity - Re::RealT # characteristic Reynolds number - L::RealT # reference length - nu::RealT # kinematic viscosity + Re::RealT # characteristic Reynolds number + L::RealT # reference length + nu::RealT # kinematic viscosity - weights::SVector{27, RealT} # weighting factors for the equilibrium distribution - v_alpha1::SVector{27, RealT} # discrete molecular velocity components in x-direction - v_alpha2::SVector{27, RealT} # discrete molecular velocity components in y-direction - v_alpha3::SVector{27, RealT} # discrete molecular velocity components in z-direction + weights::SVector{27, RealT} # weighting factors for the equilibrium distribution + v_alpha1::SVector{27, RealT} # discrete molecular velocity components in x-direction + v_alpha2::SVector{27, RealT} # discrete molecular velocity components in y-direction + v_alpha3::SVector{27, RealT} # discrete molecular velocity components in z-direction - collision_op::CollisionOp # collision operator for the collision kernel + collision_op::CollisionOp # collision operator for the collision kernel end -function LatticeBoltzmannEquations3D(; Ma, Re, collision_op=collision_bgk, - c=1, L=1, rho0=1, u0=nothing, nu=nothing) - # Sanity check that exactly one of Ma, u0 is not `nothing` - if isnothing(Ma) && isnothing(u0) - error("Mach number `Ma` and reference speed `u0` may not both be `nothing`") - elseif !isnothing(Ma) && !isnothing(u0) - error("Mach number `Ma` and reference speed `u0` may not both be set") - end - - # Sanity check that exactly one of Re, nu is not `nothing` - if isnothing(Re) && isnothing(nu) - error("Reynolds number `Re` and visocsity `nu` may not both be `nothing`") - elseif !isnothing(Re) && !isnothing(nu) - error("Reynolds number `Re` and visocsity `nu` may not both be set") - end - - # Calculate isothermal speed of sound - # The relation between the isothermal speed of sound `c_s` and the mean thermal molecular velocity - # `c` depends on the used phase space discretization, and is valid for D3Q27 (and others). For - # details, see, e.g., [3] in the docstring above. 
- c_s = c / sqrt(3) - - # Calculate missing quantities - if isnothing(Ma) - Ma = u0 / c_s - elseif isnothing(u0) - u0 = Ma * c_s - end - if isnothing(Re) - Re = u0 * L / nu - elseif isnothing(nu) - nu = u0 * L / Re - end - - # Promote to common data type - Ma, Re, c, L, rho0, u0, nu = promote(Ma, Re, c, L, rho0, u0, nu) - - # Source for weights and speeds: [4] in docstring above - weights = SVector(2/27, 2/27, 2/27, 2/27, 2/27, 2/27, 1/54, 1/54, 1/54, - 1/54, 1/54, 1/54, 1/54, 1/54, 1/54, 1/54, 1/54, 1/54, - 1/216, 1/216, 1/216, 1/216, 1/216, 1/216, 1/216, 1/216, 8/27) - v_alpha1 = SVector( c, -c, 0, 0, 0, 0, c, -c, c, - -c, 0, 0, c, -c, c, -c, 0, 0, - c, -c, c, -c, c, -c, -c, c, 0) - v_alpha2 = SVector( 0, 0, c, -c, 0, 0, c, -c, 0, - 0, c, -c, -c, c, 0, 0, c, -c, - c, -c, c, -c, -c, c, c, -c, 0) - v_alpha3 = SVector( 0, 0, 0, 0, c, -c, 0, 0, c, - -c, c, -c, 0, 0, -c, c, -c, c, - c, -c, -c, c, c, -c, c, -c, 0) - - LatticeBoltzmannEquations3D(c, c_s, rho0, Ma, u0, Re, L, nu, - weights, v_alpha1, v_alpha2, v_alpha3, - collision_op) +function LatticeBoltzmannEquations3D(; Ma, Re, collision_op = collision_bgk, + c = 1, L = 1, rho0 = 1, u0 = nothing, nu = nothing) + # Sanity check that exactly one of Ma, u0 is not `nothing` + if isnothing(Ma) && isnothing(u0) + error("Mach number `Ma` and reference speed `u0` may not both be `nothing`") + elseif !isnothing(Ma) && !isnothing(u0) + error("Mach number `Ma` and reference speed `u0` may not both be set") + end + + # Sanity check that exactly one of Re, nu is not `nothing` + if isnothing(Re) && isnothing(nu) + error("Reynolds number `Re` and visocsity `nu` may not both be `nothing`") + elseif !isnothing(Re) && !isnothing(nu) + error("Reynolds number `Re` and visocsity `nu` may not both be set") + end + + # Calculate isothermal speed of sound + # The relation between the isothermal speed of sound `c_s` and the mean thermal molecular velocity + # `c` depends on the used phase space discretization, and is valid for D3Q27 (and others). For + # details, see, e.g., [3] in the docstring above. 
+ c_s = c / sqrt(3) + + # Calculate missing quantities + if isnothing(Ma) + Ma = u0 / c_s + elseif isnothing(u0) + u0 = Ma * c_s + end + if isnothing(Re) + Re = u0 * L / nu + elseif isnothing(nu) + nu = u0 * L / Re + end + + # Promote to common data type + Ma, Re, c, L, rho0, u0, nu = promote(Ma, Re, c, L, rho0, u0, nu) + + # Source for weights and speeds: [4] in docstring above + weights = SVector(2 / 27, 2 / 27, 2 / 27, 2 / 27, 2 / 27, 2 / 27, 1 / 54, 1 / 54, + 1 / 54, + 1 / 54, 1 / 54, 1 / 54, 1 / 54, 1 / 54, 1 / 54, 1 / 54, 1 / 54, + 1 / 54, + 1 / 216, 1 / 216, 1 / 216, 1 / 216, 1 / 216, 1 / 216, 1 / 216, + 1 / 216, 8 / 27) + v_alpha1 = SVector(c, -c, 0, 0, 0, 0, c, -c, c, + -c, 0, 0, c, -c, c, -c, 0, 0, + c, -c, c, -c, c, -c, -c, c, 0) + v_alpha2 = SVector(0, 0, c, -c, 0, 0, c, -c, 0, + 0, c, -c, -c, c, 0, 0, c, -c, + c, -c, c, -c, -c, c, c, -c, 0) + v_alpha3 = SVector(0, 0, 0, 0, c, -c, 0, 0, c, + -c, c, -c, 0, 0, -c, c, -c, c, + c, -c, -c, c, c, -c, c, -c, 0) + + LatticeBoltzmannEquations3D(c, c_s, rho0, Ma, u0, Re, L, nu, + weights, v_alpha1, v_alpha2, v_alpha3, + collision_op) end - -varnames(::typeof(cons2cons), equations::LatticeBoltzmannEquations3D) = ntuple(v -> "pdf"*string(v), Val(nvariables(equations))) -varnames(::typeof(cons2prim), equations::LatticeBoltzmannEquations3D) = varnames(cons2cons, equations) - +function varnames(::typeof(cons2cons), equations::LatticeBoltzmannEquations3D) + ntuple(v -> "pdf" * string(v), Val(nvariables(equations))) +end +function varnames(::typeof(cons2prim), equations::LatticeBoltzmannEquations3D) + varnames(cons2cons, equations) +end # Convert conservative variables to macroscopic @inline function cons2macroscopic(u, equations::LatticeBoltzmannEquations3D) - rho = density(u, equations) - v1, v2, v3 = velocity(u, equations) - p = pressure(u, equations) - return SVector(rho, v1, v2, v3, p) + rho = density(u, equations) + v1, v2, v3 = velocity(u, equations) + p = pressure(u, equations) + return SVector(rho, v1, v2, v3, p) +end +function varnames(::typeof(cons2macroscopic), ::LatticeBoltzmannEquations3D) + ("rho", "v1", "v2", "v3", "p") end -varnames(::typeof(cons2macroscopic), ::LatticeBoltzmannEquations3D) = ("rho", "v1", "v2", "v3", "p") - # Set initial conditions at physical location `x` for time `t` """ @@ -198,50 +205,47 @@ varnames(::typeof(cons2macroscopic), ::LatticeBoltzmannEquations3D) = ("rho", "v A constant initial condition to test free-stream preservation. 
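The state is initialized with the nontrivial constant density ``π`` and all velocity
components set to the reference velocity `u0`, then converted to particle distribution
functions via [`equilibrium_distribution`](@ref).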
""" function initial_condition_constant(x, t, equations::LatticeBoltzmannEquations3D) - @unpack u0 = equations - rho = pi - v1 = u0 - v2 = u0 - v3 = u0 + @unpack u0 = equations + rho = pi + v1 = u0 + v2 = u0 + v3 = u0 - return equilibrium_distribution(rho, v1, v2, v3, equations) + return equilibrium_distribution(rho, v1, v2, v3, equations) end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equations::LatticeBoltzmannEquations3D) - # Calculate 1D flux in for a single point @inline function flux(u, orientation::Integer, equations::LatticeBoltzmannEquations3D) - if orientation == 1 # x-direction - v_alpha = equations.v_alpha1 - elseif orientation == 2 # y-direction - v_alpha = equations.v_alpha2 - else # z-direction - v_alpha = equations.v_alpha3 - end - return v_alpha .* u + if orientation == 1 # x-direction + v_alpha = equations.v_alpha1 + elseif orientation == 2 # y-direction + v_alpha = equations.v_alpha2 + else # z-direction + v_alpha = equations.v_alpha3 + end + return v_alpha .* u end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation # @inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::LatticeBoltzmannEquations3D) # λ_max = # end -@inline function flux_godunov(u_ll, u_rr, orientation::Integer, equations::LatticeBoltzmannEquations3D) - if orientation == 1 # x-direction - v_alpha = equations.v_alpha1 - elseif orientation == 2 # y-direction - v_alpha = equations.v_alpha2 - else # z-direction - v_alpha = equations.v_alpha3 - end - return 0.5 * ( v_alpha .* (u_ll + u_rr) - abs.(v_alpha) .* (u_rr - u_ll) ) +@inline function flux_godunov(u_ll, u_rr, orientation::Integer, + equations::LatticeBoltzmannEquations3D) + if orientation == 1 # x-direction + v_alpha = equations.v_alpha1 + elseif orientation == 2 # y-direction + v_alpha = equations.v_alpha2 + else # z-direction + v_alpha = equations.v_alpha3 + end + return 0.5 * (v_alpha .* (u_ll + u_rr) - abs.(v_alpha) .* (u_rr - u_ll)) end - """ density(p::Real, equations::LatticeBoltzmannEquations3D) density(u, equations::LatticeBoltzmannEquations3D) @@ -251,41 +255,39 @@ Calculate the macroscopic density from the pressure `p` or the particle distribu @inline density(p::Real, equations::LatticeBoltzmannEquations3D) = p / equations.c_s^2 @inline density(u, equations::LatticeBoltzmannEquations3D) = sum(u) - """ velocity(u, orientation, equations::LatticeBoltzmannEquations3D) Calculate the macroscopic velocity for the given `orientation` (1 -> x, 2 -> y, 3 -> z) from the particle distribution functions `u`. """ -@inline function velocity(u, orientation::Integer, equations::LatticeBoltzmannEquations3D) - if orientation == 1 # x-direction - v_alpha = equations.v_alpha1 - elseif orientation == 2 # y-direction - v_alpha = equations.v_alpha2 - else # z-direction - v_alpha = equations.v_alpha3 - end - - return dot(v_alpha, u) / density(u, equations) +@inline function velocity(u, orientation::Integer, + equations::LatticeBoltzmannEquations3D) + if orientation == 1 # x-direction + v_alpha = equations.v_alpha1 + elseif orientation == 2 # y-direction + v_alpha = equations.v_alpha2 + else # z-direction + v_alpha = equations.v_alpha3 + end + + return dot(v_alpha, u) / density(u, equations) end - """ velocity(u, equations::LatticeBoltzmannEquations3D) Calculate the macroscopic velocity vector from the particle distribution functions `u`. 
""" @inline function velocity(u, equations::LatticeBoltzmannEquations3D) - @unpack v_alpha1, v_alpha2, v_alpha3 = equations - rho = density(u, equations) + @unpack v_alpha1, v_alpha2, v_alpha3 = equations + rho = density(u, equations) - return SVector(dot(v_alpha1, u)/rho, - dot(v_alpha2, u)/rho, - dot(v_alpha3, u)/rho) + return SVector(dot(v_alpha1, u) / rho, + dot(v_alpha2, u) / rho, + dot(v_alpha3, u) / rho) end - """ pressure(rho::Real, equations::LatticeBoltzmannEquations3D) pressure(u, equations::LatticeBoltzmannEquations3D) @@ -293,9 +295,12 @@ end Calculate the macroscopic pressure from the density `rho` or the particle distribution functions `u`. """ -@inline pressure(rho::Real, equations::LatticeBoltzmannEquations3D) = rho * equations.c_s^2 -@inline pressure(u, equations::LatticeBoltzmannEquations3D) = pressure(density(u, equations), equations) - +@inline function pressure(rho::Real, equations::LatticeBoltzmannEquations3D) + rho * equations.c_s^2 +end +@inline function pressure(u, equations::LatticeBoltzmannEquations3D) + pressure(density(u, equations), equations) +end """ equilibrium_distribution(alpha, rho, v1, v2, v3, equations::LatticeBoltzmannEquations3D) @@ -303,99 +308,95 @@ Calculate the macroscopic pressure from the density `rho` or the particle distr Calculate the local equilibrium distribution for the distribution function with index `alpha` and given the macroscopic state defined by `rho`, `v1`, `v2`, `v3`. """ -@inline function equilibrium_distribution(alpha, rho, v1, v2, v3, equations::LatticeBoltzmannEquations3D) - @unpack weights, c_s, v_alpha1, v_alpha2, v_alpha3 = equations - - va_v = v_alpha1[alpha]*v1 + v_alpha2[alpha]*v2 + v_alpha3[alpha]*v3 - cs_squared = c_s^2 - v_squared = v1^2 + v2^2 + v3^2 - - return weights[alpha] * rho * (1 + va_v/cs_squared - + va_v^2/(2*cs_squared^2) - - v_squared/(2*cs_squared)) +@inline function equilibrium_distribution(alpha, rho, v1, v2, v3, + equations::LatticeBoltzmannEquations3D) + @unpack weights, c_s, v_alpha1, v_alpha2, v_alpha3 = equations + + va_v = v_alpha1[alpha] * v1 + v_alpha2[alpha] * v2 + v_alpha3[alpha] * v3 + cs_squared = c_s^2 + v_squared = v1^2 + v2^2 + v3^2 + + return weights[alpha] * rho * + (1 + va_v / cs_squared + + va_v^2 / (2 * cs_squared^2) + - + v_squared / (2 * cs_squared)) end - -@inline function equilibrium_distribution(rho, v1, v2, v3, equations::LatticeBoltzmannEquations3D) - return SVector(equilibrium_distribution( 1, rho, v1, v2, v3, equations), - equilibrium_distribution( 2, rho, v1, v2, v3, equations), - equilibrium_distribution( 3, rho, v1, v2, v3, equations), - equilibrium_distribution( 4, rho, v1, v2, v3, equations), - equilibrium_distribution( 5, rho, v1, v2, v3, equations), - equilibrium_distribution( 6, rho, v1, v2, v3, equations), - equilibrium_distribution( 7, rho, v1, v2, v3, equations), - equilibrium_distribution( 8, rho, v1, v2, v3, equations), - equilibrium_distribution( 9, rho, v1, v2, v3, equations), - equilibrium_distribution(10, rho, v1, v2, v3, equations), - equilibrium_distribution(11, rho, v1, v2, v3, equations), - equilibrium_distribution(12, rho, v1, v2, v3, equations), - equilibrium_distribution(13, rho, v1, v2, v3, equations), - equilibrium_distribution(14, rho, v1, v2, v3, equations), - equilibrium_distribution(15, rho, v1, v2, v3, equations), - equilibrium_distribution(16, rho, v1, v2, v3, equations), - equilibrium_distribution(17, rho, v1, v2, v3, equations), - equilibrium_distribution(18, rho, v1, v2, v3, equations), - equilibrium_distribution(19, rho, v1, v2, v3, 
equations), - equilibrium_distribution(20, rho, v1, v2, v3, equations), - equilibrium_distribution(21, rho, v1, v2, v3, equations), - equilibrium_distribution(22, rho, v1, v2, v3, equations), - equilibrium_distribution(23, rho, v1, v2, v3, equations), - equilibrium_distribution(24, rho, v1, v2, v3, equations), - equilibrium_distribution(25, rho, v1, v2, v3, equations), - equilibrium_distribution(26, rho, v1, v2, v3, equations), - equilibrium_distribution(27, rho, v1, v2, v3, equations)) +@inline function equilibrium_distribution(rho, v1, v2, v3, + equations::LatticeBoltzmannEquations3D) + return SVector(equilibrium_distribution(1, rho, v1, v2, v3, equations), + equilibrium_distribution(2, rho, v1, v2, v3, equations), + equilibrium_distribution(3, rho, v1, v2, v3, equations), + equilibrium_distribution(4, rho, v1, v2, v3, equations), + equilibrium_distribution(5, rho, v1, v2, v3, equations), + equilibrium_distribution(6, rho, v1, v2, v3, equations), + equilibrium_distribution(7, rho, v1, v2, v3, equations), + equilibrium_distribution(8, rho, v1, v2, v3, equations), + equilibrium_distribution(9, rho, v1, v2, v3, equations), + equilibrium_distribution(10, rho, v1, v2, v3, equations), + equilibrium_distribution(11, rho, v1, v2, v3, equations), + equilibrium_distribution(12, rho, v1, v2, v3, equations), + equilibrium_distribution(13, rho, v1, v2, v3, equations), + equilibrium_distribution(14, rho, v1, v2, v3, equations), + equilibrium_distribution(15, rho, v1, v2, v3, equations), + equilibrium_distribution(16, rho, v1, v2, v3, equations), + equilibrium_distribution(17, rho, v1, v2, v3, equations), + equilibrium_distribution(18, rho, v1, v2, v3, equations), + equilibrium_distribution(19, rho, v1, v2, v3, equations), + equilibrium_distribution(20, rho, v1, v2, v3, equations), + equilibrium_distribution(21, rho, v1, v2, v3, equations), + equilibrium_distribution(22, rho, v1, v2, v3, equations), + equilibrium_distribution(23, rho, v1, v2, v3, equations), + equilibrium_distribution(24, rho, v1, v2, v3, equations), + equilibrium_distribution(25, rho, v1, v2, v3, equations), + equilibrium_distribution(26, rho, v1, v2, v3, equations), + equilibrium_distribution(27, rho, v1, v2, v3, equations)) end - function equilibrium_distribution(u, equations::LatticeBoltzmannEquations3D) - rho = density(u, equations) - v1, v2, v3 = velocity(u, equations) + rho = density(u, equations) + v1, v2, v3 = velocity(u, equations) - return equilibrium_distribution(rho, v1, v2, v3, equations) + return equilibrium_distribution(rho, v1, v2, v3, equations) end - """ collision_bgk(u, dt, equations::LatticeBoltzmannEquations3D) Collision operator for the Bhatnagar, Gross, and Krook (BGK) model. 
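The update implemented below relaxes the state towards the local equilibrium,
``Δf_α = -(f_α - f_α^eq) / (τ + 1/2)`` with ``τ = ν / (c_s^2 Δt)``, where the extra
``1/2`` in the denominator is the usual correction arising from the discrete-time
integration along characteristics.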
""" @inline function collision_bgk(u, dt, equations::LatticeBoltzmannEquations3D) - @unpack c_s, nu = equations - tau = nu / (c_s^2 * dt) - return -(u - equilibrium_distribution(u, equations))/(tau + 1/2) + @unpack c_s, nu = equations + tau = nu / (c_s^2 * dt) + return -(u - equilibrium_distribution(u, equations)) / (tau + 1 / 2) end - - @inline have_constant_speed(::LatticeBoltzmannEquations3D) = True() @inline function max_abs_speeds(equations::LatticeBoltzmannEquations3D) - @unpack c = equations + @unpack c = equations - return c, c, c + return c, c, c end - # Convert conservative variables to primitive @inline cons2prim(u, equations::LatticeBoltzmannEquations3D) = u # Convert conservative variables to entropy variables @inline cons2entropy(u, equations::LatticeBoltzmannEquations3D) = u - # Calculate kinetic energy for a conservative state `u` @inline function energy_kinetic(u, equations::LatticeBoltzmannEquations3D) - rho = density(u, equations) - v1, v2, v3 = velocity(u, equations) + rho = density(u, equations) + v1, v2, v3 = velocity(u, equations) - return 0.5 * (v1^2 + v2^2 + v3^2) / rho / equations.rho0 + return 0.5 * (v1^2 + v2^2 + v3^2) / rho / equations.rho0 end # Calculate nondimensionalized kinetic energy for a conservative state `u` -@inline function energy_kinetic_nondimensional(u, equations::LatticeBoltzmannEquations3D) - return energy_kinetic(u, equations) / equations.u0^2 +@inline function energy_kinetic_nondimensional(u, + equations::LatticeBoltzmannEquations3D) + return energy_kinetic(u, equations) / equations.u0^2 end - - end # @muladd diff --git a/src/equations/linear_scalar_advection_1d.jl b/src/equations/linear_scalar_advection_1d.jl index a70c3b72b9b..7769cb61fbf 100644 --- a/src/equations/linear_scalar_advection_1d.jl +++ b/src/equations/linear_scalar_advection_1d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" LinearScalarAdvectionEquation1D @@ -14,18 +14,17 @@ The linear scalar advection equation ``` in one space dimension with constant velocity `a`. """ -struct LinearScalarAdvectionEquation1D{RealT<:Real} <: AbstractLinearScalarAdvectionEquation{1, 1} - advection_velocity::SVector{1, RealT} +struct LinearScalarAdvectionEquation1D{RealT <: Real} <: + AbstractLinearScalarAdvectionEquation{1, 1} + advection_velocity::SVector{1, RealT} end function LinearScalarAdvectionEquation1D(a::Real) - LinearScalarAdvectionEquation1D(SVector(a)) + LinearScalarAdvectionEquation1D(SVector(a)) end - -varnames(::typeof(cons2cons), ::LinearScalarAdvectionEquation1D) = ("scalar", ) -varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation1D) = ("scalar", ) - +varnames(::typeof(cons2cons), ::LinearScalarAdvectionEquation1D) = ("scalar",) +varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation1D) = ("scalar",) # Set initial conditions at physical location `x` for time `t` """ @@ -34,13 +33,12 @@ varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation1D) = ("scalar", ) A constant initial condition to test free-stream preservation. 
""" function initial_condition_constant(x, t, equation::LinearScalarAdvectionEquation1D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - return SVector(2.0) + return SVector(2.0) end - """ initial_condition_convergence_test(x, t, equations::LinearScalarAdvectionEquation1D) @@ -48,20 +46,20 @@ A smooth initial condition used for convergence tests (in combination with [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ -function initial_condition_convergence_test(x, t, equation::LinearScalarAdvectionEquation1D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t - - c = 1.0 - A = 0.5 - L = 2 - f = 1/L - omega = 2 * pi * f - scalar = c + A * sin(omega * sum(x_trans)) - return SVector(scalar) +function initial_condition_convergence_test(x, t, + equation::LinearScalarAdvectionEquation1D) + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t + + c = 1.0 + A = 0.5 + L = 2 + f = 1 / L + omega = 2 * pi * f + scalar = c + A * sin(omega * sum(x_trans)) + return SVector(scalar) end - """ initial_condition_gauss(x, t, equations::LinearScalarAdvectionEquation1D) @@ -69,28 +67,26 @@ A Gaussian pulse used together with [`BoundaryConditionDirichlet(initial_condition_gauss)`](@ref). """ function initial_condition_gauss(x, t, equation::LinearScalarAdvectionEquation1D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - scalar = exp(-(x_trans[1]^2)) - return SVector(scalar) + scalar = exp(-(x_trans[1]^2)) + return SVector(scalar) end - """ initial_condition_sin(x, t, equations::LinearScalarAdvectionEquation1D) A sine wave in the conserved variable. """ function initial_condition_sin(x, t, equation::LinearScalarAdvectionEquation1D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - scalar = sinpi(2 * x_trans[1]) - return SVector(scalar) + scalar = sinpi(2 * x_trans[1]) + return SVector(scalar) end - """ initial_condition_linear_x(x, t, equations::LinearScalarAdvectionEquation1D) @@ -98,10 +94,10 @@ A linear function of `x[1]` used together with [`boundary_condition_linear_x`](@ref). 
""" function initial_condition_linear_x(x, t, equation::LinearScalarAdvectionEquation1D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - return SVector(x_trans[1]) + return SVector(x_trans[1]) end """ @@ -115,84 +111,80 @@ Boundary conditions for function boundary_condition_linear_x(u_inner, orientation, direction, x, t, surface_flux_function, equation::LinearScalarAdvectionEquation1D) - u_boundary = initial_condition_linear_x(x, t, equation) + u_boundary = initial_condition_linear_x(x, t, equation) - # Calculate boundary flux - if direction == 2 # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equation) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equation) - end + # Calculate boundary flux + if direction == 2 # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equation) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equation) + end - return flux + return flux end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equations::LinearScalarAdvectionEquation1D) - # Calculate 1D flux in for a single point -@inline function flux(u, orientation::Integer, equation::LinearScalarAdvectionEquation1D) - a = equation.advection_velocity[orientation] - return a * u +@inline function flux(u, orientation::Integer, + equation::LinearScalarAdvectionEquation1D) + a = equation.advection_velocity[orientation] + return a * u end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Int, equation::LinearScalarAdvectionEquation1D) - λ_max = abs(equation.advection_velocity[orientation]) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Int, + equation::LinearScalarAdvectionEquation1D) + λ_max = abs(equation.advection_velocity[orientation]) end - # Essentially first order upwind, see e.g. # https://math.stackexchange.com/a/4355076/805029 -function flux_godunov(u_ll, u_rr, orientation::Int, equation::LinearScalarAdvectionEquation1D) - u_L = u_ll[1] - u_R = u_rr[1] - - v_normal = equation.advection_velocity[orientation] - if v_normal >= 0 - return SVector(v_normal * u_L) - else - return SVector(v_normal * u_R) - end +function flux_godunov(u_ll, u_rr, orientation::Int, + equation::LinearScalarAdvectionEquation1D) + u_L = u_ll[1] + u_R = u_rr[1] + + v_normal = equation.advection_velocity[orientation] + if v_normal >= 0 + return SVector(v_normal * u_L) + else + return SVector(v_normal * u_R) + end end - # See https://metaphor.ethz.ch/x/2019/hs/401-4671-00L/literature/mishra_hyperbolic_pdes.pdf , # section 4.2.5 and especially equation (4.33). 
-function flux_engquist_osher(u_ll, u_rr, orientation::Int, equation::LinearScalarAdvectionEquation1D) - u_L = u_ll[1] - u_R = u_rr[1] - - return SVector(0.5 * (flux(u_L, orientation, equation) + flux(u_R, orientation, equation) - - abs(equation.advection_velocity[orientation]) * (u_R - u_L))) +function flux_engquist_osher(u_ll, u_rr, orientation::Int, + equation::LinearScalarAdvectionEquation1D) + u_L = u_ll[1] + u_R = u_rr[1] + + return SVector(0.5 * (flux(u_L, orientation, equation) + + flux(u_R, orientation, equation) - + abs(equation.advection_velocity[orientation]) * (u_R - u_L))) end - @inline have_constant_speed(::LinearScalarAdvectionEquation1D) = True() @inline function max_abs_speeds(equation::LinearScalarAdvectionEquation1D) - return abs.(equation.advection_velocity) + return abs.(equation.advection_velocity) end - # Convert conservative variables to primitive @inline cons2prim(u, equation::LinearScalarAdvectionEquation1D) = u # Convert conservative variables to entropy variables @inline cons2entropy(u, equation::LinearScalarAdvectionEquation1D) = u - # Calculate entropy for a conservative state `cons` @inline entropy(u::Real, ::LinearScalarAdvectionEquation1D) = 0.5 * u^2 @inline entropy(u, equation::LinearScalarAdvectionEquation1D) = entropy(u[1], equation) - # Calculate total energy for a conservative state `cons` @inline energy_total(u::Real, ::LinearScalarAdvectionEquation1D) = 0.5 * u^2 -@inline energy_total(u, equation::LinearScalarAdvectionEquation1D) = energy_total(u[1], equation) - - +@inline function energy_total(u, equation::LinearScalarAdvectionEquation1D) + energy_total(u[1], equation) +end end # @muladd diff --git a/src/equations/linear_scalar_advection_2d.jl b/src/equations/linear_scalar_advection_2d.jl index 9fe7d5f9b90..d90bf0c8793 100644 --- a/src/equations/linear_scalar_advection_2d.jl +++ b/src/equations/linear_scalar_advection_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" LinearScalarAdvectionEquation2D @@ -14,28 +14,29 @@ The linear scalar advection equation ``` in two space dimensions with constant velocity `a`. 
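Both constructors defined below create the same equation object, e.g. (a sketch; the velocity components are illustrative):

```julia
using Trixi

equations = LinearScalarAdvectionEquation2D(0.2, -0.7)
# tuple and component constructors agree
equations == LinearScalarAdvectionEquation2D((0.2, -0.7))  # true
```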
""" -struct LinearScalarAdvectionEquation2D{RealT<:Real} <: AbstractLinearScalarAdvectionEquation{2, 1} - advection_velocity::SVector{2, RealT} +struct LinearScalarAdvectionEquation2D{RealT <: Real} <: + AbstractLinearScalarAdvectionEquation{2, 1} + advection_velocity::SVector{2, RealT} end -function LinearScalarAdvectionEquation2D(a::NTuple{2,<:Real}) - LinearScalarAdvectionEquation2D(SVector(a)) +function LinearScalarAdvectionEquation2D(a::NTuple{2, <:Real}) + LinearScalarAdvectionEquation2D(SVector(a)) end function LinearScalarAdvectionEquation2D(a1::Real, a2::Real) - LinearScalarAdvectionEquation2D(SVector(a1, a2)) + LinearScalarAdvectionEquation2D(SVector(a1, a2)) end - -varnames(::typeof(cons2cons), ::LinearScalarAdvectionEquation2D) = ("scalar", ) -varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation2D) = ("scalar", ) +varnames(::typeof(cons2cons), ::LinearScalarAdvectionEquation2D) = ("scalar",) +varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation2D) = ("scalar",) # Calculates translated coordinates `x` for a periodic domain function x_trans_periodic_2d(x, domain_length = SVector(10, 10), center = SVector(0, 0)) - x_normalized = x .- center - x_shifted = x_normalized .% domain_length - x_offset = ((x_shifted .< -0.5*domain_length) - (x_shifted .> 0.5*domain_length)) .* domain_length - return center + x_shifted + x_offset + x_normalized = x .- center + x_shifted = x_normalized .% domain_length + x_offset = ((x_shifted .< -0.5 * domain_length) - + (x_shifted .> 0.5 * domain_length)) .* domain_length + return center + x_shifted + x_offset end # Set initial conditions at physical location `x` for time `t` @@ -45,32 +46,31 @@ end A constant initial condition to test free-stream preservation. """ function initial_condition_constant(x, t, equation::LinearScalarAdvectionEquation2D) - # Store translated coordinate for easy use of exact solution - x_trans = x_trans_periodic_2d(x - equation.advection_velocity * t) + # Store translated coordinate for easy use of exact solution + x_trans = x_trans_periodic_2d(x - equation.advection_velocity * t) - return SVector(2.0) + return SVector(2.0) end - """ initial_condition_convergence_test(x, t, equations::LinearScalarAdvectionEquation2D) A smooth initial condition used for convergence tests. """ -function initial_condition_convergence_test(x, t, equation::LinearScalarAdvectionEquation2D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t - - c = 1.0 - A = 0.5 - L = 2 - f = 1/L - omega = 2 * pi * f - scalar = c + A * sin(omega * sum(x_trans)) - return SVector(scalar) +function initial_condition_convergence_test(x, t, + equation::LinearScalarAdvectionEquation2D) + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t + + c = 1.0 + A = 0.5 + L = 2 + f = 1 / L + omega = 2 * pi * f + scalar = c + A * sin(omega * sum(x_trans)) + return SVector(scalar) end - """ initial_condition_gauss(x, t, equation::LinearScalarAdvectionEquation2D) @@ -78,28 +78,26 @@ A Gaussian pulse used together with [`BoundaryConditionDirichlet(initial_condition_gauss)`](@ref). 
""" function initial_condition_gauss(x, t, equation::LinearScalarAdvectionEquation2D) - # Store translated coordinate for easy use of exact solution - x_trans = x_trans_periodic_2d(x - equation.advection_velocity * t) + # Store translated coordinate for easy use of exact solution + x_trans = x_trans_periodic_2d(x - equation.advection_velocity * t) - scalar = exp(-(x_trans[1]^2 + x_trans[2]^2)) - return SVector(scalar) + scalar = exp(-(x_trans[1]^2 + x_trans[2]^2)) + return SVector(scalar) end - """ initial_condition_sin_sin(x, t, equations::LinearScalarAdvectionEquation2D) A sine wave in the conserved variable. """ function initial_condition_sin_sin(x, t, equation::LinearScalarAdvectionEquation2D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - scalar = sinpi(2 * x_trans[1]) * sinpi(2 * x_trans[2]) - return SVector(scalar) + scalar = sinpi(2 * x_trans[1]) * sinpi(2 * x_trans[2]) + return SVector(scalar) end - """ initial_condition_linear_x_y(x, t, equations::LinearScalarAdvectionEquation2D) @@ -107,10 +105,10 @@ A linear function of `x[1] + x[2]` used together with [`boundary_condition_linear_x_y`](@ref). """ function initial_condition_linear_x_y(x, t, equation::LinearScalarAdvectionEquation2D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - return SVector(sum(x_trans)) + return SVector(sum(x_trans)) end """ @@ -124,19 +122,18 @@ Boundary conditions for function boundary_condition_linear_x_y(u_inner, orientation, direction, x, t, surface_flux_function, equation::LinearScalarAdvectionEquation2D) - u_boundary = initial_condition_linear_x_y(x, t, equation) + u_boundary = initial_condition_linear_x_y(x, t, equation) - # Calculate boundary flux - if direction in (2, 4) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equation) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equation) - end + # Calculate boundary flux + if direction in (2, 4) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equation) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equation) + end - return flux + return flux end - """ initial_condition_linear_x(x, t, equations::LinearScalarAdvectionEquation2D) @@ -144,10 +141,10 @@ A linear function of `x[1]` used together with [`boundary_condition_linear_x`](@ref). 
""" function initial_condition_linear_x(x, t, equation::LinearScalarAdvectionEquation2D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - return SVector(x_trans[1]) + return SVector(x_trans[1]) end """ @@ -161,19 +158,18 @@ Boundary conditions for function boundary_condition_linear_x(u_inner, orientation, direction, x, t, surface_flux_function, equation::LinearScalarAdvectionEquation2D) - u_boundary = initial_condition_linear_x(x, t, equation) + u_boundary = initial_condition_linear_x(x, t, equation) - # Calculate boundary flux - if direction in (2, 4) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equation) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equation) - end + # Calculate boundary flux + if direction in (2, 4) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equation) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equation) + end - return flux + return flux end - """ initial_condition_linear_y(x, t, equations::LinearScalarAdvectionEquation2D) @@ -181,10 +177,10 @@ A linear function of `x[1]` used together with [`boundary_condition_linear_y`](@ref). """ function initial_condition_linear_y(x, t, equation::LinearScalarAdvectionEquation2D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - return SVector(x_trans[2]) + return SVector(x_trans[2]) end """ @@ -198,102 +194,98 @@ Boundary conditions for function boundary_condition_linear_y(u_inner, orientation, direction, x, t, surface_flux_function, equation::LinearScalarAdvectionEquation2D) - u_boundary = initial_condition_linear_y(x, t, equation) + u_boundary = initial_condition_linear_y(x, t, equation) - # Calculate boundary flux - if direction in (2, 4) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equation) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equation) - end + # Calculate boundary flux + if direction in (2, 4) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equation) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equation) + end - return flux + return flux end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equations::LinearScalarAdvectionEquation2D) - # Calculate 1D flux for a single point -@inline function flux(u, orientation::Integer, equation::LinearScalarAdvectionEquation2D) - a = equation.advection_velocity[orientation] - return a * u +@inline function flux(u, orientation::Integer, + equation::LinearScalarAdvectionEquation2D) + a = equation.advection_velocity[orientation] + return a * u end - # Calculate maximum wave speed for local 
Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equation::LinearScalarAdvectionEquation2D) - λ_max = abs(equation.advection_velocity[orientation]) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equation::LinearScalarAdvectionEquation2D) + λ_max = abs(equation.advection_velocity[orientation]) end - # Calculate 1D flux for a single point in the normal direction # Note, this directional vector is not normalized -@inline function flux(u, normal_direction::AbstractVector, equation::LinearScalarAdvectionEquation2D) - a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction - return a * u +@inline function flux(u, normal_direction::AbstractVector, + equation::LinearScalarAdvectionEquation2D) + a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction + return a * u end - # Calculate maximum wave speed in the normal direction for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equation::LinearScalarAdvectionEquation2D) - a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction - return abs(a) +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equation::LinearScalarAdvectionEquation2D) + a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction + return abs(a) end - # Essentially first order upwind, see e.g. # https://math.stackexchange.com/a/4355076/805029 -function flux_godunov(u_ll, u_rr, orientation::Integer, equation::LinearScalarAdvectionEquation2D) - u_L = u_ll[1] - u_R = u_rr[1] - - v_normal = equation.advection_velocity[orientation] - if v_normal >= 0 - return SVector(v_normal * u_L) - else - return SVector(v_normal * u_R) - end +function flux_godunov(u_ll, u_rr, orientation::Integer, + equation::LinearScalarAdvectionEquation2D) + u_L = u_ll[1] + u_R = u_rr[1] + + v_normal = equation.advection_velocity[orientation] + if v_normal >= 0 + return SVector(v_normal * u_L) + else + return SVector(v_normal * u_R) + end end # Essentially first order upwind, see e.g. 
# https://math.stackexchange.com/a/4355076/805029 -function flux_godunov(u_ll, u_rr, normal_direction::AbstractVector, equation::LinearScalarAdvectionEquation2D) - u_L = u_ll[1] - u_R = u_rr[1] - - a_normal = dot(equation.advection_velocity, normal_direction) - if a_normal >= 0 - return SVector(a_normal * u_L) - else - return SVector(a_normal * u_R) - end +function flux_godunov(u_ll, u_rr, normal_direction::AbstractVector, + equation::LinearScalarAdvectionEquation2D) + u_L = u_ll[1] + u_R = u_rr[1] + + a_normal = dot(equation.advection_velocity, normal_direction) + if a_normal >= 0 + return SVector(a_normal * u_L) + else + return SVector(a_normal * u_R) + end end - @inline have_constant_speed(::LinearScalarAdvectionEquation2D) = True() @inline function max_abs_speeds(equation::LinearScalarAdvectionEquation2D) - return abs.(equation.advection_velocity) + return abs.(equation.advection_velocity) end - # Convert conservative variables to primitive @inline cons2prim(u, equation::LinearScalarAdvectionEquation2D) = u # Convert conservative variables to entropy variables @inline cons2entropy(u, equation::LinearScalarAdvectionEquation2D) = u - # Calculate entropy for a conservative state `cons` @inline entropy(u::Real, ::LinearScalarAdvectionEquation2D) = 0.5 * u^2 @inline entropy(u, equation::LinearScalarAdvectionEquation2D) = entropy(u[1], equation) - # Calculate total energy for a conservative state `cons` @inline energy_total(u::Real, ::LinearScalarAdvectionEquation2D) = 0.5 * u^2 -@inline energy_total(u, equation::LinearScalarAdvectionEquation2D) = energy_total(u[1], equation) - - +@inline function energy_total(u, equation::LinearScalarAdvectionEquation2D) + energy_total(u[1], equation) +end end # @muladd diff --git a/src/equations/linear_scalar_advection_3d.jl b/src/equations/linear_scalar_advection_3d.jl index 218eaf8816c..7b19974eb49 100644 --- a/src/equations/linear_scalar_advection_3d.jl +++ b/src/equations/linear_scalar_advection_3d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" LinearScalarAdvectionEquation3D @@ -14,22 +14,21 @@ The linear scalar advection equation ``` in three space dimensions with constant velocity `a`. """ -struct LinearScalarAdvectionEquation3D{RealT<:Real} <: AbstractLinearScalarAdvectionEquation{3, 1} - advection_velocity::SVector{3, RealT} +struct LinearScalarAdvectionEquation3D{RealT <: Real} <: + AbstractLinearScalarAdvectionEquation{3, 1} + advection_velocity::SVector{3, RealT} end -function LinearScalarAdvectionEquation3D(a::NTuple{3,<:Real}) - LinearScalarAdvectionEquation3D(SVector(a)) +function LinearScalarAdvectionEquation3D(a::NTuple{3, <:Real}) + LinearScalarAdvectionEquation3D(SVector(a)) end function LinearScalarAdvectionEquation3D(a1::Real, a2::Real, a3::Real) - LinearScalarAdvectionEquation3D(SVector(a1, a2, a3)) + LinearScalarAdvectionEquation3D(SVector(a1, a2, a3)) end - -varnames(::typeof(cons2cons), ::LinearScalarAdvectionEquation3D) = ("scalar", ) -varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation3D) = ("scalar", ) - +varnames(::typeof(cons2cons), ::LinearScalarAdvectionEquation3D) = ("scalar",) +varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation3D) = ("scalar",) # Set initial conditions at physical location `x` for time `t` """ @@ -38,60 +37,58 @@ varnames(::typeof(cons2prim), ::LinearScalarAdvectionEquation3D) = ("scalar", ) A constant initial condition to test free-stream preservation. 
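Free-stream preservation can be sanity-checked directly, since the state is independent of the evaluation point and time (a sketch with illustrative numbers):

```julia
using Trixi

equations = LinearScalarAdvectionEquation3D(0.2, -0.7, 0.5)
initial_condition_constant(SVector(0.1, 0.2, 0.3), 0.0, equations) ==
    initial_condition_constant(SVector(0.9, -0.4, 0.7), 1.0, equations)  # true
```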
""" function initial_condition_constant(x, t, equation::LinearScalarAdvectionEquation3D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - return SVector(2.0) + return SVector(2.0) end - """ initial_condition_convergence_test(x, t, equations::LinearScalarAdvectionEquation1D) A smooth initial condition used for convergence tests. """ -function initial_condition_convergence_test(x, t, equation::LinearScalarAdvectionEquation3D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t +function initial_condition_convergence_test(x, t, + equation::LinearScalarAdvectionEquation3D) + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - c = 1.0 - A = 0.5 - L = 2 - f = 1/L - omega = 2 * pi * f - scalar = c + A * sin(omega * sum(x_trans)) - return SVector(scalar) + c = 1.0 + A = 0.5 + L = 2 + f = 1 / L + omega = 2 * pi * f + scalar = c + A * sin(omega * sum(x_trans)) + return SVector(scalar) end - """ initial_condition_gauss(x, t, equations::LinearScalarAdvectionEquation1D) A Gaussian pulse. """ function initial_condition_gauss(x, t, equation::LinearScalarAdvectionEquation3D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - scalar = exp(-(x_trans[1]^2 + x_trans[2]^2 + x_trans[3]^2)) - return SVector(scalar) + scalar = exp(-(x_trans[1]^2 + x_trans[2]^2 + x_trans[3]^2)) + return SVector(scalar) end - """ initial_condition_sin(x, t, equations::LinearScalarAdvectionEquation1D) A sine wave in the conserved variable. """ function initial_condition_sin(x, t, equation::LinearScalarAdvectionEquation3D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - scalar = sin(2 * pi * x_trans[1]) * sin(2 * pi * x_trans[2]) * sin(2 * pi * x_trans[3]) - return SVector(scalar) + scalar = sin(2 * pi * x_trans[1]) * sin(2 * pi * x_trans[2]) * + sin(2 * pi * x_trans[3]) + return SVector(scalar) end - """ initial_condition_linear_z(x, t, equations::LinearScalarAdvectionEquation1D) @@ -99,10 +96,10 @@ A linear function of `x[3]` used together with [`boundary_condition_linear_z`](@ref). 
""" function initial_condition_linear_z(x, t, equation::LinearScalarAdvectionEquation3D) - # Store translated coordinate for easy use of exact solution - x_trans = x - equation.advection_velocity * t + # Store translated coordinate for easy use of exact solution + x_trans = x - equation.advection_velocity * t - return SVector(x_trans[3]) + return SVector(x_trans[3]) end """ @@ -116,102 +113,98 @@ Boundary conditions for function boundary_condition_linear_z(u_inner, orientation, direction, x, t, surface_flux_function, equation::LinearScalarAdvectionEquation3D) - u_boundary = initial_condition_linear_z(x, t, equation) + u_boundary = initial_condition_linear_z(x, t, equation) - # Calculate boundary flux - if direction in (2, 4, 6) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation, equation) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation, equation) - end + # Calculate boundary flux + if direction in (2, 4, 6) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equation) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equation) + end - return flux + return flux end - # Pre-defined source terms should be implemented as # function source_terms_WHATEVER(u, x, t, equation::LinearScalarAdvectionEquation3D) - # Calculate 1D flux in for a single point -@inline function flux(u, orientation::Integer, equation::LinearScalarAdvectionEquation3D) - a = equation.advection_velocity[orientation] - return a * u +@inline function flux(u, orientation::Integer, + equation::LinearScalarAdvectionEquation3D) + a = equation.advection_velocity[orientation] + return a * u end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equation::LinearScalarAdvectionEquation3D) - λ_max = abs(equation.advection_velocity[orientation]) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equation::LinearScalarAdvectionEquation3D) + λ_max = abs(equation.advection_velocity[orientation]) end - # Calculate 1D flux for a single point in the normal direction # Note, this directional vector is not normalized -@inline function flux(u, normal_direction::AbstractVector, equation::LinearScalarAdvectionEquation3D) - a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction - return a * u +@inline function flux(u, normal_direction::AbstractVector, + equation::LinearScalarAdvectionEquation3D) + a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction + return a * u end - # Calculate maximum wave speed in the normal direction for local Lax-Friedrichs-type dissipation -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equation::LinearScalarAdvectionEquation3D) - a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction - return abs(a) +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equation::LinearScalarAdvectionEquation3D) + a = dot(equation.advection_velocity, normal_direction) # velocity in normal direction + return abs(a) end - # Essentially first order upwind, see e.g. 
# https://math.stackexchange.com/a/4355076/805029 -function flux_godunov(u_ll, u_rr, orientation::Integer, equation::LinearScalarAdvectionEquation3D) - u_L = u_ll[1] - u_R = u_rr[1] +function flux_godunov(u_ll, u_rr, orientation::Integer, + equation::LinearScalarAdvectionEquation3D) + u_L = u_ll[1] + u_R = u_rr[1] - v_normal = equation.advection_velocity[orientation] - if v_normal >= 0 - return SVector(v_normal * u_L) - else - return SVector(v_normal * u_R) - end + v_normal = equation.advection_velocity[orientation] + if v_normal >= 0 + return SVector(v_normal * u_L) + else + return SVector(v_normal * u_R) + end end # Essentially first order upwind, see e.g. # https://math.stackexchange.com/a/4355076/805029 -function flux_godunov(u_ll, u_rr, normal_direction::AbstractVector, equation::LinearScalarAdvectionEquation3D) - u_L = u_ll[1] - u_R = u_rr[1] +function flux_godunov(u_ll, u_rr, normal_direction::AbstractVector, + equation::LinearScalarAdvectionEquation3D) + u_L = u_ll[1] + u_R = u_rr[1] - a_normal = dot(equation.advection_velocity, normal_direction) - if a_normal >= 0 - return SVector(a_normal * u_L) - else - return SVector(a_normal * u_R) - end + a_normal = dot(equation.advection_velocity, normal_direction) + if a_normal >= 0 + return SVector(a_normal * u_L) + else + return SVector(a_normal * u_R) + end end - @inline have_constant_speed(::LinearScalarAdvectionEquation3D) = True() @inline function max_abs_speeds(equation::LinearScalarAdvectionEquation3D) - return abs.(equation.advection_velocity) + return abs.(equation.advection_velocity) end - # Convert conservative variables to primitive @inline cons2prim(u, equation::LinearScalarAdvectionEquation3D) = u # Convert conservative variables to entropy variables @inline cons2entropy(u, equation::LinearScalarAdvectionEquation3D) = u - # Calculate entropy for a conservative state `cons` @inline entropy(u::Real, ::LinearScalarAdvectionEquation3D) = 0.5 * u^2 @inline entropy(u, equation::LinearScalarAdvectionEquation3D) = entropy(u[1], equation) - # Calculate total energy for a conservative state `cons` @inline energy_total(u::Real, ::LinearScalarAdvectionEquation3D) = 0.5 * u^2 -@inline energy_total(u, equation::LinearScalarAdvectionEquation3D) = energy_total(u[1], equation) - - +@inline function energy_total(u, equation::LinearScalarAdvectionEquation3D) + energy_total(u[1], equation) +end end # @muladd diff --git a/src/equations/linearized_euler_2d.jl b/src/equations/linearized_euler_2d.jl index d1765fd0d7b..cd681365cae 100644 --- a/src/equations/linearized_euler_2d.jl +++ b/src/equations/linearized_euler_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" LinearizedEulerEquations2D(v_mean_global, c_mean_global, rho_mean_global) @@ -32,29 +32,37 @@ Linearized euler equations in two space dimensions. The equations are given by The bar ``\bar{(\cdot)}`` indicates uniform mean flow variables and c is the speed of sound. The unknowns are the acoustic velocities ``v' = (v_1', v_2')``, the pressure ``p'`` and the density ``\rho'``. 
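For example, a uniform mean flow in the x direction can be created via the keyword constructor defined below (values are illustrative):

```julia
using Trixi

equations = LinearizedEulerEquations2D(v_mean_global = (0.5, 0.0),
                                       c_mean_global = 1.0,
                                       rho_mean_global = 1.0)
```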
""" -struct LinearizedEulerEquations2D{RealT<:Real} <: AbstractLinearizedEulerEquations{2, 4} +struct LinearizedEulerEquations2D{RealT <: Real} <: + AbstractLinearizedEulerEquations{2, 4} v_mean_global::SVector{2, RealT} c_mean_global::RealT rho_mean_global::RealT end -function LinearizedEulerEquations2D(v_mean_global::NTuple{2,<:Real}, c_mean_global::Real, rho_mean_global::Real) +function LinearizedEulerEquations2D(v_mean_global::NTuple{2, <:Real}, + c_mean_global::Real, rho_mean_global::Real) if rho_mean_global < 0 - throw(ArgumentError("rho_mean_global must be non-negative")) + throw(ArgumentError("rho_mean_global must be non-negative")) elseif c_mean_global < 0 - throw(ArgumentError("c_mean_global must be non-negative")) + throw(ArgumentError("c_mean_global must be non-negative")) end - return LinearizedEulerEquations2D(SVector(v_mean_global), c_mean_global, rho_mean_global) + return LinearizedEulerEquations2D(SVector(v_mean_global), c_mean_global, + rho_mean_global) end -function LinearizedEulerEquations2D(; v_mean_global::NTuple{2,<:Real}, c_mean_global::Real, rho_mean_global::Real) - return LinearizedEulerEquations2D(SVector(v_mean_global), c_mean_global, rho_mean_global) +function LinearizedEulerEquations2D(; v_mean_global::NTuple{2, <:Real}, + c_mean_global::Real, rho_mean_global::Real) + return LinearizedEulerEquations2D(SVector(v_mean_global), c_mean_global, + rho_mean_global) end - -varnames(::typeof(cons2cons), ::LinearizedEulerEquations2D) = ("rho_prime", "v1_prime", "v2_prime", "p_prime") -varnames(::typeof(cons2prim), ::LinearizedEulerEquations2D) = ("rho_prime", "v1_prime", "v2_prime", "p_prime") +function varnames(::typeof(cons2cons), ::LinearizedEulerEquations2D) + ("rho_prime", "v1_prime", "v2_prime", "p_prime") +end +function varnames(::typeof(cons2prim), ::LinearizedEulerEquations2D) + ("rho_prime", "v1_prime", "v2_prime", "p_prime") +end """ initial_condition_convergence_test(x, t, equations::LinearizedEulerEquations2D) @@ -62,23 +70,23 @@ varnames(::typeof(cons2prim), ::LinearizedEulerEquations2D) = ("rho_prime", "v1_ A smooth initial condition used for convergence tests. """ function initial_condition_convergence_test(x, t, equations::LinearizedEulerEquations2D) - rho_prime = -cospi(2*t) * (sinpi(2*x[1]) + sinpi(2*x[2])) - v1_prime = sinpi(2*t) * cospi(2*x[1]) - v2_prime = sinpi(2*t) * cospi(2*x[2]) + rho_prime = -cospi(2 * t) * (sinpi(2 * x[1]) + sinpi(2 * x[2])) + v1_prime = sinpi(2 * t) * cospi(2 * x[1]) + v2_prime = sinpi(2 * t) * cospi(2 * x[2]) p_prime = rho_prime return SVector(rho_prime, v1_prime, v2_prime, p_prime) end - """ boundary_condition_wall(u_inner, orientation, direction, x, t, surface_flux_function, equations::LinearizedEulerEquations2D) Boundary conditions for a solid wall. """ -function boundary_condition_wall(u_inner, orientation, direction, x, t, surface_flux_function, - equations::LinearizedEulerEquations2D) +function boundary_condition_wall(u_inner, orientation, direction, x, t, + surface_flux_function, + equations::LinearizedEulerEquations2D) # Boundary state is equal to the inner state except for the velocity. For boundaries # in the -x/+x direction, we multiply the velocity in the x direction by -1. 
# Similarly, for boundaries in the -y/+y direction, we multiply the velocity in the @@ -99,7 +107,6 @@ function boundary_condition_wall(u_inner, orientation, direction, x, t, surface_ return flux end - # Calculate 1D flux for a single point @inline function flux(u, orientation::Integer, equations::LinearizedEulerEquations2D) @unpack v_mean_global, c_mean_global, rho_mean_global = equations @@ -119,7 +126,6 @@ end return SVector(f1, f2, f3, f4) end - @inline have_constant_speed(::LinearizedEulerEquations2D) = True() @inline function max_abs_speeds(equations::LinearizedEulerEquations2D) @@ -127,7 +133,8 @@ end return abs(v_mean_global[1]) + c_mean_global, abs(v_mean_global[2]) + c_mean_global end -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::LinearizedEulerEquations2D) +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::LinearizedEulerEquations2D) @unpack v_mean_global, c_mean_global = equations if orientation == 1 return abs(v_mean_global[1]) + c_mean_global @@ -136,10 +143,7 @@ end end end - # Convert conservative variables to primitive @inline cons2prim(u, equations::LinearizedEulerEquations2D) = u @inline cons2entropy(u, ::LinearizedEulerEquations2D) = u - - end # muladd diff --git a/src/equations/numerical_fluxes.jl b/src/equations/numerical_fluxes.jl index ff9596848bb..16a83124d14 100644 --- a/src/equations/numerical_fluxes.jl +++ b/src/equations/numerical_fluxes.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # This file contains general numerical fluxes that are not specific to certain equations @@ -16,34 +16,37 @@ DG method (except floating point errors). """ @inline function flux_central(u_ll, u_rr, orientation_or_normal_direction, equations::AbstractEquations) - # Calculate regular 1D fluxes - f_ll = flux(u_ll, orientation_or_normal_direction, equations) - f_rr = flux(u_rr, orientation_or_normal_direction, equations) + # Calculate regular 1D fluxes + f_ll = flux(u_ll, orientation_or_normal_direction, equations) + f_rr = flux(u_rr, orientation_or_normal_direction, equations) - # Average regular fluxes - return 0.5 * (f_ll + f_rr) + # Average regular fluxes + return 0.5 * (f_ll + f_rr) end - """ FluxPlusDissipation(numerical_flux, dissipation) Combine a `numerical_flux` with a `dissipation` operator to create a new numerical flux. 
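For example, the local Lax-Friedrichs (Rusanov) flux used throughout Trixi.jl is exactly such a combination (cf. `FluxLaxFriedrichs` further below):

```julia
using Trixi

# equivalent to the predefined `flux_lax_friedrichs`
surface_flux = FluxPlusDissipation(flux_central,
                                   DissipationLocalLaxFriedrichs(max_abs_speed_naive))
```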
""" struct FluxPlusDissipation{NumericalFlux, Dissipation} - numerical_flux::NumericalFlux - dissipation::Dissipation + numerical_flux::NumericalFlux + dissipation::Dissipation end -@inline function (numflux::FluxPlusDissipation)(u_ll, u_rr, orientation_or_normal_direction, equations) - @unpack numerical_flux, dissipation = numflux +@inline function (numflux::FluxPlusDissipation)(u_ll, u_rr, + orientation_or_normal_direction, + equations) + @unpack numerical_flux, dissipation = numflux - return ( numerical_flux(u_ll, u_rr, orientation_or_normal_direction, equations) - + dissipation(u_ll, u_rr, orientation_or_normal_direction, equations) ) + return (numerical_flux(u_ll, u_rr, orientation_or_normal_direction, equations) + + + dissipation(u_ll, u_rr, orientation_or_normal_direction, equations)) end -Base.show(io::IO, f::FluxPlusDissipation) = print(io, "FluxPlusDissipation(", f.numerical_flux, ", ", f.dissipation, ")") - +function Base.show(io::IO, f::FluxPlusDissipation) + print(io, "FluxPlusDissipation(", f.numerical_flux, ", ", f.dissipation, ")") +end """ FluxRotated(numerical_flux) @@ -55,57 +58,56 @@ Requires a rotationally invariant equation with equation-specific functions [`rotate_to_x`](@ref) and [`rotate_from_x`](@ref). """ struct FluxRotated{NumericalFlux} - numerical_flux::NumericalFlux + numerical_flux::NumericalFlux end - # Rotated surface flux computation (2D version) -@inline function (flux_rotated::FluxRotated)(u_ll, u_rr, normal_direction::AbstractVector, +@inline function (flux_rotated::FluxRotated)(u_ll, u_rr, + normal_direction::AbstractVector, equations::AbstractEquations{2}) - @unpack numerical_flux = flux_rotated + @unpack numerical_flux = flux_rotated - norm_ = norm(normal_direction) - # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later - normal_vector = normal_direction / norm_ + norm_ = norm(normal_direction) + # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later + normal_vector = normal_direction / norm_ - u_ll_rotated = rotate_to_x(u_ll, normal_vector, equations) - u_rr_rotated = rotate_to_x(u_rr, normal_vector, equations) + u_ll_rotated = rotate_to_x(u_ll, normal_vector, equations) + u_rr_rotated = rotate_to_x(u_rr, normal_vector, equations) - f = numerical_flux(u_ll_rotated, u_rr_rotated, 1, equations) + f = numerical_flux(u_ll_rotated, u_rr_rotated, 1, equations) - return rotate_from_x(f, normal_vector, equations) * norm_ + return rotate_from_x(f, normal_vector, equations) * norm_ end - # Rotated surface flux computation (3D version) -@inline function (flux_rotated::FluxRotated)(u_ll, u_rr, normal_direction::AbstractVector, +@inline function (flux_rotated::FluxRotated)(u_ll, u_rr, + normal_direction::AbstractVector, equations::AbstractEquations{3}) - @unpack numerical_flux = flux_rotated + @unpack numerical_flux = flux_rotated - # Storing these vectors could increase the performance by 20 percent - norm_ = norm(normal_direction) - # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later - normal_vector = normal_direction / norm_ + # Storing these vectors could increase the performance by 20 percent + norm_ = norm(normal_direction) + # Normalize the vector without using `normalize` since we need to multiply by the `norm_` later + normal_vector = normal_direction / norm_ - # Some vector that can't be identical to normal_vector (unless normal_vector == 0) - tangent1 = SVector(normal_direction[2], normal_direction[3], -normal_direction[1]) 
- # Orthogonal projection - tangent1 -= dot(normal_vector, tangent1) * normal_vector - tangent1 = normalize(tangent1) + # Some vector that can't be identical to normal_vector (unless normal_vector == 0) + tangent1 = SVector(normal_direction[2], normal_direction[3], -normal_direction[1]) + # Orthogonal projection + tangent1 -= dot(normal_vector, tangent1) * normal_vector + tangent1 = normalize(tangent1) - # Third orthogonal vector - tangent2 = normalize(cross(normal_direction, tangent1)) + # Third orthogonal vector + tangent2 = normalize(cross(normal_direction, tangent1)) - u_ll_rotated = rotate_to_x(u_ll, normal_vector, tangent1, tangent2, equations) - u_rr_rotated = rotate_to_x(u_rr, normal_vector, tangent1, tangent2, equations) + u_ll_rotated = rotate_to_x(u_ll, normal_vector, tangent1, tangent2, equations) + u_rr_rotated = rotate_to_x(u_rr, normal_vector, tangent1, tangent2, equations) - f = numerical_flux(u_ll_rotated, u_rr_rotated, 1, equations) + f = numerical_flux(u_ll_rotated, u_rr_rotated, 1, equations) - return rotate_from_x(f, normal_vector, tangent1, tangent2, equations) * norm_ + return rotate_from_x(f, normal_vector, tangent1, tangent2, equations) * norm_ end -Base.show(io::IO, f::FluxRotated) = print(io, "FluxRotated(", f.numerical_flux, ")") - +Base.show(io::IO, f::FluxRotated) = print(io, "FluxRotated(", f.numerical_flux, ")") """ DissipationGlobalLaxFriedrichs(λ) @@ -113,21 +115,26 @@ Base.show(io::IO, f::FluxRotated) = print(io, "FluxRotated(", f.numerical_flux, Create a global Lax-Friedrichs dissipation operator with dissipation coefficient `λ`. """ struct DissipationGlobalLaxFriedrichs{RealT} - λ::RealT + λ::RealT end -@inline function (dissipation::DissipationGlobalLaxFriedrichs)(u_ll, u_rr, orientation::Integer, equations) - @unpack λ = dissipation - return -λ/2 * (u_rr - u_ll) +@inline function (dissipation::DissipationGlobalLaxFriedrichs)(u_ll, u_rr, + orientation::Integer, + equations) + @unpack λ = dissipation + return -λ / 2 * (u_rr - u_ll) end -@inline function (dissipation::DissipationGlobalLaxFriedrichs)(u_ll, u_rr, normal_direction::AbstractVector, equations) - @unpack λ = dissipation - return -λ/2 * norm(normal_direction) * (u_rr - u_ll) +@inline function (dissipation::DissipationGlobalLaxFriedrichs)(u_ll, u_rr, + normal_direction::AbstractVector, + equations) + @unpack λ = dissipation + return -λ / 2 * norm(normal_direction) * (u_rr - u_ll) end -Base.show(io::IO, d::DissipationGlobalLaxFriedrichs) = print(io, "DissipationGlobalLaxFriedrichs(", d.λ, ")") - +function Base.show(io::IO, d::DissipationGlobalLaxFriedrichs) + print(io, "DissipationGlobalLaxFriedrichs(", d.λ, ")") +end """ DissipationLocalLaxFriedrichs(max_abs_speed=max_abs_speed_naive) @@ -138,18 +145,22 @@ is estimated as defaulting to [`max_abs_speed_naive`](@ref). 
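A sketch of the resulting dissipation term `-0.5 * λ * (u_rr - u_ll)` for scalar advection with speed `a = 2` (illustrative states):

```julia
using Trixi

equations = LinearScalarAdvectionEquation1D(2.0)
dissipation = DissipationLocalLaxFriedrichs(max_abs_speed_naive)
# λ = |a| = 2, hence -0.5 * 2 * (3 - 1) = -2
dissipation(SVector(1.0), SVector(3.0), 1, equations)  # SVector(-2.0)
```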
""" struct DissipationLocalLaxFriedrichs{MaxAbsSpeed} - max_abs_speed::MaxAbsSpeed + max_abs_speed::MaxAbsSpeed end DissipationLocalLaxFriedrichs() = DissipationLocalLaxFriedrichs(max_abs_speed_naive) -@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, orientation_or_normal_direction, equations) - λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, equations) - return -0.5 * λ * (u_rr - u_ll) +@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, + orientation_or_normal_direction, + equations) + λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + return -0.5 * λ * (u_rr - u_ll) end -Base.show(io::IO, d::DissipationLocalLaxFriedrichs) = print(io, "DissipationLocalLaxFriedrichs(", d.max_abs_speed, ")") - +function Base.show(io::IO, d::DissipationLocalLaxFriedrichs) + print(io, "DissipationLocalLaxFriedrichs(", d.max_abs_speed, ")") +end """ max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations) @@ -164,11 +175,16 @@ For non-integer arguments `normal_direction` in one dimension, `max_abs_speed_na function max_abs_speed_naive end # for non-integer `orientation_or_normal` arguments. -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::AbstractEquations{1}) - return abs(normal_direction[1]) * max_abs_speed_naive(u_ll, u_rr, 1, equations) +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::AbstractEquations{1}) + return abs(normal_direction[1]) * max_abs_speed_naive(u_ll, u_rr, 1, equations) end -const FluxLaxFriedrichs{MaxAbsSpeed} = FluxPlusDissipation{typeof(flux_central), DissipationLocalLaxFriedrichs{MaxAbsSpeed}} +const FluxLaxFriedrichs{MaxAbsSpeed} = FluxPlusDissipation{typeof(flux_central), + DissipationLocalLaxFriedrichs{ + MaxAbsSpeed + } + } """ FluxLaxFriedrichs(max_abs_speed=max_abs_speed_naive) @@ -176,11 +192,13 @@ Local Lax-Friedrichs (Rusanov) flux with maximum wave speed estimate provided by `max_abs_speed`, cf. [`DissipationLocalLaxFriedrichs`](@ref) and [`max_abs_speed_naive`](@ref). """ -function FluxLaxFriedrichs(max_abs_speed=max_abs_speed_naive) - FluxPlusDissipation(flux_central, DissipationLocalLaxFriedrichs(max_abs_speed)) +function FluxLaxFriedrichs(max_abs_speed = max_abs_speed_naive) + FluxPlusDissipation(flux_central, DissipationLocalLaxFriedrichs(max_abs_speed)) end -Base.show(io::IO, f::FluxLaxFriedrichs) = print(io, "FluxLaxFriedrichs(", f.dissipation.max_abs_speed, ")") +function Base.show(io::IO, f::FluxLaxFriedrichs) + print(io, "FluxLaxFriedrichs(", f.dissipation.max_abs_speed, ")") +end """ flux_lax_friedrichs @@ -189,7 +207,6 @@ See [`FluxLaxFriedrichs`](@ref). """ const flux_lax_friedrichs = FluxLaxFriedrichs() - """ FluxHLL(min_max_speed=min_max_speed_naive) @@ -199,7 +216,7 @@ wave speeds are estimated as defaulting to [`min_max_speed_naive`](@ref). 
""" struct FluxHLL{MinMaxSpeed} - min_max_speed::MinMaxSpeed + min_max_speed::MinMaxSpeed end FluxHLL() = FluxHLL(min_max_speed_naive) @@ -217,22 +234,24 @@ left and right states `u_ll, u_rr`, usually based only on the local wave speeds """ function min_max_speed_naive end -@inline function (numflux::FluxHLL)(u_ll, u_rr, orientation_or_normal_direction, equations) - λ_min, λ_max = numflux.min_max_speed(u_ll, u_rr, orientation_or_normal_direction, equations) - - if λ_min >= 0 && λ_max >= 0 - return flux(u_ll, orientation_or_normal_direction, equations) - elseif λ_max <= 0 && λ_min <= 0 - return flux(u_rr, orientation_or_normal_direction, equations) - else - f_ll = flux(u_ll, orientation_or_normal_direction, equations) - f_rr = flux(u_rr, orientation_or_normal_direction, equations) - inv_λ_max_minus_λ_min = inv(λ_max - λ_min) - factor_ll = λ_max * inv_λ_max_minus_λ_min - factor_rr = λ_min * inv_λ_max_minus_λ_min - factor_diss = λ_min * λ_max * inv_λ_max_minus_λ_min - return factor_ll * f_ll - factor_rr * f_rr + factor_diss * (u_rr - u_ll) - end +@inline function (numflux::FluxHLL)(u_ll, u_rr, orientation_or_normal_direction, + equations) + λ_min, λ_max = numflux.min_max_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + + if λ_min >= 0 && λ_max >= 0 + return flux(u_ll, orientation_or_normal_direction, equations) + elseif λ_max <= 0 && λ_min <= 0 + return flux(u_rr, orientation_or_normal_direction, equations) + else + f_ll = flux(u_ll, orientation_or_normal_direction, equations) + f_rr = flux(u_rr, orientation_or_normal_direction, equations) + inv_λ_max_minus_λ_min = inv(λ_max - λ_min) + factor_ll = λ_max * inv_λ_max_minus_λ_min + factor_rr = λ_min * inv_λ_max_minus_λ_min + factor_diss = λ_min * λ_max * inv_λ_max_minus_λ_min + return factor_ll * f_ll - factor_rr * f_rr + factor_diss * (u_rr - u_ll) + end end Base.show(io::IO, numflux::FluxHLL) = print(io, "FluxHLL(", numflux.min_max_speed, ")") @@ -244,8 +263,6 @@ See [`FluxHLL`](@ref). """ const flux_hll = FluxHLL() - - """ flux_shima_etal_turbo(u_ll, u_rr, orientation_or_normal_direction, equations) @@ -254,8 +271,9 @@ methods, e.g., when used with [`VolumeIntegralFluxDifferencing`](@ref). These specialized methods may enable better use of SIMD instructions to increase runtime efficiency on modern hardware. """ -@inline function flux_shima_etal_turbo(u_ll, u_rr, orientation_or_normal_direction, equations) - flux_shima_etal(u_ll, u_rr, orientation_or_normal_direction, equations) +@inline function flux_shima_etal_turbo(u_ll, u_rr, orientation_or_normal_direction, + equations) + flux_shima_etal(u_ll, u_rr, orientation_or_normal_direction, equations) end """ @@ -266,11 +284,11 @@ methods, e.g., when used with [`VolumeIntegralFluxDifferencing`](@ref). These specialized methods may enable better use of SIMD instructions to increase runtime efficiency on modern hardware. """ -@inline function flux_ranocha_turbo(u_ll, u_rr, orientation_or_normal_direction, equations) - flux_ranocha(u_ll, u_rr, orientation_or_normal_direction, equations) +@inline function flux_ranocha_turbo(u_ll, u_rr, orientation_or_normal_direction, + equations) + flux_ranocha(u_ll, u_rr, orientation_or_normal_direction, equations) end - """ FluxHydrostaticReconstruction(numerical_flux, hydrostatic_reconstruction) @@ -299,23 +317,23 @@ fronts. 
A good overview of the development and application of hydrostatic recons [DOI: 10.1016/j.advwatres.2019.03.010](https://doi.org/10.1016/j.advwatres.2019.03.010) """ struct FluxHydrostaticReconstruction{NumericalFlux, HydrostaticReconstruction} - numerical_flux::NumericalFlux - hydrostatic_reconstruction::HydrostaticReconstruction + numerical_flux::NumericalFlux + hydrostatic_reconstruction::HydrostaticReconstruction end @inline function (numflux::FluxHydrostaticReconstruction)(u_ll, u_rr, orientation_or_normal_direction, equations::AbstractEquations) - @unpack numerical_flux, hydrostatic_reconstruction = numflux + @unpack numerical_flux, hydrostatic_reconstruction = numflux - # Create the reconstructed left/right solution states in conservative form - u_ll_star, u_rr_star = hydrostatic_reconstruction(u_ll, u_rr, equations) + # Create the reconstructed left/right solution states in conservative form + u_ll_star, u_rr_star = hydrostatic_reconstruction(u_ll, u_rr, equations) - # Use the reconstructed states to compute the numerical surface flux - return numerical_flux(u_ll_star, u_rr_star, orientation_or_normal_direction, equations) + # Use the reconstructed states to compute the numerical surface flux + return numerical_flux(u_ll_star, u_rr_star, orientation_or_normal_direction, + equations) end - """ FluxUpwind(splitting) @@ -330,17 +348,15 @@ as numerical flux (up to floating point differences). This is an experimental feature and may change in future releases. """ struct FluxUpwind{Splitting} - splitting::Splitting + splitting::Splitting end @inline function (numflux::FluxUpwind)(u_ll, u_rr, orientation::Int, equations) - @unpack splitting = numflux - fm = splitting(u_rr, Val{:minus}(), orientation, equations) - fp = splitting(u_ll, Val{:plus}(), orientation, equations) - return fm + fp + @unpack splitting = numflux + fm = splitting(u_rr, Val{:minus}(), orientation, equations) + fp = splitting(u_ll, Val{:plus}(), orientation, equations) + return fm + fp end -Base.show(io::IO, f::FluxUpwind) = print(io, "FluxUpwind(", f.splitting, ")") - - +Base.show(io::IO, f::FluxUpwind) = print(io, "FluxUpwind(", f.splitting, ")") end # @muladd diff --git a/src/equations/shallow_water_1d.jl b/src/equations/shallow_water_1d.jl index 949c6576006..851cbacdd57 100644 --- a/src/equations/shallow_water_1d.jl +++ b/src/equations/shallow_water_1d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" ShallowWaterEquations1D(gravity, H0) @@ -44,27 +44,25 @@ References for the SWE are many but a good introduction is available in Chapter Finite Volume Methods for Hyperbolic Problems [DOI: 10.1017/CBO9780511791253](https://doi.org/10.1017/CBO9780511791253) """ -struct ShallowWaterEquations1D{RealT<:Real} <: AbstractShallowWaterEquations{1, 3} - gravity::RealT # gravitational constant - H0::RealT # constant "lake-at-rest" total water height +struct ShallowWaterEquations1D{RealT <: Real} <: AbstractShallowWaterEquations{1, 3} + gravity::RealT # gravitational constant + H0::RealT # constant "lake-at-rest" total water height end # Allow for flexibility to set the gravitational constant within an elixir depending on the # application where `gravity_constant=1.0` or `gravity_constant=9.81` are common values. 
# The reference total water height H0 defaults to 0.0 but is used for the "lake-at-rest" # well-balancedness test cases -function ShallowWaterEquations1D(; gravity_constant, H0=0.0) - ShallowWaterEquations1D(gravity_constant, H0) +function ShallowWaterEquations1D(; gravity_constant, H0 = 0.0) + ShallowWaterEquations1D(gravity_constant, H0) end - have_nonconservative_terms(::ShallowWaterEquations1D) = True() varnames(::typeof(cons2cons), ::ShallowWaterEquations1D) = ("h", "h_v", "b") # Note, we use the total water height, H = h + b, as the first primitive variable for easier # visualization and setting initial conditions varnames(::typeof(cons2prim), ::ShallowWaterEquations1D) = ("H", "v", "b") - # Set initial conditions at physical location `x` for time `t` """ initial_condition_convergence_test(x, t, equations::ShallowWaterEquations1D) @@ -75,15 +73,15 @@ A smooth initial condition used for convergence tests in combination with """ function initial_condition_convergence_test(x, t, equations::ShallowWaterEquations1D) - # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)] - c = 7.0 - omega_x = 2.0 * pi * sqrt(2.0) - omega_t = 2.0 * pi - - H = c + cos(omega_x * x[1]) * cos(omega_t * t) - v = 0.5 - b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x[1]) - return prim2cons(SVector(H, v, b), equations) + # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)] + c = 7.0 + omega_x = 2.0 * pi * sqrt(2.0) + omega_t = 2.0 * pi + + H = c + cos(omega_x * x[1]) * cos(omega_t * t) + v = 0.5 + b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x[1]) + return prim2cons(SVector(H, v, b), equations) end """ @@ -98,31 +96,32 @@ This manufactured solution source term is specifically designed for the bottom t as defined in [`initial_condition_convergence_test`](@ref). """ -@inline function source_terms_convergence_test(u, x, t, equations::ShallowWaterEquations1D) - # Same settings as in `initial_condition_convergence_test`. Some derivative simplify because - # this manufactured solution velocity is taken to be constant - c = 7.0 - omega_x = 2.0 * pi * sqrt(2.0) - omega_t = 2.0 * pi - omega_b = sqrt(2.0) * pi - v = 0.5 - - sinX, cosX = sincos(omega_x * x[1]) - sinT, cosT = sincos(omega_t * t ) - - H = c + cosX * cosT - H_x = -omega_x * sinX * cosT - # this time derivative for the water height exploits that the bottom topography is - # fixed in time such that H_t = (h+b)_t = h_t + 0 - H_t = -omega_t * cosX * sinT - - # bottom topography and its spatial derivative - b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x[1]) - b_x = 0.5 * omega_b * cos(omega_b * x[1]) - - du1 = H_t + v * (H_x - b_x) - du2 = v * du1 + equations.gravity * (H - b) * H_x - return SVector(du1, du2, 0.0) +@inline function source_terms_convergence_test(u, x, t, + equations::ShallowWaterEquations1D) + # Same settings as in `initial_condition_convergence_test`. 
Some derivatives simplify because + # this manufactured solution velocity is taken to be constant + c = 7.0 + omega_x = 2.0 * pi * sqrt(2.0) + omega_t = 2.0 * pi + omega_b = sqrt(2.0) * pi + v = 0.5 + + sinX, cosX = sincos(omega_x * x[1]) + sinT, cosT = sincos(omega_t * t) + + H = c + cosX * cosT + H_x = -omega_x * sinX * cosT + # this time derivative for the water height exploits that the bottom topography is + # fixed in time such that H_t = (h+b)_t = h_t + 0 + H_t = -omega_t * cosX * sinT + + # bottom topography and its spatial derivative + b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x[1]) + b_x = 0.5 * omega_b * cos(omega_b * x[1]) + + du1 = H_t + v * (H_x - b_x) + du2 = v * du1 + equations.gravity * (H - b) * H_x + return SVector(du1, du2, 0.0) end """ @@ -132,17 +131,16 @@ A weak blast wave discontinuity useful for testing, e.g., total energy conservat Note, for the shallow water equations the total energy acts as a mathematical entropy function. """ function initial_condition_weak_blast_wave(x, t, equations::ShallowWaterEquations1D) + inicenter = 0.7 + x_norm = x[1] - inicenter + r = abs(x_norm) - inicenter = 0.7 - x_norm = x[1] - inicenter - r = abs(x_norm) + # Calculate primitive variables + H = r > 0.5 ? 3.25 : 4.0 + v = r > 0.5 ? 0.0 : 0.1882 + b = sin(x[1]) # arbitrary continuous function - # Calculate primitive variables - H = r > 0.5 ? 3.25 : 4.0 - v = r > 0.5 ? 0.0 : 0.1882 - b = sin(x[1]) # arbitrary continuous function - - return prim2cons(SVector(H, v, b), equations) + return prim2cons(SVector(H, v, b), equations) end """ @@ -164,33 +162,35 @@ For details see Section 9.2.5 of the book: surface_flux_function, equations::ShallowWaterEquations1D) - # create the "external" boundary solution state - u_boundary = SVector(u_inner[1], - -u_inner[2], - u_inner[3]) - - # calculate the boundary flux - if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary - flux = surface_flux_function(u_inner, u_boundary, orientation_or_normal, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - flux = surface_flux_function(u_boundary, u_inner, orientation_or_normal, equations) - end - - return flux + # create the "external" boundary solution state + u_boundary = SVector(u_inner[1], + -u_inner[2], + u_inner[3]) + + # calculate the boundary flux + if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation_or_normal, + equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation_or_normal, + equations) + end + + return flux end # Calculate 1D flux for a single point # Note, the bottom topography has no flux @inline function flux(u, orientation::Integer, equations::ShallowWaterEquations1D) - h, h_v, _ = u - v = velocity(u, equations) + h, h_v, _ = u + v = velocity(u, equations) - p = 0.5 * equations.gravity * h^2 + p = 0.5 * equations.gravity * h^2 - f1 = h_v - f2 = h_v * v + p + f1 = h_v + f2 = h_v * v + p - return SVector(f1, f2, zero(eltype(u))) + return SVector(f1, f2, zero(eltype(u))) end """ @@ -208,16 +208,16 @@ Further details are available in the paper: """ @inline function flux_nonconservative_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D) - # Pull the necessary left and right state information - h_ll = waterheight(u_ll, equations) - b_rr = u_rr[3] + # Pull the necessary left and right state information + h_ll = 
waterheight(u_ll, equations) + b_rr = u_rr[3] - z = zero(eltype(u_ll)) + z = zero(eltype(u_ll)) - # Bottom gradient nonconservative term: (0, g h b_x, 0) - f = SVector(z, equations.gravity * h_ll * b_rr, z) + # Bottom gradient nonconservative term: (0, g h b_x, 0) + f = SVector(z, equations.gravity * h_ll * b_rr, z) - return f + return f end """ @@ -245,27 +245,27 @@ and for curvilinear 2D case in the paper: """ @inline function flux_nonconservative_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D) - # Pull the necessary left and right state information - h_ll, _, b_ll = u_ll - h_rr, _, b_rr = u_rr + # Pull the necessary left and right state information + h_ll, _, b_ll = u_ll + h_rr, _, b_rr = u_rr - h_average = 0.5 * (h_ll + h_rr) - b_jump = b_rr - b_ll + h_average = 0.5 * (h_ll + h_rr) + b_jump = b_rr - b_ll - # Includes two parts: - # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid - # cross-averaging across a discontinuous bottom topography - # (ii) True surface part that uses `h_average` and `b_jump` to handle discontinuous bathymetry - z = zero(eltype(u_ll)) + # Includes two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid + # cross-averaging across a discontinuous bottom topography + # (ii) True surface part that uses `h_average` and `b_jump` to handle discontinuous bathymetry + z = zero(eltype(u_ll)) - f = SVector(z, - equations.gravity * h_ll * b_ll + equations.gravity * h_average * b_jump, - z) + f = SVector(z, + equations.gravity * h_ll * b_ll + + equations.gravity * h_average * b_jump, + z) - return f + return f end - """ flux_nonconservative_audusse_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D) @@ -287,26 +287,26 @@ Further details on the hydrostatic reconstruction and its motivation can be foun @inline function flux_nonconservative_audusse_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D) - # Pull the water height and bottom topography on the left - h_ll, _, b_ll = u_ll - - # Create the hydrostatic reconstruction for the left solution state - u_ll_star, _ = hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations) - - # Copy the reconstructed water height for easier to read code - h_ll_star = u_ll_star[1] - - z = zero(eltype(u_ll)) - # Includes two parts: - # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid - # cross-averaging across a discontinuous bottom topography - # (ii) True surface part that uses `h_ll` and `h_ll_star` to handle discontinuous bathymetry - return SVector(z, - equations.gravity * h_ll * b_ll + equations.gravity * ( h_ll^2 - h_ll_star^2 ), - z) + # Pull the water height and bottom topography on the left + h_ll, _, b_ll = u_ll + + # Create the hydrostatic reconstruction for the left solution state + u_ll_star, _ = hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations) + + # Copy the reconstructed water height for easier to read code + h_ll_star = u_ll_star[1] + + z = zero(eltype(u_ll)) + # Includes two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid + # cross-averaging across a discontinuous bottom topography + # (ii) True surface part that uses `h_ll` and `h_ll_star` to handle discontinuous bathymetry + return SVector(z, + equations.gravity * h_ll * b_ll + + equations.gravity * (h_ll^2 - h_ll_star^2), + z) end - """ flux_fjordholm_etal(u_ll, u_rr, orientation, equations::ShallowWaterEquations1D) @@ -320,23 +320,24 @@ 
Details are available in Eq. (4.1) in the paper: Well-balanced and energy stable schemes for the shallow water equations with discontinuous topography [DOI: 10.1016/j.jcp.2011.03.042](https://doi.org/10.1016/j.jcp.2011.03.042) """ -@inline function flux_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D) - # Unpack left and right state - h_ll = waterheight(u_ll, equations) - v_ll = velocity(u_ll, equations) - h_rr = waterheight(u_rr, equations) - v_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - h_avg = 0.5 * (h_ll + h_rr ) - v_avg = 0.5 * (v_ll + v_rr ) - p_avg = 0.25 * equations.gravity * (h_ll^2 + h_rr^2) - - # Calculate fluxes depending on orientation - f1 = h_avg * v_avg - f2 = f1 * v_avg + p_avg - - return SVector(f1, f2, zero(eltype(u_ll))) +@inline function flux_fjordholm_etal(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquations1D) + # Unpack left and right state + h_ll = waterheight(u_ll, equations) + v_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + v_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + h_avg = 0.5 * (h_ll + h_rr) + v_avg = 0.5 * (v_ll + v_rr) + p_avg = 0.25 * equations.gravity * (h_ll^2 + h_rr^2) + + # Calculate fluxes depending on orientation + f1 = h_avg * v_avg + f2 = f1 * v_avg + p_avg + + return SVector(f1, f2, zero(eltype(u_ll))) end """ @@ -353,27 +354,27 @@ Further details are available in Theorem 1 of the paper: shallow water equations on unstructured curvilinear meshes with discontinuous bathymetry [DOI: 10.1016/j.jcp.2017.03.036](https://doi.org/10.1016/j.jcp.2017.03.036) """ -@inline function flux_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D) - # Unpack left and right state - h_ll, h_v_ll, _ = u_ll - h_rr, h_v_rr, _ = u_rr +@inline function flux_wintermeyer_etal(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquations1D) + # Unpack left and right state + h_ll, h_v_ll, _ = u_ll + h_rr, h_v_rr, _ = u_rr - # Get the velocities on either side - v_ll = velocity(u_ll, equations) - v_rr = velocity(u_rr, equations) + # Get the velocities on either side + v_ll = velocity(u_ll, equations) + v_rr = velocity(u_rr, equations) - # Average each factor of products in flux - v_avg = 0.5 * (v_ll + v_rr) - p_avg = 0.5 * equations.gravity * h_ll * h_rr + # Average each factor of products in flux + v_avg = 0.5 * (v_ll + v_rr) + p_avg = 0.5 * equations.gravity * h_ll * h_rr - # Calculate fluxes depending on orientation - f1 = 0.5 * (h_v_ll + h_v_rr) - f2 = f1 * v_avg + p_avg + # Calculate fluxes depending on orientation + f1 = 0.5 * (h_v_ll + h_v_rr) + f2 = f1 * v_avg + p_avg - return SVector(f1, f2, zero(eltype(u_ll))) + return SVector(f1, f2, zero(eltype(u_ll))) end - """ hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D) @@ -388,203 +389,192 @@ Further details on this hydrostatic reconstruction and its motivation can be fou A fast and stable well-balanced scheme with hydrostatic reconstruction for shallow water flows [DOI: 10.1137/S1064827503431090](https://doi.org/10.1137/S1064827503431090) """ -@inline function hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations::ShallowWaterEquations1D) - # Unpack left and right water heights and bottom topographies - h_ll, _, b_ll = u_ll - h_rr, _, b_rr = u_rr +@inline function hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, + equations::ShallowWaterEquations1D) + # Unpack left and 
right water heights and bottom topographies + h_ll, _, b_ll = u_ll + h_rr, _, b_rr = u_rr - # Get the velocities on either side - v1_ll = velocity(u_ll, equations) - v1_rr = velocity(u_rr, equations) + # Get the velocities on either side + v1_ll = velocity(u_ll, equations) + v1_rr = velocity(u_rr, equations) - # Compute the reconstructed water heights - h_ll_star = max(zero(h_ll) , h_ll + b_ll - max(b_ll, b_rr) ) - h_rr_star = max(zero(h_rr) , h_rr + b_rr - max(b_ll, b_rr) ) + # Compute the reconstructed water heights + h_ll_star = max(zero(h_ll), h_ll + b_ll - max(b_ll, b_rr)) + h_rr_star = max(zero(h_rr), h_rr + b_rr - max(b_ll, b_rr)) - # Create the conservative variables using the reconstruted water heights - u_ll_star = SVector( h_ll_star , h_ll_star * v1_ll , b_ll ) - u_rr_star = SVector( h_rr_star , h_rr_star * v1_rr , b_rr ) + # Create the conservative variables using the reconstruted water heights + u_ll_star = SVector(h_ll_star, h_ll_star * v1_ll, b_ll) + u_rr_star = SVector(h_rr_star, h_rr_star * v1_rr, b_rr) - return u_ll_star, u_rr_star + return u_ll_star, u_rr_star end - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation as the # maximum velocity magnitude plus the maximum speed of sound -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D) - # Get the velocity quantities - v_ll = velocity(u_ll, equations) - v_rr = velocity(u_rr, equations) - - # Calculate the wave celerity on the left and right - h_ll = waterheight(u_ll, equations) - h_rr = waterheight(u_rr, equations) - c_ll = sqrt(equations.gravity * h_ll) - c_rr = sqrt(equations.gravity * h_rr) - - return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) -end +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquations1D) + # Get the velocity quantities + v_ll = velocity(u_ll, equations) + v_rr = velocity(u_rr, equations) + # Calculate the wave celerity on the left and right + h_ll = waterheight(u_ll, equations) + h_rr = waterheight(u_rr, equations) + c_ll = sqrt(equations.gravity * h_ll) + c_rr = sqrt(equations.gravity * h_rr) + + return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) +end # Specialized `DissipationLocalLaxFriedrichs` to avoid spurious dissipation in the bottom topography -@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, orientation_or_normal_direction, +@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, + orientation_or_normal_direction, equations::ShallowWaterEquations1D) - λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, equations) - diss = -0.5 * λ * (u_rr - u_ll) - return SVector(diss[1], diss[2], zero(eltype(u_ll))) + λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + diss = -0.5 * λ * (u_rr - u_ll) + return SVector(diss[1], diss[2], zero(eltype(u_ll))) end - # Specialized `FluxHLL` to avoid spurious dissipation in the bottom topography @inline function (numflux::FluxHLL)(u_ll, u_rr, orientation_or_normal_direction, equations::ShallowWaterEquations1D) - λ_min, λ_max = numflux.min_max_speed(u_ll, u_rr, orientation_or_normal_direction, equations) - - if λ_min >= 0 && λ_max >= 0 - return flux(u_ll, orientation_or_normal_direction, equations) - elseif λ_max <= 0 && λ_min <= 0 - return flux(u_rr, orientation_or_normal_direction, equations) - else - f_ll = flux(u_ll, orientation_or_normal_direction, equations) - f_rr = flux(u_rr, orientation_or_normal_direction, equations) - 
inv_λ_max_minus_λ_min = inv(λ_max - λ_min) - factor_ll = λ_max * inv_λ_max_minus_λ_min - factor_rr = λ_min * inv_λ_max_minus_λ_min - factor_diss = λ_min * λ_max * inv_λ_max_minus_λ_min - diss = u_rr - u_ll - return factor_ll * f_ll - factor_rr * f_rr + factor_diss * SVector(diss[1], diss[2], zero(eltype(u_ll))) - end + λ_min, λ_max = numflux.min_max_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + + if λ_min >= 0 && λ_max >= 0 + return flux(u_ll, orientation_or_normal_direction, equations) + elseif λ_max <= 0 && λ_min <= 0 + return flux(u_rr, orientation_or_normal_direction, equations) + else + f_ll = flux(u_ll, orientation_or_normal_direction, equations) + f_rr = flux(u_rr, orientation_or_normal_direction, equations) + inv_λ_max_minus_λ_min = inv(λ_max - λ_min) + factor_ll = λ_max * inv_λ_max_minus_λ_min + factor_rr = λ_min * inv_λ_max_minus_λ_min + factor_diss = λ_min * λ_max * inv_λ_max_minus_λ_min + diss = u_rr - u_ll + return factor_ll * f_ll - factor_rr * f_rr + + factor_diss * SVector(diss[1], diss[2], zero(eltype(u_ll))) + end end - # Calculate minimum and maximum wave speeds for HLL-type fluxes @inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D) - h_ll = waterheight(u_ll, equations) - v_ll = velocity(u_ll, equations) - h_rr = waterheight(u_rr, equations) - v_rr = velocity(u_rr, equations) + h_ll = waterheight(u_ll, equations) + v_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + v_rr = velocity(u_rr, equations) - λ_min = v_ll - sqrt(equations.gravity * h_ll) - λ_max = v_rr + sqrt(equations.gravity * h_rr) + λ_min = v_ll - sqrt(equations.gravity * h_ll) + λ_max = v_rr + sqrt(equations.gravity * h_rr) - return λ_min, λ_max + return λ_min, λ_max end - @inline function max_abs_speeds(u, equations::ShallowWaterEquations1D) - h = waterheight(u, equations) - v = velocity(u, equations) + h = waterheight(u, equations) + v = velocity(u, equations) - c = equations.gravity * sqrt(h) - return (abs(v) + c,) + c = sqrt(equations.gravity * h) + return (abs(v) + c,) end - # Helper function to extract the velocity vector from the conservative variables @inline function velocity(u, equations::ShallowWaterEquations1D) - h, h_v, _ = u + h, h_v, _ = u - v = h_v / h + v = h_v / h - return v + return v end - # Convert conservative variables to primitive @inline function cons2prim(u, equations::ShallowWaterEquations1D) - h, _, b = u + h, _, b = u - H = h + b - v = velocity(u, equations) - return SVector(H, v, b) + H = h + b + v = velocity(u, equations) + return SVector(H, v, b) end - # Convert conservative variables to entropy # Note, only the first two are the entropy variables, the third entry still # just carries the bottom topography values for convenience @inline function cons2entropy(u, equations::ShallowWaterEquations1D) - h, h_v, b = u + h, h_v, b = u - v = velocity(u, equations) + v = velocity(u, equations) - w1 = equations.gravity * (h + b) - 0.5 * v^2 - w2 = v + w1 = equations.gravity * (h + b) - 0.5 * v^2 + w2 = v - return SVector(w1, w2, b) + return SVector(w1, w2, b) end - # Convert entropy variables to conservative @inline function entropy2cons(w, equations::ShallowWaterEquations1D) - w1, w2, b = w + w1, w2, b = w - h = (w1 + 0.5 * w2^2) / equations.gravity - b - h_v = h * w2 - return SVector(h, h_v, b) + h = (w1 + 0.5 * w2^2) / equations.gravity - b + h_v = h * w2 + return SVector(h, h_v, b) end - # Convert primitive to conservative variables @inline function prim2cons(prim, 
equations::ShallowWaterEquations1D) - H, v, b = prim + H, v, b = prim - h = H - b - h_v = h * v + h = H - b + h_v = h * v - return SVector(h, h_v, b) + return SVector(h, h_v, b) end - @inline function waterheight(u, equations::ShallowWaterEquations1D) - return u[1] + return u[1] end - @inline function pressure(u, equations::ShallowWaterEquations1D) - h = waterheight(u, equations) - p = 0.5 * equations.gravity * h^2 - return p + h = waterheight(u, equations) + p = 0.5 * equations.gravity * h^2 + return p end - @inline function waterheight_pressure(u, equations::ShallowWaterEquations1D) - return waterheight(u, equations) * pressure(u, equations) + return waterheight(u, equations) * pressure(u, equations) end - # Entropy function for the shallow water equations is the total energy -@inline entropy(cons, equations::ShallowWaterEquations1D) = energy_total(cons, equations) - +@inline function entropy(cons, equations::ShallowWaterEquations1D) + energy_total(cons, equations) +end # Calculate total energy for a conservative state `cons` @inline function energy_total(cons, equations::ShallowWaterEquations1D) - h, h_v, b = cons + h, h_v, b = cons - e = (h_v^2) / (2 * h) + 0.5 * equations.gravity * h^2 + equations.gravity * h * b - return e + e = (h_v^2) / (2 * h) + 0.5 * equations.gravity * h^2 + equations.gravity * h * b + return e end - # Calculate kinetic energy for a conservative state `cons` @inline function energy_kinetic(u, equations::ShallowWaterEquations1D) - h, h_v, _ = u - return (h_v^2) / (2 * h) + h, h_v, _ = u + return (h_v^2) / (2 * h) end - # Calculate potential energy for a conservative state `cons` @inline function energy_internal(cons, equations::ShallowWaterEquations1D) - return energy_total(cons, equations) - energy_kinetic(cons, equations) + return energy_total(cons, equations) - energy_kinetic(cons, equations) end - # Calculate the error for the "lake-at-rest" test case where H = h+b should # be a constant value over time @inline function lake_at_rest_error(u, equations::ShallowWaterEquations1D) - h, _, b = u - return abs(equations.H0 - (h + b)) + h, _, b = u + return abs(equations.H0 - (h + b)) end - end # @muladd diff --git a/src/equations/shallow_water_2d.jl b/src/equations/shallow_water_2d.jl index b07fbfc739e..f9ebbd597f9 100644 --- a/src/equations/shallow_water_2d.jl +++ b/src/equations/shallow_water_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent @doc raw""" ShallowWaterEquations2D(gravity, H0) @@ -47,27 +47,25 @@ References for the SWE are many but a good introduction is available in Chapter Finite Volume Methods for Hyperbolic Problems [DOI: 10.1017/CBO9780511791253](https://doi.org/10.1017/CBO9780511791253) """ -struct ShallowWaterEquations2D{RealT<:Real} <: AbstractShallowWaterEquations{2, 4} - gravity::RealT # gravitational constant - H0::RealT # constant "lake-at-rest" total water height +struct ShallowWaterEquations2D{RealT <: Real} <: AbstractShallowWaterEquations{2, 4} + gravity::RealT # gravitational constant + H0::RealT # constant "lake-at-rest" total water height end # Allow for flexibility to set the gravitational constant within an elixir depending on the # application where `gravity_constant=1.0` or `gravity_constant=9.81` are common values. 
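# A minimal sketch of such a choice (assuming the Trixi.jl exports `prim2cons` and
# `lake_at_rest_error`, and the `SVector` type re-exported by Trixi.jl); the state
# below is a hypothetical lake at rest over an arbitrary bottom topography value:
using Trixi
equations = ShallowWaterEquations2D(gravity_constant = 9.81, H0 = 3.0)
u = prim2cons(SVector(equations.H0, 0.0, 0.0, 1.2), equations)  # H = H0, v1 = v2 = 0
lake_at_rest_error(u, equations)  # ≈ 0 for a lake-at-rest state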
# The reference total water height H0 defaults to 0.0 but is used for the "lake-at-rest" # well-balancedness test cases -function ShallowWaterEquations2D(; gravity_constant, H0=0.0) - ShallowWaterEquations2D(gravity_constant, H0) +function ShallowWaterEquations2D(; gravity_constant, H0 = 0.0) + ShallowWaterEquations2D(gravity_constant, H0) end - have_nonconservative_terms(::ShallowWaterEquations2D) = True() varnames(::typeof(cons2cons), ::ShallowWaterEquations2D) = ("h", "h_v1", "h_v2", "b") # Note, we use the total water height, H = h + b, as the first primitive variable for easier # visualization and setting initial conditions varnames(::typeof(cons2prim), ::ShallowWaterEquations2D) = ("H", "v1", "v2", "b") - # Set initial conditions at physical location `x` for time `t` """ initial_condition_convergence_test(x, t, equations::ShallowWaterEquations2D) @@ -77,18 +75,18 @@ A smooth initial condition used for convergence tests in combination with (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ function initial_condition_convergence_test(x, t, equations::ShallowWaterEquations2D) - # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)]^2 - c = 7.0 - omega_x = 2.0 * pi * sqrt(2.0) - omega_t = 2.0 * pi + # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)]^2 + c = 7.0 + omega_x = 2.0 * pi * sqrt(2.0) + omega_t = 2.0 * pi - x1, x2 = x + x1, x2 = x - H = c + cos(omega_x * x1) * sin(omega_x * x2) * cos(omega_t * t) - v1 = 0.5 - v2 = 1.5 - b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x1) + 0.5 * sin(sqrt(2.0) * pi * x2) - return prim2cons(SVector(H, v1, v2, b), equations) + H = c + cos(omega_x * x1) * sin(omega_x * x2) * cos(omega_t * t) + v1 = 0.5 + v2 = 1.5 + b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x1) + 0.5 * sin(sqrt(2.0) * pi * x2) + return prim2cons(SVector(H, v1, v2, b), equations) end """ @@ -102,42 +100,42 @@ This manufactured solution source term is specifically designed for the bottom t `b(x,y) = 2 + 0.5 * sin(sqrt(2)*pi*x) + 0.5 * sin(sqrt(2)*pi*y)` as defined in [`initial_condition_convergence_test`](@ref). """ -@inline function source_terms_convergence_test(u, x, t, equations::ShallowWaterEquations2D) - # Same settings as in `initial_condition_convergence_test`. Some derivative simplify because - # this manufactured solution velocities are taken to be constants - c = 7.0 - omega_x = 2.0 * pi * sqrt(2.0) - omega_t = 2.0 * pi - omega_b = sqrt(2.0) * pi - v1 = 0.5 - v2 = 1.5 - - x1, x2 = x - - sinX, cosX = sincos(omega_x * x1) - sinY, cosY = sincos(omega_x * x2) - sinT, cosT = sincos(omega_t * t ) - - H = c + cosX * sinY * cosT - H_x = -omega_x * sinX * sinY * cosT - H_y = omega_x * cosX * cosY * cosT - # this time derivative for the water height exploits that the bottom topography is - # fixed in time such that H_t = (h+b)_t = h_t + 0 - H_t = -omega_t * cosX * sinY * sinT - - # bottom topography and its gradient - b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x1) + 0.5 * sin(sqrt(2.0) * pi * x2) - tmp1 = 0.5 * omega_b - b_x = tmp1 * cos(omega_b * x1) - b_y = tmp1 * cos(omega_b * x2) - - du1 = H_t + v1 * (H_x - b_x) + v2 * (H_y - b_y) - du2 = v1 * du1 + equations.gravity * (H - b) * H_x - du3 = v2 * du1 + equations.gravity * (H - b) * H_y - return SVector(du1, du2, du3, 0.0) +@inline function source_terms_convergence_test(u, x, t, + equations::ShallowWaterEquations2D) + # Same settings as in `initial_condition_convergence_test`. 
Some derivative simplify because + # this manufactured solution velocities are taken to be constants + c = 7.0 + omega_x = 2.0 * pi * sqrt(2.0) + omega_t = 2.0 * pi + omega_b = sqrt(2.0) * pi + v1 = 0.5 + v2 = 1.5 + + x1, x2 = x + + sinX, cosX = sincos(omega_x * x1) + sinY, cosY = sincos(omega_x * x2) + sinT, cosT = sincos(omega_t * t) + + H = c + cosX * sinY * cosT + H_x = -omega_x * sinX * sinY * cosT + H_y = omega_x * cosX * cosY * cosT + # this time derivative for the water height exploits that the bottom topography is + # fixed in time such that H_t = (h+b)_t = h_t + 0 + H_t = -omega_t * cosX * sinY * sinT + + # bottom topography and its gradient + b = 2.0 + 0.5 * sin(sqrt(2.0) * pi * x1) + 0.5 * sin(sqrt(2.0) * pi * x2) + tmp1 = 0.5 * omega_b + b_x = tmp1 * cos(omega_b * x1) + b_y = tmp1 * cos(omega_b * x2) + + du1 = H_t + v1 * (H_x - b_x) + v2 * (H_y - b_y) + du2 = v1 * du1 + equations.gravity * (H - b) * H_x + du3 = v2 * du1 + equations.gravity * (H - b) * H_y + return SVector(du1, du2, du3, 0.0) end - """ initial_condition_weak_blast_wave(x, t, equations::ShallowWaterEquations2D) @@ -145,24 +143,23 @@ A weak blast wave discontinuity useful for testing, e.g., total energy conservat Note for the shallow water equations to the total energy acts as a mathematical entropy function. """ function initial_condition_weak_blast_wave(x, t, equations::ShallowWaterEquations2D) - # Set up polar coordinates - inicenter = SVector(0.7, 0.7) - x_norm = x[1] - inicenter[1] - y_norm = x[2] - inicenter[2] - r = sqrt(x_norm^2 + y_norm^2) - phi = atan(y_norm, x_norm) - sin_phi, cos_phi = sincos(phi) + # Set up polar coordinates + inicenter = SVector(0.7, 0.7) + x_norm = x[1] - inicenter[1] + y_norm = x[2] - inicenter[2] + r = sqrt(x_norm^2 + y_norm^2) + phi = atan(y_norm, x_norm) + sin_phi, cos_phi = sincos(phi) - # Calculate primitive variables - H = r > 0.5 ? 3.25 : 4.0 - v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi - v2 = r > 0.5 ? 0.0 : 0.1882 * sin_phi - b = 0.0 # by default assume there is no bottom topography + # Calculate primitive variables + H = r > 0.5 ? 3.25 : 4.0 + v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi + v2 = r > 0.5 ? 
0.0 : 0.1882 * sin_phi + b = 0.0 # by default assume there is no bottom topography - return prim2cons(SVector(H, v1, v2, b), equations) + return prim2cons(SVector(H, v1, v2, b), equations) end - """ boundary_condition_slip_wall(u_inner, normal_direction, x, t, surface_flux_function, equations::ShallowWaterEquations2D) @@ -179,25 +176,24 @@ For details see Section 9.2.5 of the book: x, t, surface_flux_function, equations::ShallowWaterEquations2D) - # normalize the outward pointing direction - normal = normal_direction / norm(normal_direction) + # normalize the outward pointing direction + normal = normal_direction / norm(normal_direction) - # compute the normal velocity - u_normal = normal[1] * u_inner[2] + normal[2] * u_inner[3] + # compute the normal velocity + u_normal = normal[1] * u_inner[2] + normal[2] * u_inner[3] - # create the "external" boundary solution state - u_boundary = SVector(u_inner[1], - u_inner[2] - 2.0 * u_normal * normal[1], - u_inner[3] - 2.0 * u_normal * normal[2], - u_inner[4]) + # create the "external" boundary solution state + u_boundary = SVector(u_inner[1], + u_inner[2] - 2.0 * u_normal * normal[1], + u_inner[3] - 2.0 * u_normal * normal[2], + u_inner[4]) - # calculate the boundary flux - flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) + # calculate the boundary flux + flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) - return flux + return flux end - """ boundary_condition_slip_wall(u_inner, orientation, direction, x, t, surface_flux_function, equations::ShallowWaterEquations2D) @@ -208,56 +204,55 @@ Should be used together with [`TreeMesh`](@ref). direction, x, t, surface_flux_function, equations::ShallowWaterEquations2D) - ## get the appropriate normal vector from the orientation - if orientation == 1 - u_boundary = SVector(u_inner[1], -u_inner[2], u_inner[3], u_inner[4]) - else # orientation == 2 - u_boundary = SVector(u_inner[1], u_inner[2], -u_inner[3], u_inner[4]) - end + ## get the appropriate normal vector from the orientation + if orientation == 1 + u_boundary = SVector(u_inner[1], -u_inner[2], u_inner[3], u_inner[4]) + else # orientation == 2 + u_boundary = SVector(u_inner[1], u_inner[2], -u_inner[3], u_inner[4]) + end - # compute and return the flux using `boundary_condition_slip_wall` routine above - flux = surface_flux_function(u_inner, u_boundary, orientation, equations) + # compute and return the flux using `boundary_condition_slip_wall` routine above + flux = surface_flux_function(u_inner, u_boundary, orientation, equations) - return flux + return flux end # Calculate 1D flux for a single point # Note, the bottom topography has no flux @inline function flux(u, orientation::Integer, equations::ShallowWaterEquations2D) - h, h_v1, h_v2, _ = u - v1, v2 = velocity(u, equations) - - p = 0.5 * equations.gravity * h^2 - if orientation == 1 - f1 = h_v1 - f2 = h_v1 * v1 + p - f3 = h_v1 * v2 - else - f1 = h_v2 - f2 = h_v2 * v1 - f3 = h_v2 * v2 + p - end - return SVector(f1, f2, f3, zero(eltype(u))) + h, h_v1, h_v2, _ = u + v1, v2 = velocity(u, equations) + + p = 0.5 * equations.gravity * h^2 + if orientation == 1 + f1 = h_v1 + f2 = h_v1 * v1 + p + f3 = h_v1 * v2 + else + f1 = h_v2 + f2 = h_v2 * v1 + f3 = h_v2 * v2 + p + end + return SVector(f1, f2, f3, zero(eltype(u))) end - # Calculate 1D flux for a single point in the normal direction # Note, this directional vector is not normalized and the bottom topography has no flux -@inline function flux(u, normal_direction::AbstractVector, 
equations::ShallowWaterEquations2D) - h = waterheight(u, equations) - v1, v2 = velocity(u, equations) +@inline function flux(u, normal_direction::AbstractVector, + equations::ShallowWaterEquations2D) + h = waterheight(u, equations) + v1, v2 = velocity(u, equations) - v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] - h_v_normal = h * v_normal - p = 0.5 * equations.gravity * h^2 + v_normal = v1 * normal_direction[1] + v2 * normal_direction[2] + h_v_normal = h * v_normal + p = 0.5 * equations.gravity * h^2 - f1 = h_v_normal - f2 = h_v_normal * v1 + p * normal_direction[1] - f3 = h_v_normal * v2 + p * normal_direction[2] - return SVector(f1, f2, f3, zero(eltype(u))) + f1 = h_v_normal + f2 = h_v_normal * v1 + p * normal_direction[1] + f3 = h_v_normal * v2 + p * normal_direction[2] + return SVector(f1, f2, f3, zero(eltype(u))) end - """ flux_nonconservative_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) @@ -282,36 +277,35 @@ Further details are available in the paper: """ @inline function flux_nonconservative_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) - # Pull the necessary left and right state information - h_ll = waterheight(u_ll, equations) - b_rr = u_rr[4] + # Pull the necessary left and right state information + h_ll = waterheight(u_ll, equations) + b_rr = u_rr[4] - z = zero(eltype(u_ll)) - # Bottom gradient nonconservative term: (0, g h b_x, g h b_y, 0) - if orientation == 1 - f = SVector(z, equations.gravity * h_ll * b_rr, z, z) - else # orientation == 2 - f = SVector(z, z, equations.gravity * h_ll * b_rr, z) - end - return f + z = zero(eltype(u_ll)) + # Bottom gradient nonconservative term: (0, g h b_x, g h b_y, 0) + if orientation == 1 + f = SVector(z, equations.gravity * h_ll * b_rr, z, z) + else # orientation == 2 + f = SVector(z, z, equations.gravity * h_ll * b_rr, z) + end + return f end @inline function flux_nonconservative_wintermeyer_etal(u_ll, u_rr, normal_direction_ll::AbstractVector, normal_direction_average::AbstractVector, equations::ShallowWaterEquations2D) - # Pull the necessary left and right state information - h_ll = waterheight(u_ll, equations) - b_rr = u_rr[4] - # Note this routine only uses the `normal_direction_average` and the average of the - # bottom topography to get a quadratic split form DG gradient on curved elements - return SVector(zero(eltype(u_ll)), - normal_direction_average[1] * equations.gravity * h_ll * b_rr, - normal_direction_average[2] * equations.gravity * h_ll * b_rr, - zero(eltype(u_ll))) + # Pull the necessary left and right state information + h_ll = waterheight(u_ll, equations) + b_rr = u_rr[4] + # Note this routine only uses the `normal_direction_average` and the average of the + # bottom topography to get a quadratic split form DG gradient on curved elements + return SVector(zero(eltype(u_ll)), + normal_direction_average[1] * equations.gravity * h_ll * b_rr, + normal_direction_average[2] * equations.gravity * h_ll * b_rr, + zero(eltype(u_ll))) end - """ flux_nonconservative_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) @@ -346,61 +340,62 @@ and for curvilinear 2D case in the paper: """ @inline function flux_nonconservative_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) - # Pull the necessary left and right state information - h_ll, _, _, b_ll = u_ll - h_rr, _, _, b_rr = u_rr - - h_average = 0.5 * (h_ll + h_rr) - b_jump = b_rr - b_ll - - # Includes two parts: - # (i) 
Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid - # cross-averaging across a discontinuous bottom topography - # (ii) True surface part that uses `h_average` and `b_jump` to handle discontinuous bathymetry - z = zero(eltype(u_ll)) - if orientation == 1 - f = SVector(z, - equations.gravity * h_ll * b_ll + equations.gravity * h_average * b_jump, - z, z) - else # orientation == 2 - f = SVector(z, z, - equations.gravity * h_ll * b_ll + equations.gravity * h_average * b_jump, - z) - end - - return f + # Pull the necessary left and right state information + h_ll, _, _, b_ll = u_ll + h_rr, _, _, b_rr = u_rr + + h_average = 0.5 * (h_ll + h_rr) + b_jump = b_rr - b_ll + + # Includes two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid + # cross-averaging across a discontinuous bottom topography + # (ii) True surface part that uses `h_average` and `b_jump` to handle discontinuous bathymetry + z = zero(eltype(u_ll)) + if orientation == 1 + f = SVector(z, + equations.gravity * h_ll * b_ll + + equations.gravity * h_average * b_jump, + z, z) + else # orientation == 2 + f = SVector(z, z, + equations.gravity * h_ll * b_ll + + equations.gravity * h_average * b_jump, + z) + end + + return f end @inline function flux_nonconservative_fjordholm_etal(u_ll, u_rr, normal_direction_ll::AbstractVector, normal_direction_average::AbstractVector, equations::ShallowWaterEquations2D) - # Pull the necessary left and right state information - h_ll, _, _, b_ll = u_ll - h_rr, _, _, b_rr = u_rr + # Pull the necessary left and right state information + h_ll, _, _, b_ll = u_ll + h_rr, _, _, b_rr = u_rr - # Comes in two parts: - # (i) Diagonal (consistent) term from the volume flux that uses `normal_direction_average` - # but we use `b_ll` to avoid cross-averaging across a discontinuous bottom topography + # Comes in two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `normal_direction_average` + # but we use `b_ll` to avoid cross-averaging across a discontinuous bottom topography - f2 = normal_direction_average[1] * equations.gravity * h_ll * b_ll - f3 = normal_direction_average[2] * equations.gravity * h_ll * b_ll + f2 = normal_direction_average[1] * equations.gravity * h_ll * b_ll + f3 = normal_direction_average[2] * equations.gravity * h_ll * b_ll - # (ii) True surface part that uses `normal_direction_ll`, `h_average` and `b_jump` - # to handle discontinuous bathymetry - h_average = 0.5 * (h_ll + h_rr) - b_jump = b_rr - b_ll + # (ii) True surface part that uses `normal_direction_ll`, `h_average` and `b_jump` + # to handle discontinuous bathymetry + h_average = 0.5 * (h_ll + h_rr) + b_jump = b_rr - b_ll - f2 += normal_direction_ll[1] * equations.gravity * h_average * b_jump - f3 += normal_direction_ll[2] * equations.gravity * h_average * b_jump + f2 += normal_direction_ll[1] * equations.gravity * h_average * b_jump + f3 += normal_direction_ll[2] * equations.gravity * h_average * b_jump - # First and last equations do not have a nonconservative flux - f1 = f4 = zero(eltype(u_ll)) + # First and last equations do not have a nonconservative flux + f1 = f4 = zero(eltype(u_ll)) - return SVector(f1, f2, f3, f4) + return SVector(f1, f2, f3, f4) end - """ hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, orientation_or_normal_direction, equations::ShallowWaterEquations2D) @@ -415,27 +410,27 @@ Further details for the hydrostatic reconstruction and its motivation can be fou A fast and stable well-balanced scheme with hydrostatic reconstruction 
for shallow water flows [DOI: 10.1137/S1064827503431090](https://doi.org/10.1137/S1064827503431090) """ -@inline function hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations::ShallowWaterEquations2D) - # Unpack left and right water heights and bottom topographies - h_ll, _, _, b_ll = u_ll - h_rr, _, _, b_rr = u_rr +@inline function hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, + equations::ShallowWaterEquations2D) + # Unpack left and right water heights and bottom topographies + h_ll, _, _, b_ll = u_ll + h_rr, _, _, b_rr = u_rr - # Get the velocities on either side - v1_ll, v2_ll = velocity(u_ll, equations) - v1_rr, v2_rr = velocity(u_rr, equations) + # Get the velocities on either side + v1_ll, v2_ll = velocity(u_ll, equations) + v1_rr, v2_rr = velocity(u_rr, equations) - # Compute the reconstructed water heights - h_ll_star = max(zero(h_ll) , h_ll + b_ll - max(b_ll, b_rr) ) - h_rr_star = max(zero(h_rr) , h_rr + b_rr - max(b_ll, b_rr) ) + # Compute the reconstructed water heights + h_ll_star = max(zero(h_ll), h_ll + b_ll - max(b_ll, b_rr)) + h_rr_star = max(zero(h_rr), h_rr + b_rr - max(b_ll, b_rr)) - # Create the conservative variables using the reconstruted water heights - u_ll_star = SVector( h_ll_star , h_ll_star * v1_ll , h_ll_star * v2_ll , b_ll ) - u_rr_star = SVector( h_rr_star , h_rr_star * v1_rr , h_rr_star * v2_rr , b_rr ) + # Create the conservative variables using the reconstruted water heights + u_ll_star = SVector(h_ll_star, h_ll_star * v1_ll, h_ll_star * v2_ll, b_ll) + u_rr_star = SVector(h_rr_star, h_rr_star * v1_rr, h_rr_star * v2_rr, b_rr) - return u_ll_star, u_rr_star + return u_ll_star, u_rr_star end - """ flux_nonconservative_audusse_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) @@ -460,67 +455,67 @@ Further details for the hydrostatic reconstruction and its motivation can be fou """ @inline function flux_nonconservative_audusse_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) - # Pull the water height and bottom topography on the left - h_ll, _, _, b_ll = u_ll - - # Create the hydrostatic reconstruction for the left solution state - u_ll_star, _ = hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations) - - # Copy the reconstructed water height for easier to read code - h_ll_star = u_ll_star[1] - - z = zero(eltype(u_ll)) - # Includes two parts: - # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid - # cross-averaging across a discontinuous bottom topography - # (ii) True surface part that uses `h_ll` and `h_ll_star` to handle discontinuous bathymetry - if orientation == 1 - f = SVector(z, - equations.gravity * h_ll * b_ll + equations.gravity * ( h_ll^2 - h_ll_star^2 ), - z, z) - else # orientation == 2 - f = SVector(z, z, - equations.gravity * h_ll * b_ll + equations.gravity * ( h_ll^2 - h_ll_star^2 ), - z) - end - - return f + # Pull the water height and bottom topography on the left + h_ll, _, _, b_ll = u_ll + + # Create the hydrostatic reconstruction for the left solution state + u_ll_star, _ = hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations) + + # Copy the reconstructed water height for easier to read code + h_ll_star = u_ll_star[1] + + z = zero(eltype(u_ll)) + # Includes two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid + # cross-averaging across a discontinuous bottom topography + # (ii) True surface part that uses `h_ll` and `h_ll_star` to handle discontinuous bathymetry + if orientation == 1 + f = 
SVector(z, + equations.gravity * h_ll * b_ll + + equations.gravity * (h_ll^2 - h_ll_star^2), + z, z) + else # orientation == 2 + f = SVector(z, z, + equations.gravity * h_ll * b_ll + + equations.gravity * (h_ll^2 - h_ll_star^2), + z) + end + + return f end @inline function flux_nonconservative_audusse_etal(u_ll, u_rr, normal_direction_ll::AbstractVector, normal_direction_average::AbstractVector, equations::ShallowWaterEquations2D) - # Pull the water height and bottom topography on the left - h_ll, _, _, b_ll = u_ll + # Pull the water height and bottom topography on the left + h_ll, _, _, b_ll = u_ll - # Create the hydrostatic reconstruction for the left solution state - u_ll_star, _ = hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations) + # Create the hydrostatic reconstruction for the left solution state + u_ll_star, _ = hydrostatic_reconstruction_audusse_etal(u_ll, u_rr, equations) - # Copy the reconstructed water height for easier to read code - h_ll_star = u_ll_star[1] + # Copy the reconstructed water height for easier to read code + h_ll_star = u_ll_star[1] - # Comes in two parts: - # (i) Diagonal (consistent) term from the volume flux that uses `normal_direction_average` - # but we use `b_ll` to avoid cross-averaging across a discontinuous bottom topography + # Comes in two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `normal_direction_average` + # but we use `b_ll` to avoid cross-averaging across a discontinuous bottom topography - f2 = normal_direction_average[1] * equations.gravity * h_ll * b_ll - f3 = normal_direction_average[2] * equations.gravity * h_ll * b_ll + f2 = normal_direction_average[1] * equations.gravity * h_ll * b_ll + f3 = normal_direction_average[2] * equations.gravity * h_ll * b_ll - # (ii) True surface part that uses `normal_direction_ll`, `h_ll` and `h_ll_star` - # to handle discontinuous bathymetry + # (ii) True surface part that uses `normal_direction_ll`, `h_ll` and `h_ll_star` + # to handle discontinuous bathymetry - f2 += normal_direction_ll[1] * equations.gravity * ( h_ll^2 - h_ll_star^2 ) - f3 += normal_direction_ll[2] * equations.gravity * ( h_ll^2 - h_ll_star^2 ) + f2 += normal_direction_ll[1] * equations.gravity * (h_ll^2 - h_ll_star^2) + f3 += normal_direction_ll[2] * equations.gravity * (h_ll^2 - h_ll_star^2) - # First and last equations do not have a nonconservative flux - f1 = f4 = zero(eltype(u_ll)) + # First and last equations do not have a nonconservative flux + f1 = f4 = zero(eltype(u_ll)) - return SVector(f1, f2, f3, f4) + return SVector(f1, f2, f3, f4) end - - """ flux_fjordholm_etal(u_ll, u_rr, orientation_or_normal_direction, equations::ShallowWaterEquations2D) @@ -534,59 +529,60 @@ Details are available in Eq. 
(4.1) in the paper: Well-balanced and energy stable schemes for the shallow water equations with discontinuous topography [DOI: 10.1016/j.jcp.2011.03.042](https://doi.org/10.1016/j.jcp.2011.03.042) """ -@inline function flux_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) - # Unpack left and right state - h_ll = waterheight(u_ll, equations) - v1_ll, v2_ll = velocity(u_ll, equations) - h_rr = waterheight(u_rr, equations) - v1_rr, v2_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - h_avg = 0.5 * (h_ll + h_rr ) - v1_avg = 0.5 * (v1_ll + v1_rr ) - v2_avg = 0.5 * (v2_ll + v2_rr ) - p_avg = 0.25 * equations.gravity * (h_ll^2 + h_rr^2) - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = h_avg * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - else - f1 = h_avg * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - end - - return SVector(f1, f2, f3, zero(eltype(u_ll))) -end - -@inline function flux_fjordholm_etal(u_ll, u_rr, normal_direction::AbstractVector, equations::ShallowWaterEquations2D) - # Unpack left and right state - h_ll = waterheight(u_ll, equations) - v1_ll, v2_ll = velocity(u_ll, equations) - h_rr = waterheight(u_rr, equations) - v1_rr, v2_rr = velocity(u_rr, equations) - - v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] - v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] - - # Average each factor of products in flux - h_avg = 0.5 * (h_ll + h_rr ) - v1_avg = 0.5 * (v1_ll + v1_rr ) - v2_avg = 0.5 * (v2_ll + v2_rr ) - h2_avg = 0.5 * (h_ll^2 + h_rr^2) - p_avg = 0.5 * equations.gravity * h2_avg - v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) - - # Calculate fluxes depending on normal_direction - f1 = h_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - - return SVector(f1, f2, f3, zero(eltype(u_ll))) -end +@inline function flux_fjordholm_etal(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquations2D) + # Unpack left and right state + h_ll = waterheight(u_ll, equations) + v1_ll, v2_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + h_avg = 0.5 * (h_ll + h_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.25 * equations.gravity * (h_ll^2 + h_rr^2) + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = h_avg * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + else + f1 = h_avg * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + end + + return SVector(f1, f2, f3, zero(eltype(u_ll))) +end + +@inline function flux_fjordholm_etal(u_ll, u_rr, normal_direction::AbstractVector, + equations::ShallowWaterEquations2D) + # Unpack left and right state + h_ll = waterheight(u_ll, equations) + v1_ll, v2_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + + v_dot_n_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v_dot_n_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + # Average each factor of products in flux + h_avg = 0.5 * (h_ll + h_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + h2_avg = 0.5 * (h_ll^2 + h_rr^2) + p_avg = 0.5 * equations.gravity * h2_avg + v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) + + # Calculate fluxes depending on normal_direction + f1 = h_avg * v_dot_n_avg + f2 = f1 * v1_avg 
+ p_avg * normal_direction[1] + f3 = f1 * v2_avg + p_avg * normal_direction[2] + + return SVector(f1, f2, f3, zero(eltype(u_ll))) +end """ flux_wintermeyer_etal(u_ll, u_rr, orientation_or_normal_direction, @@ -602,282 +598,274 @@ Further details are available in Theorem 1 of the paper: shallow water equations on unstructured curvilinear meshes with discontinuous bathymetry [DOI: 10.1016/j.jcp.2017.03.036](https://doi.org/10.1016/j.jcp.2017.03.036) """ -@inline function flux_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) - # Unpack left and right state - h_ll, h_v1_ll, h_v2_ll, _ = u_ll - h_rr, h_v1_rr, h_v2_rr, _ = u_rr - - # Get the velocities on either side - v1_ll, v2_ll = velocity(u_ll, equations) - v1_rr, v2_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_avg = 0.5 * equations.gravity * h_ll * h_rr - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = 0.5 * (h_v1_ll + h_v1_rr) - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - else - f1 = 0.5 * (h_v2_ll + h_v2_rr) - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - end - - return SVector(f1, f2, f3, zero(eltype(u_ll))) +@inline function flux_wintermeyer_etal(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquations2D) + # Unpack left and right state + h_ll, h_v1_ll, h_v2_ll, _ = u_ll + h_rr, h_v1_rr, h_v2_rr, _ = u_rr + + # Get the velocities on either side + v1_ll, v2_ll = velocity(u_ll, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * equations.gravity * h_ll * h_rr + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = 0.5 * (h_v1_ll + h_v1_rr) + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + else + f1 = 0.5 * (h_v2_ll + h_v2_rr) + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + end + + return SVector(f1, f2, f3, zero(eltype(u_ll))) +end + +@inline function flux_wintermeyer_etal(u_ll, u_rr, normal_direction::AbstractVector, + equations::ShallowWaterEquations2D) + # Unpack left and right state + h_ll, h_v1_ll, h_v2_ll, _ = u_ll + h_rr, h_v1_rr, h_v2_rr, _ = u_rr + + # Get the velocities on either side + v1_ll, v2_ll = velocity(u_ll, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + h_v1_avg = 0.5 * (h_v1_ll + h_v1_rr) + h_v2_avg = 0.5 * (h_v2_ll + h_v2_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * equations.gravity * h_ll * h_rr + + # Calculate fluxes depending on normal_direction + f1 = h_v1_avg * normal_direction[1] + h_v2_avg * normal_direction[2] + f2 = f1 * v1_avg + p_avg * normal_direction[1] + f3 = f1 * v2_avg + p_avg * normal_direction[2] + + return SVector(f1, f2, f3, zero(eltype(u_ll))) end -@inline function flux_wintermeyer_etal(u_ll, u_rr, normal_direction::AbstractVector, equations::ShallowWaterEquations2D) - # Unpack left and right state - h_ll, h_v1_ll, h_v2_ll, _ = u_ll - h_rr, h_v1_rr, h_v2_rr, _ = u_rr - - # Get the velocities on either side - v1_ll, v2_ll = velocity(u_ll, equations) - v1_rr, v2_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - h_v1_avg = 0.5 * (h_v1_ll + h_v1_rr ) - h_v2_avg = 0.5 * (h_v2_ll + h_v2_rr ) - v1_avg = 0.5 * (v1_ll + v1_rr ) - v2_avg = 0.5 * (v2_ll + v2_rr ) - p_avg = 0.5 * equations.gravity * h_ll * h_rr - - # Calculate fluxes depending on 
normal_direction - f1 = h_v1_avg * normal_direction[1] + h_v2_avg * normal_direction[2] - f2 = f1 * v1_avg + p_avg * normal_direction[1] - f3 = f1 * v2_avg + p_avg * normal_direction[2] - - return SVector(f1, f2, f3, zero(eltype(u_ll))) -end - - # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation as the # maximum velocity magnitude plus the maximum speed of sound -@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) - # Get the velocity quantities in the appropriate direction - if orientation == 1 - v_ll, _ = velocity(u_ll, equations) - v_rr, _ = velocity(u_rr, equations) - else - _, v_ll = velocity(u_ll, equations) - _, v_rr = velocity(u_rr, equations) - end +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquations2D) + # Get the velocity quantities in the appropriate direction + if orientation == 1 + v_ll, _ = velocity(u_ll, equations) + v_rr, _ = velocity(u_rr, equations) + else + _, v_ll = velocity(u_ll, equations) + _, v_rr = velocity(u_rr, equations) + end - # Calculate the wave celerity on the left and right - h_ll = waterheight(u_ll, equations) - h_rr = waterheight(u_rr, equations) - c_ll = sqrt(equations.gravity * h_ll) - c_rr = sqrt(equations.gravity * h_rr) + # Calculate the wave celerity on the left and right + h_ll = waterheight(u_ll, equations) + h_rr = waterheight(u_rr, equations) + c_ll = sqrt(equations.gravity * h_ll) + c_rr = sqrt(equations.gravity * h_rr) - return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) + return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) end -@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::ShallowWaterEquations2D) - # Extract and compute the velocities in the normal direction - v1_ll, v2_ll = velocity(u_ll, equations) - v1_rr, v2_rr = velocity(u_rr, equations) - v_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] - v_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::ShallowWaterEquations2D) + # Extract and compute the velocities in the normal direction + v1_ll, v2_ll = velocity(u_ll, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + v_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] - # Compute the wave celerity on the left and right - h_ll = waterheight(u_ll, equations) - h_rr = waterheight(u_rr, equations) - c_ll = sqrt(equations.gravity * h_ll) - c_rr = sqrt(equations.gravity * h_rr) + # Compute the wave celerity on the left and right + h_ll = waterheight(u_ll, equations) + h_rr = waterheight(u_rr, equations) + c_ll = sqrt(equations.gravity * h_ll) + c_rr = sqrt(equations.gravity * h_rr) - # The normal velocities are already scaled by the norm - return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) * norm(normal_direction) + # The normal velocities are already scaled by the norm + return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) * norm(normal_direction) end - # Specialized `DissipationLocalLaxFriedrichs` to avoid spurious dissipation in the bottom topography -@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, orientation_or_normal_direction, +@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, + orientation_or_normal_direction, equations::ShallowWaterEquations2D) - λ = dissipation.max_abs_speed(u_ll, u_rr, 
orientation_or_normal_direction, equations) - diss = -0.5 * λ * (u_rr - u_ll) - return SVector(diss[1], diss[2], diss[3], zero(eltype(u_ll))) + λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + diss = -0.5 * λ * (u_rr - u_ll) + return SVector(diss[1], diss[2], diss[3], zero(eltype(u_ll))) end - # Specialized `FluxHLL` to avoid spurious dissipation in the bottom topography @inline function (numflux::FluxHLL)(u_ll, u_rr, orientation_or_normal_direction, - equations::ShallowWaterEquations2D) - λ_min, λ_max = numflux.min_max_speed(u_ll, u_rr, orientation_or_normal_direction, equations) - - if λ_min >= 0 && λ_max >= 0 - return flux(u_ll, orientation_or_normal_direction, equations) - elseif λ_max <= 0 && λ_min <= 0 - return flux(u_rr, orientation_or_normal_direction, equations) - else - f_ll = flux(u_ll, orientation_or_normal_direction, equations) - f_rr = flux(u_rr, orientation_or_normal_direction, equations) - inv_λ_max_minus_λ_min = inv(λ_max - λ_min) - factor_ll = λ_max * inv_λ_max_minus_λ_min - factor_rr = λ_min * inv_λ_max_minus_λ_min - factor_diss = λ_min * λ_max * inv_λ_max_minus_λ_min - diss = u_rr - u_ll - return factor_ll * f_ll - factor_rr * f_rr + factor_diss * SVector(diss[1], diss[2], diss[3], zero(eltype(u_ll))) - end + equations::ShallowWaterEquations2D) + λ_min, λ_max = numflux.min_max_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + + if λ_min >= 0 && λ_max >= 0 + return flux(u_ll, orientation_or_normal_direction, equations) + elseif λ_max <= 0 && λ_min <= 0 + return flux(u_rr, orientation_or_normal_direction, equations) + else + f_ll = flux(u_ll, orientation_or_normal_direction, equations) + f_rr = flux(u_rr, orientation_or_normal_direction, equations) + inv_λ_max_minus_λ_min = inv(λ_max - λ_min) + factor_ll = λ_max * inv_λ_max_minus_λ_min + factor_rr = λ_min * inv_λ_max_minus_λ_min + factor_diss = λ_min * λ_max * inv_λ_max_minus_λ_min + diss = u_rr - u_ll + return factor_ll * f_ll - factor_rr * f_rr + + factor_diss * SVector(diss[1], diss[2], diss[3], zero(eltype(u_ll))) + end end - # Calculate minimum and maximum wave speeds for HLL-type fluxes @inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) - h_ll = waterheight(u_ll, equations) - v1_ll, v2_ll = velocity(u_ll, equations) - h_rr = waterheight(u_rr, equations) - v1_rr, v2_rr = velocity(u_rr, equations) + h_ll = waterheight(u_ll, equations) + v1_ll, v2_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + v1_rr, v2_rr = velocity(u_rr, equations) - if orientation == 1 # x-direction - λ_min = v1_ll - sqrt(equations.gravity * h_ll) - λ_max = v1_rr + sqrt(equations.gravity * h_rr) - else # y-direction - λ_min = v2_ll - sqrt(equations.gravity * h_ll) - λ_max = v2_rr + sqrt(equations.gravity * h_rr) - end + if orientation == 1 # x-direction + λ_min = v1_ll - sqrt(equations.gravity * h_ll) + λ_max = v1_rr + sqrt(equations.gravity * h_rr) + else # y-direction + λ_min = v2_ll - sqrt(equations.gravity * h_ll) + λ_max = v2_rr + sqrt(equations.gravity * h_rr) + end - return λ_min, λ_max + return λ_min, λ_max end @inline function min_max_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::ShallowWaterEquations2D) - h_ll = waterheight(u_ll, equations) - v1_ll, v2_ll = velocity(u_ll, equations) - h_rr = waterheight(u_rr, equations) - v1_rr, v2_rr = velocity(u_rr, equations) + h_ll = waterheight(u_ll, equations) + v1_ll, v2_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + 
v1_rr, v2_rr = velocity(u_rr, equations)

-  v_normal_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2]
-  v_normal_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2]
+    v_normal_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2]
+    v_normal_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2]

-  norm_ = norm(normal_direction)
-  # The v_normals are already scaled by the norm
-  λ_min = v_normal_ll - sqrt(equations.gravity * h_ll) * norm_
-  λ_max = v_normal_rr + sqrt(equations.gravity * h_rr) * norm_
+    norm_ = norm(normal_direction)
+    # The v_normals are already scaled by the norm
+    λ_min = v_normal_ll - sqrt(equations.gravity * h_ll) * norm_
+    λ_max = v_normal_rr + sqrt(equations.gravity * h_rr) * norm_

-  return λ_min, λ_max
+    return λ_min, λ_max
 end

-
 @inline function max_abs_speeds(u, equations::ShallowWaterEquations2D)
-  h = waterheight(u, equations)
-  v1, v2 = velocity(u, equations)
+    h = waterheight(u, equations)
+    v1, v2 = velocity(u, equations)

-  c = equations.gravity * sqrt(h)
-  return abs(v1) + c, abs(v2) + c
+    # Wave celerity c = sqrt(g * h), consistent with `max_abs_speed_naive` above
+    c = sqrt(equations.gravity * h)
+    return abs(v1) + c, abs(v2) + c
 end

-
 # Helper function to extract the velocity vector from the conservative variables
 @inline function velocity(u, equations::ShallowWaterEquations2D)
-  h, h_v1, h_v2, _ = u
+    h, h_v1, h_v2, _ = u

-  v1 = h_v1 / h
-  v2 = h_v2 / h
-  return SVector(v1, v2)
+    v1 = h_v1 / h
+    v2 = h_v2 / h
+    return SVector(v1, v2)
 end

-
 # Convert conservative variables to primitive
 @inline function cons2prim(u, equations::ShallowWaterEquations2D)
-  h, _, _, b = u
+    h, _, _, b = u

-  H = h + b
-  v1, v2 = velocity(u, equations)
-  return SVector(H, v1, v2, b)
+    H = h + b
+    v1, v2 = velocity(u, equations)
+    return SVector(H, v1, v2, b)
 end

-
 # Convert conservative variables to entropy
 # Note, only the first three are the entropy variables, the fourth entry still
 # just carries the bottom topography values for convenience
 @inline function cons2entropy(u, equations::ShallowWaterEquations2D)
-  h, h_v1, h_v2, b = u
+    h, h_v1, h_v2, b = u

-  v1, v2 = velocity(u, equations)
-  v_square = v1^2 + v2^2
+    v1, v2 = velocity(u, equations)
+    v_square = v1^2 + v2^2

-  w1 = equations.gravity * (h + b) - 0.5 * v_square
-  w2 = v1
-  w3 = v2
-  return SVector(w1, w2, w3, b)
+    w1 = equations.gravity * (h + b) - 0.5 * v_square
+    w2 = v1
+    w3 = v2
+    return SVector(w1, w2, w3, b)
 end

-
 # Convert entropy variables to conservative
 @inline function entropy2cons(w, equations::ShallowWaterEquations2D)
-  w1, w2, w3, b = w
+    w1, w2, w3, b = w

-  h = (w1 + 0.5 * (w2^2 + w3^2)) / equations.gravity - b
-  h_v1 = h * w2
-  h_v2 = h * w3
-  return SVector(h, h_v1, h_v2, b)
+    h = (w1 + 0.5 * (w2^2 + w3^2)) / equations.gravity - b
+    h_v1 = h * w2
+    h_v2 = h * w3
+    return SVector(h, h_v1, h_v2, b)
 end

-
 # Convert primitive to conservative variables
 @inline function prim2cons(prim, equations::ShallowWaterEquations2D)
-  H, v1, v2, b = prim
+    H, v1, v2, b = prim

-  h = H - b
-  h_v1 = h * v1
-  h_v2 = h * v2
-  return SVector(h, h_v1, h_v2, b)
+    h = H - b
+    h_v1 = h * v1
+    h_v2 = h * v2
+    return SVector(h, h_v1, h_v2, b)
 end

-
 @inline function waterheight(u, equations::ShallowWaterEquations2D)
-  return u[1]
+    return u[1]
 end

-
 @inline function pressure(u, equations::ShallowWaterEquations2D)
-  h = waterheight(u, equations)
-  p = 0.5 * equations.gravity * h^2
-  return p
+    h = waterheight(u, equations)
+    p = 0.5 * equations.gravity * h^2
+    return p
 end

-
 @inline function waterheight_pressure(u, equations::ShallowWaterEquations2D)
-  return waterheight(u, equations) * pressure(u, equations)
+    return waterheight(u, equations) * pressure(u, equations)
 end

-
 # Entropy function for the shallow water equations is the total energy
-@inline entropy(cons, equations::ShallowWaterEquations2D) = energy_total(cons, equations)
-
+@inline function entropy(cons, equations::ShallowWaterEquations2D)
+    energy_total(cons, equations)
+end

 # Calculate total energy for a conservative state `cons`
 @inline function energy_total(cons, equations::ShallowWaterEquations2D)
-  h, h_v1, h_v2, b = cons
+    h, h_v1, h_v2, b = cons

-  e = (h_v1^2 + h_v2^2) / (2 * h) + 0.5 * equations.gravity * h^2 + equations.gravity * h * b
-  return e
+    e = (h_v1^2 + h_v2^2) / (2 * h) + 0.5 * equations.gravity * h^2 +
+        equations.gravity * h * b
+    return e
 end

-
 # Calculate kinetic energy for a conservative state `cons`
 @inline function energy_kinetic(u, equations::ShallowWaterEquations2D)
-  h, h_v1, h_v2, _ = u
-  return (h_v1^2 + h_v2^2) / (2 * h)
+    h, h_v1, h_v2, _ = u
+    return (h_v1^2 + h_v2^2) / (2 * h)
 end

-
 # Calculate potential energy for a conservative state `cons`
 @inline function energy_internal(cons, equations::ShallowWaterEquations2D)
-  return energy_total(cons, equations) - energy_kinetic(cons, equations)
+    return energy_total(cons, equations) - energy_kinetic(cons, equations)
 end

-
 # Calculate the error for the "lake-at-rest" test case where H = h+b should
 # be a constant value over time
 @inline function lake_at_rest_error(u, equations::ShallowWaterEquations2D)
-  h, _, _, b = u
-  return abs(equations.H0 - (h + b))
+    h, _, _, b = u
+    return abs(equations.H0 - (h + b))
 end

-
 end # @muladd
diff --git a/src/equations/shallow_water_two_layer_1d.jl b/src/equations/shallow_water_two_layer_1d.jl
index fd4fbc017ec..edf7d5e32ff 100644
--- a/src/equations/shallow_water_two_layer_1d.jl
+++ b/src/equations/shallow_water_two_layer_1d.jl
@@ -3,6 +3,7 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
+#! format: noindent

 @doc raw"""
     ShallowWaterTwoLayerEquations1D(gravity, H0, rho_upper, rho_lower)

@@ -57,37 +58,43 @@ A good introduction for the 2LSWE is available in Chapter 12 of the book: \
   ISBN: 978-0-12-088759-0
 """
-struct ShallowWaterTwoLayerEquations1D{RealT<:Real} <: AbstractShallowWaterEquations{1,5}
-  gravity::RealT # gravitational constant
-  H0::RealT # constant "lake-at-rest" total water height
-  rho_upper::RealT # lower layer density
-  rho_lower::RealT # upper layer density
-  r::RealT # ratio of rho_upper / rho_lower
+struct ShallowWaterTwoLayerEquations1D{RealT <: Real} <:
+       AbstractShallowWaterEquations{1, 5}
+    gravity::RealT # gravitational constant
+    H0::RealT # constant "lake-at-rest" total water height
+    rho_upper::RealT # upper layer density
+    rho_lower::RealT # lower layer density
+    r::RealT # ratio of rho_upper / rho_lower
 end

 # Allow for flexibility to set the gravitational constant within an elixir depending on the
 # application where `gravity_constant=1.0` or `gravity_constant=9.81` are common values.
 # The reference total water height H0 defaults to 0.0 but is used for the "lake-at-rest"
 # well-balancedness test cases. Densities must be specified such that rho_upper <= rho_lower.
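For orientation, here is a minimal usage sketch of this keyword constructor, whose reformatted definition follows in the hunk just below; the numeric values are illustrative only and do not appear in this patch:

```julia
using Trixi

# Illustrative parameters: densities must satisfy rho_upper <= rho_lower,
# otherwise the constructor raises the error shown in the diff below.
equations = ShallowWaterTwoLayerEquations1D(gravity_constant = 9.81,
                                            rho_upper = 0.9, rho_lower = 1.0)

equations.r # density ratio rho_upper / rho_lower, here 0.9; H0 defaults to 0.0
```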
-function ShallowWaterTwoLayerEquations1D(; gravity_constant, H0=zero(gravity_constant), rho_upper, rho_lower)
-  # Assign density ratio if rho_upper <= rho_lower
-  if rho_upper > rho_lower
-    error("Invalid input: Densities must be chosen such that rho_upper <= rho_lower")
-  else
-    r = rho_upper / rho_lower
-  end
-  ShallowWaterTwoLayerEquations1D(gravity_constant, H0, rho_upper, rho_lower, r)
+function ShallowWaterTwoLayerEquations1D(; gravity_constant,
+                                         H0 = zero(gravity_constant), rho_upper,
+                                         rho_lower)
+    # Assign density ratio if rho_upper <= rho_lower
+    if rho_upper > rho_lower
+        error("Invalid input: Densities must be chosen such that rho_upper <= rho_lower")
+    else
+        r = rho_upper / rho_lower
+    end
+    ShallowWaterTwoLayerEquations1D(gravity_constant, H0, rho_upper, rho_lower, r)
 end

 have_nonconservative_terms(::ShallowWaterTwoLayerEquations1D) = True()
-varnames(::typeof(cons2cons), ::ShallowWaterTwoLayerEquations1D) = ("h_upper", "h_v1_upper",
-                                                                    "h_lower", "h_v1_lower", "b")
+function varnames(::typeof(cons2cons), ::ShallowWaterTwoLayerEquations1D)
+    ("h_upper", "h_v1_upper",
+     "h_lower", "h_v1_lower", "b")
+end

 # Note, we use the total water height, H_upper = h_upper + h_lower + b, and first layer total height
 # H_lower = h_lower + b as the first primitive variable for easier visualization and setting initial
 # conditions
-varnames(::typeof(cons2prim), ::ShallowWaterTwoLayerEquations1D) = ("H_upper", "v1_upper",
-                                                                    "H_lower", "v1_lower", "b")
-
+function varnames(::typeof(cons2prim), ::ShallowWaterTwoLayerEquations1D)
+    ("H_upper", "v1_upper",
+     "H_lower", "v1_lower", "b")
+end

 # Set initial conditions at physical location `x` for time `t`
 """
     initial_condition_convergence_test(x, t, equations::ShallowWaterTwoLayerEquations1D)

@@ -97,20 +104,20 @@ A smooth initial condition used for convergence tests in combination with
 [`source_terms_convergence_test`](@ref) (and
 [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains).
 """
-function initial_condition_convergence_test(x, t, equations::ShallowWaterTwoLayerEquations1D)
-  # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)]
-  ω = 2.0 * pi * sqrt(2.0)
-
-  H_lower = 2.0 + 0.1 * sin(ω * x[1] + t)
-  H_upper = 4.0 + 0.1 * cos(ω * x[1] + t)
-  v1_lower = 1.0
-  v1_upper = 0.9
-  b = 1.0 + 0.1 * cos(2.0 * ω * x[1])
-
-  return prim2cons(SVector(H_upper, v1_upper, H_lower, v1_lower, b), equations)
+function initial_condition_convergence_test(x, t,
+                                            equations::ShallowWaterTwoLayerEquations1D)
+    # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)]
+    ω = 2.0 * pi * sqrt(2.0)
+
+    H_lower = 2.0 + 0.1 * sin(ω * x[1] + t)
+    H_upper = 4.0 + 0.1 * cos(ω * x[1] + t)
+    v1_lower = 1.0
+    v1_upper = 0.9
+    b = 1.0 + 0.1 * cos(2.0 * ω * x[1])
+
+    return prim2cons(SVector(H_upper, v1_upper, H_lower, v1_lower, b), equations)
 end

-
 """
     source_terms_convergence_test(u, x, t, equations::ShallowWaterTwoLayerEquations1D)

@@ -121,26 +128,35 @@ Source terms used for convergence tests in combination with
 [`initial_condition_convergence_test`](@ref)
 (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref)
 in non-periodic domains).
 """
 @inline function source_terms_convergence_test(u, x, t,
                                                equations::ShallowWaterTwoLayerEquations1D)
-  # Same settings as in `initial_condition_convergence_test`.
Some derivative simplify because - # this manufactured solution velocity is taken to be constant - ω = 2 * pi * sqrt(2.0) - - du1 = (-0.1*cos(t + ω*x[1]) - 0.1*sin(t + ω*x[1]) - 0.09*ω*cos(t + ω*x[1]) + - - 0.09*ω*sin(t + ω*x[1])) - du2 = (5.0 * (-0.1*ω*cos(t + ω*x[1]) - 0.1*ω*sin(t + ω*x[1])) * (4.0 + 0.2*cos(t + ω*x[1]) + - -0.2*sin(t + ω*x[1])) + 0.1*ω*(20.0 + cos(t + ω*x[1]) - sin(t + ω*x[1])) * cos(t + - ω*x[1]) - 0.09*cos(t + ω*x[1]) - 0.09*sin(t + ω*x[1]) - 0.081*ω*cos(t + ω*x[1]) + - -0.081*ω*sin(t + ω*x[1])) - du3 = 0.1*cos(t + ω*x[1]) + 0.1*ω*cos(t + ω*x[1]) + 0.2*ω*sin(2.0*ω*x[1]) - du4 = ((10.0 + sin(t + ω*x[1]) - cos(2ω*x[1]))*(-0.09*ω*cos(t + ω*x[1]) - 0.09*ω*sin(t + - ω*x[1]) - 0.2*ω*sin(2*ω*x[1])) + 0.1*cos(t + ω*x[1]) + 0.1*ω*cos(t + ω*x[1]) + - 5.0 * (0.1*ω*cos(t + ω*x[1]) + 0.2*ω*sin(2.0*ω*x[1])) * (2.0 + 0.2*sin(t + ω*x[1]) + - -0.2*cos(2.0*ω*x[1])) + 0.2*ω*sin(2.0*ω*x[1])) - - return SVector(du1, du2, du3, du4, zero(eltype(u))) + # Same settings as in `initial_condition_convergence_test`. Some derivative simplify because + # this manufactured solution velocity is taken to be constant + ω = 2 * pi * sqrt(2.0) + + du1 = (-0.1 * cos(t + ω * x[1]) - 0.1 * sin(t + ω * x[1]) - + 0.09 * ω * cos(t + ω * x[1]) + + -0.09 * ω * sin(t + ω * x[1])) + du2 = (5.0 * (-0.1 * ω * cos(t + ω * x[1]) - 0.1 * ω * sin(t + ω * x[1])) * + (4.0 + 0.2 * cos(t + ω * x[1]) + + -0.2 * sin(t + ω * x[1])) + + 0.1 * ω * (20.0 + cos(t + ω * x[1]) - sin(t + ω * x[1])) * + cos(t + + ω * x[1]) - 0.09 * cos(t + ω * x[1]) - 0.09 * sin(t + ω * x[1]) - + 0.081 * ω * cos(t + ω * x[1]) + + -0.081 * ω * sin(t + ω * x[1])) + du3 = 0.1 * cos(t + ω * x[1]) + 0.1 * ω * cos(t + ω * x[1]) + + 0.2 * ω * sin(2.0 * ω * x[1]) + du4 = ((10.0 + sin(t + ω * x[1]) - cos(2ω * x[1])) * + (-0.09 * ω * cos(t + ω * x[1]) - 0.09 * ω * sin(t + + ω * x[1]) - + 0.2 * ω * sin(2 * ω * x[1])) + 0.1 * cos(t + ω * x[1]) + + 0.1 * ω * cos(t + ω * x[1]) + + 5.0 * (0.1 * ω * cos(t + ω * x[1]) + 0.2 * ω * sin(2.0 * ω * x[1])) * + (2.0 + 0.2 * sin(t + ω * x[1]) + + -0.2 * cos(2.0 * ω * x[1])) + 0.2 * ω * sin(2.0 * ω * x[1])) + + return SVector(du1, du2, du3, du4, zero(eltype(u))) end - """ boundary_condition_slip_wall(u_inner, orientation_or_normal, x, t, surface_flux_function, equations::ShallowWaterTwoLayerEquations1D) @@ -158,43 +174,42 @@ For details see Section 9.2.5 of the book: @inline function boundary_condition_slip_wall(u_inner, orientation_or_normal, direction, x, t, surface_flux_function, equations::ShallowWaterTwoLayerEquations1D) - # create the "external" boundary solution state - u_boundary = SVector(u_inner[1], - -u_inner[2], - u_inner[3], - -u_inner[4], - u_inner[5]) - - # calculate the boundary flux - if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary - f = surface_flux_function(u_inner, u_boundary, orientation_or_normal, equations) - else # u_boundary is "left" of boundary, u_inner is "right" of boundary - f = surface_flux_function(u_boundary, u_inner, orientation_or_normal, equations) - end - return f + # create the "external" boundary solution state + u_boundary = SVector(u_inner[1], + -u_inner[2], + u_inner[3], + -u_inner[4], + u_inner[5]) + + # calculate the boundary flux + if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary + f = surface_flux_function(u_inner, u_boundary, orientation_or_normal, equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + f = surface_flux_function(u_boundary, u_inner, orientation_or_normal, 
equations) + end + return f end - # Calculate 1D flux for a single point # Note, the bottom topography has no flux -@inline function flux(u, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) - h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u - - # Calculate velocities - v1_upper, v1_lower = velocity(u, equations) - # Calculate pressure - p1 = 0.5 * equations.gravity * h_upper^2 - p2 = 0.5 * equations.gravity * h_lower^2 - - f1 = h_v1_upper - f2 = h_v1_upper * v1_upper + p1 - f3 = h_v2_lower - f4 = h_v2_lower * v1_lower + p2 - - return SVector(f1, f2, f3, f4, zero(eltype(u))) +@inline function flux(u, orientation::Integer, + equations::ShallowWaterTwoLayerEquations1D) + h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u + + # Calculate velocities + v1_upper, v1_lower = velocity(u, equations) + # Calculate pressure + p1 = 0.5 * equations.gravity * h_upper^2 + p2 = 0.5 * equations.gravity * h_lower^2 + + f1 = h_v1_upper + f2 = h_v1_upper * v1_upper + p1 + f3 = h_v2_lower + f4 = h_v2_lower * v1_lower + p2 + + return SVector(f1, f2, f3, f4, zero(eltype(u))) end - """ flux_nonconservative_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) @@ -216,24 +231,23 @@ Further details are available in the paper: @inline function flux_nonconservative_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) - # Pull the necessary left and right state information - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - b_rr = u_rr[5] - - z = zero(eltype(u_ll)) - - # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, - # 0, g*h_lower*(b+r*h_upper)_x, 0) - f = SVector(z, - equations.gravity * h_upper_ll * (b_rr + h_lower_rr), - z, - equations.gravity * h_lower_ll * (b_rr + equations.r * h_upper_rr), - z) - return f + # Pull the necessary left and right state information + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + b_rr = u_rr[5] + + z = zero(eltype(u_ll)) + + # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, + # 0, g*h_lower*(b+r*h_upper)_x, 0) + f = SVector(z, + equations.gravity * h_upper_ll * (b_rr + h_lower_rr), + z, + equations.gravity * h_lower_ll * (b_rr + equations.r * h_upper_rr), + z) + return f end - """ flux_nonconservative_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) @@ -256,35 +270,35 @@ formulation. 
@inline function flux_nonconservative_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) - # Pull the necessary left and right state information - h_upper_ll, _, h_lower_ll, _, b_ll = u_ll - h_upper_rr, _, h_lower_rr, _, b_rr = u_rr - - # Create average and jump values - h_upper_average = 0.5 * (h_upper_ll + h_upper_rr) - h_lower_average = 0.5 * (h_lower_ll + h_lower_rr) - h_upper_jump = h_upper_rr - h_upper_ll - h_lower_jump = h_lower_rr - h_lower_ll - b_jump = b_rr - b_ll - - # Assign variables for constants for better readability - g = equations.gravity - - z = zero(eltype(u_ll)) - - # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, - # 0, g*h_lower*(b+r*h_upper)_x, 0) - f = SVector( - z, - g * h_upper_ll * (b_ll + h_lower_ll) + g * h_upper_average * (b_jump + h_lower_jump), - z, - g * h_lower_ll * (b_ll + equations.r * h_upper_ll) + g * h_lower_average * (b_jump + - equations.r * h_upper_jump), - z) - return f + # Pull the necessary left and right state information + h_upper_ll, _, h_lower_ll, _, b_ll = u_ll + h_upper_rr, _, h_lower_rr, _, b_rr = u_rr + + # Create average and jump values + h_upper_average = 0.5 * (h_upper_ll + h_upper_rr) + h_lower_average = 0.5 * (h_lower_ll + h_lower_rr) + h_upper_jump = h_upper_rr - h_upper_ll + h_lower_jump = h_lower_rr - h_lower_ll + b_jump = b_rr - b_ll + + # Assign variables for constants for better readability + g = equations.gravity + + z = zero(eltype(u_ll)) + + # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, + # 0, g*h_lower*(b+r*h_upper)_x, 0) + f = SVector(z, + g * h_upper_ll * (b_ll + h_lower_ll) + + g * h_upper_average * (b_jump + h_lower_jump), + z, + g * h_lower_ll * (b_ll + equations.r * h_upper_ll) + + g * h_lower_average * (b_jump + + equations.r * h_upper_jump), + z) + return f end - """ flux_fjordholm_etal(u_ll, u_rr, orientation, equations::ShallowWaterTwoLayerEquations1D) @@ -308,30 +322,29 @@ formulation. 
@inline function flux_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) - # Unpack left and right state - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - v1_ll, v2_ll = velocity(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - v1_rr, v2_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - h_upper_avg = 0.5 * (h_upper_ll + h_upper_rr) - h_lower_avg = 0.5 * (h_lower_ll + h_lower_rr) - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p1_avg = 0.25 * equations.gravity * (h_upper_ll^2 + h_upper_rr^2) - p2_avg = 0.25 * equations.gravity * (h_lower_ll^2 + h_lower_rr^2) - - # Calculate fluxes - f1 = h_upper_avg * v1_avg - f2 = f1 * v1_avg + p1_avg - f3 = h_lower_avg * v2_avg - f4 = f3 * v2_avg + p2_avg - - return SVector(f1, f2, f3, f4, zero(eltype(u_ll))) + # Unpack left and right state + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + v1_ll, v2_ll = velocity(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + h_upper_avg = 0.5 * (h_upper_ll + h_upper_rr) + h_lower_avg = 0.5 * (h_lower_ll + h_lower_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p1_avg = 0.25 * equations.gravity * (h_upper_ll^2 + h_upper_rr^2) + p2_avg = 0.25 * equations.gravity * (h_lower_ll^2 + h_lower_rr^2) + + # Calculate fluxes + f1 = h_upper_avg * v1_avg + f2 = f1 * v1_avg + p1_avg + f3 = h_lower_avg * v2_avg + f4 = f3 * v2_avg + p2_avg + + return SVector(f1, f2, f3, f4, zero(eltype(u_ll))) end - """ flux_wintermeyer_etal(u_ll, u_rr, orientation, equations::ShallowWaterTwoLayerEquations1D) @@ -351,30 +364,29 @@ Further details are available in Theorem 1 of the paper: @inline function flux_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) - # Unpack left and right state - h_upper_ll, h_v1_upper_ll, h_lower_ll, h_v2_lower_ll, _ = u_ll - h_upper_rr, h_v1_upper_rr, h_lower_rr, h_v2_lower_rr, _ = u_rr - - # Get the velocities on either side - v1_ll, v2_ll = velocity(u_ll, equations) - v1_rr, v2_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p1_avg = 0.5 * equations.gravity * h_upper_ll * h_upper_rr - p2_avg = 0.5 * equations.gravity * h_lower_ll * h_lower_rr - - # Calculate fluxes - f1 = 0.5 * (h_v1_upper_ll + h_v1_upper_rr) - f2 = f1 * v1_avg + p1_avg - f3 = 0.5 * (h_v2_lower_ll + h_v2_lower_rr) - f4 = f3 * v2_avg + p2_avg - - return SVector(f1, f2, f3, f4, zero(eltype(u_ll))) + # Unpack left and right state + h_upper_ll, h_v1_upper_ll, h_lower_ll, h_v2_lower_ll, _ = u_ll + h_upper_rr, h_v1_upper_rr, h_lower_rr, h_v2_lower_rr, _ = u_rr + + # Get the velocities on either side + v1_ll, v2_ll = velocity(u_ll, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p1_avg = 0.5 * equations.gravity * h_upper_ll * h_upper_rr + p2_avg = 0.5 * equations.gravity * h_lower_ll * h_lower_rr + + # Calculate fluxes + f1 = 0.5 * (h_v1_upper_ll + h_v1_upper_rr) + f2 = f1 * v1_avg + p1_avg + f3 = 0.5 * (h_v2_lower_ll + h_v2_lower_rr) + f4 = f3 * v2_avg + p2_avg + + return SVector(f1, f2, f3, f4, zero(eltype(u_ll))) end - """ flux_es_fjordholm_etal(u_ll, u_rr, orientation, equations::ShallowWaterTwoLayerEquations1D) @@ -394,59 +406,59 
@@ formulation. @inline function flux_es_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) - # Compute entropy conservative flux but without the bottom topography - f_ec = flux_fjordholm_etal(u_ll, u_rr, - orientation, - equations) - - # Get maximum signal velocity - λ = max_abs_speed_naive(u_ll, u_rr, orientation, equations) - # Get entropy variables but without the bottom topography - q_rr = cons2entropy(u_rr,equations) - q_ll = cons2entropy(u_ll,equations) - - # Average values from left and right - u_avg = (u_ll + u_rr) / 2 - - # Introduce variables for better readability - rho_upper = equations.rho_upper - rho_lower = equations.rho_lower - g = equations.gravity - drho = rho_upper - rho_lower - - # Entropy Jacobian matrix - H = @SMatrix [ - [-rho_lower/(g*rho_upper*drho);; - -rho_lower*u_avg[2]/(g*rho_upper*u_avg[1]*drho);; - 1.0/(g*drho);; - u_avg[4]/(g*u_avg[3]*drho);; - 0]; - [-rho_lower*u_avg[2]/(g*rho_upper*u_avg[1]*drho);; - (g*rho_upper*u_avg[1]^3 - g*rho_lower*u_avg[1]^3 + - -rho_lower*u_avg[2]^2)/(g*rho_upper*u_avg[1]^2*drho);; - u_avg[2]/(g*u_avg[1]*drho);; - u_avg[2]*u_avg[4]/(g*u_avg[1]*u_avg[3]*drho);; - 0]; - [1.0/(g*drho);; - u_avg[2]/(g*u_avg[1]*drho);; - -1.0/(g*drho);; - -u_avg[4]/(g*u_avg[3]*drho);; - 0]; - [u_avg[4]/(g*u_avg[3]*drho);; - u_avg[2]*u_avg[4]/(g*u_avg[1]*u_avg[3]*drho);; - -u_avg[4]/(g*u_avg[3]*drho);; - (g*rho_upper*u_avg[3]^3 - g*rho_lower*u_avg[3]^3 + - -rho_lower*u_avg[4]^2)/(g*rho_lower*u_avg[3]^2*drho);; - 0]; - [0;;0;;0;;0;;0]] - - # Add dissipation to entropy conservative flux to obtain entropy stable flux - f_es = f_ec - 0.5 * λ * H * (q_rr - q_ll) - - return SVector(f_es[1], f_es[2], f_es[3], f_es[4], zero(eltype(u_ll))) + # Compute entropy conservative flux but without the bottom topography + f_ec = flux_fjordholm_etal(u_ll, u_rr, + orientation, + equations) + + # Get maximum signal velocity + λ = max_abs_speed_naive(u_ll, u_rr, orientation, equations) + # Get entropy variables but without the bottom topography + q_rr = cons2entropy(u_rr, equations) + q_ll = cons2entropy(u_ll, equations) + + # Average values from left and right + u_avg = (u_ll + u_rr) / 2 + + # Introduce variables for better readability + rho_upper = equations.rho_upper + rho_lower = equations.rho_lower + g = equations.gravity + drho = rho_upper - rho_lower + + # Compute entropy Jacobian coefficients + h11 = -rho_lower / (g * rho_upper * drho) + h12 = -rho_lower * u_avg[2] / (g * rho_upper * u_avg[1] * drho) + h13 = 1.0 / (g * drho) + h14 = u_avg[4] / (g * u_avg[3] * drho) + h21 = -rho_lower * u_avg[2] / (g * rho_upper * u_avg[1] * drho) + h22 = ((g * rho_upper * u_avg[1]^3 - g * rho_lower * u_avg[1]^3 + + -rho_lower * u_avg[2]^2) / (g * rho_upper * u_avg[1]^2 * drho)) + h23 = u_avg[2] / (g * u_avg[1] * drho) + h24 = u_avg[2] * u_avg[4] / (g * u_avg[1] * u_avg[3] * drho) + h31 = 1.0 / (g * drho) + h32 = u_avg[2] / (g * u_avg[1] * drho) + h33 = -1.0 / (g * drho) + h34 = -u_avg[4] / (g * u_avg[3] * drho) + h41 = u_avg[4] / (g * u_avg[3] * drho) + h42 = u_avg[2] * u_avg[4] / (g * u_avg[1] * u_avg[3] * drho) + h43 = -u_avg[4] / (g * u_avg[3] * drho) + h44 = ((g * rho_upper * u_avg[3]^3 - g * rho_lower * u_avg[3]^3 + + -rho_lower * u_avg[4]^2) / (g * rho_lower * u_avg[3]^2 * drho)) + + # Entropy Jacobian matrix + H = @SMatrix [[h11;; h12;; h13;; h14;; 0]; + [h21;; h22;; h23;; h24;; 0]; + [h31;; h32;; h33;; h34;; 0]; + [h41;; h42;; h43;; h44;; 0]; + [0;; 0;; 0;; 0;; 0]] + + # Add dissipation to entropy conservative flux to obtain entropy 
stable flux + f_es = f_ec - 0.5 * λ * H * (q_rr - q_ll) + + return SVector(f_es[1], f_es[2], f_es[3], f_es[4], zero(eltype(u_ll))) end - # Calculate approximation for maximum wave speed for local Lax-Friedrichs-type dissipation as the # maximum velocity magnitude plus the maximum speed of sound. This function uses approximate # eigenvalues using the speed of the barotropic mode as there is no simple way to calculate them @@ -459,135 +471,130 @@ end @inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations1D) - # Unpack left and right state - h_upper_ll, h_v1_upper_ll, h_lower_ll, h_v2_lower_ll, _ = u_ll - h_upper_rr, h_v1_upper_rr, h_lower_rr, h_v2_lower_rr, _ = u_rr + # Unpack left and right state + h_upper_ll, h_v1_upper_ll, h_lower_ll, h_v2_lower_ll, _ = u_ll + h_upper_rr, h_v1_upper_rr, h_lower_rr, h_v2_lower_rr, _ = u_rr - # Get the averaged velocity - v_m_ll = (h_v1_upper_ll + h_v2_lower_ll) / (h_upper_ll + h_lower_ll) - v_m_rr = (h_v1_upper_rr + h_v2_lower_rr) / (h_upper_rr + h_lower_rr) + # Get the averaged velocity + v_m_ll = (h_v1_upper_ll + h_v2_lower_ll) / (h_upper_ll + h_lower_ll) + v_m_rr = (h_v1_upper_rr + h_v2_lower_rr) / (h_upper_rr + h_lower_rr) - # Calculate the wave celerity on the left and right - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - c_ll = sqrt(equations.gravity * (h_upper_ll + h_lower_ll)) - c_rr = sqrt(equations.gravity * (h_upper_rr + h_lower_rr)) + # Calculate the wave celerity on the left and right + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + c_ll = sqrt(equations.gravity * (h_upper_ll + h_lower_ll)) + c_rr = sqrt(equations.gravity * (h_upper_rr + h_lower_rr)) - return (max(abs(v_m_ll) + c_ll, abs(v_m_rr) + c_rr)) + return (max(abs(v_m_ll) + c_ll, abs(v_m_rr) + c_rr)) end - # Specialized `DissipationLocalLaxFriedrichs` to avoid spurious dissipation in the bottom # topography @inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, - orientation_or_normal_direction, equations::ShallowWaterTwoLayerEquations1D) - λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, equations) - diss = -0.5 * λ * (u_rr - u_ll) - return SVector(diss[1], diss[2], diss[3], diss[4], zero(eltype(u_ll))) + orientation_or_normal_direction, + equations::ShallowWaterTwoLayerEquations1D) + λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + diss = -0.5 * λ * (u_rr - u_ll) + return SVector(diss[1], diss[2], diss[3], diss[4], zero(eltype(u_ll))) end - # Absolute speed of the barotropic mode @inline function max_abs_speeds(u, equations::ShallowWaterTwoLayerEquations1D) - h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u - - # Calculate averaged velocity of both layers - v_m = (h_v1_upper + h_v2_lower) / (h_upper + h_lower) - c = sqrt(equations.gravity * (h_upper + h_lower)) + h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u - return (abs(v_m) + c) -end + # Calculate averaged velocity of both layers + v_m = (h_v1_upper + h_v2_lower) / (h_upper + h_lower) + c = sqrt(equations.gravity * (h_upper + h_lower)) + return (abs(v_m) + c) +end # Helper function to extract the velocity vector from the conservative variables @inline function velocity(u, equations::ShallowWaterTwoLayerEquations1D) - h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u + h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u - v1_upper = h_v1_upper / h_upper - v1_lower = 
h_v2_lower / h_lower
-  return SVector(v1_upper, v1_lower)
+    v1_upper = h_v1_upper / h_upper
+    v1_lower = h_v2_lower / h_lower
+    return SVector(v1_upper, v1_lower)
 end

-
 # Convert conservative variables to primitive
 @inline function cons2prim(u, equations::ShallowWaterTwoLayerEquations1D)
-  h_upper, _, h_lower, _, b = u
+    h_upper, _, h_lower, _, b = u

-  H_lower = h_lower + b
-  H_upper = h_lower + h_upper + b
-  v1_upper, v1_lower = velocity(u, equations)
-  return SVector(H_upper, v1_upper, H_lower, v1_lower, b)
+    H_lower = h_lower + b
+    H_upper = h_lower + h_upper + b
+    v1_upper, v1_lower = velocity(u, equations)
+    return SVector(H_upper, v1_upper, H_lower, v1_lower, b)
 end

-
 # Convert conservative variables to entropy variables
 # Note, only the first four are the entropy variables, the fifth entry still just carries the
 # bottom topography values for convenience
 @inline function cons2entropy(u, equations::ShallowWaterTwoLayerEquations1D)
-  h_upper, _, h_lower, _, b = u
-  v1_upper, v1_lower = velocity(u, equations)
-
-  w1 = equations.rho_upper * (equations.gravity * (h_upper + h_lower + b) - 0.5 * v1_upper^2)
-  w2 = equations.rho_upper * v1_upper
-  w3 = equations.rho_lower * (equations.gravity * (equations.r * h_upper + h_lower + b) - 0.5 * v1_lower^2)
-  w4 = equations.rho_lower * v1_lower
-  return SVector(w1, w2, w3, w4, b)
+    h_upper, _, h_lower, _, b = u
+    v1_upper, v1_lower = velocity(u, equations)
+
+    w1 = equations.rho_upper *
+         (equations.gravity * (h_upper + h_lower + b) - 0.5 * v1_upper^2)
+    w2 = equations.rho_upper * v1_upper
+    w3 = equations.rho_lower *
+         (equations.gravity * (equations.r * h_upper + h_lower + b) - 0.5 * v1_lower^2)
+    w4 = equations.rho_lower * v1_lower
+    return SVector(w1, w2, w3, w4, b)
 end

-
 # Convert primitive to conservative variables
 @inline function prim2cons(prim, equations::ShallowWaterTwoLayerEquations1D)
-  H_upper, v1_upper, H_lower, v1_lower, b = prim
+    H_upper, v1_upper, H_lower, v1_lower, b = prim

-  h_lower = H_lower - b
-  h_upper = H_upper - h_lower - b
-  h_v1_upper = h_upper * v1_upper
-  h_v2_lower = h_lower * v1_lower
-  return SVector(h_upper, h_v1_upper, h_lower, h_v2_lower, b)
+    h_lower = H_lower - b
+    h_upper = H_upper - h_lower - b
+    h_v1_upper = h_upper * v1_upper
+    h_v2_lower = h_lower * v1_lower
+    return SVector(h_upper, h_v1_upper, h_lower, h_v2_lower, b)
 end

-
 @inline function waterheight(u, equations::ShallowWaterTwoLayerEquations1D)
-  return SVector(u[1], u[3])
+    return SVector(u[1], u[3])
 end

-
 # Entropy function for the shallow water equations is the total energy
-@inline entropy(cons, equations::ShallowWaterTwoLayerEquations1D) = energy_total(cons, equations)
-
+@inline function entropy(cons, equations::ShallowWaterTwoLayerEquations1D)
+    energy_total(cons, equations)
+end

 # Calculate total energy for a conservative state `cons`
 @inline function energy_total(cons, equations::ShallowWaterTwoLayerEquations1D)
-  h_upper, h_lower, h_v1_upper, h_v2_lower, b = cons
-  # Set new variables for better readability
-  g = equations.gravity
-  rho_upper = equations.rho_upper
-  rho_lower = equations.rho_lower
-
-  e = (0.5 * rho_upper * (h_v1_upper^2 / h_upper + g * h_upper^2) + 0.5 * rho_lower * (h_v2_lower^2 / h_lower + g * h_lower^2) +
-    g * rho_lower * h_lower * b + g * rho_upper * h_upper * (h_lower + b))
-  return e
+    # Unpack in the conservative-variable order (h_upper, h_v1_upper, h_lower, h_v2_lower, b)
+    h_upper, h_v1_upper, h_lower, h_v2_lower, b = cons
+    # Set new variables for better readability
+    g = equations.gravity
+    rho_upper = equations.rho_upper
+    rho_lower = equations.rho_lower
+
+    e = (0.5 * rho_upper * (h_v1_upper^2 / h_upper + g * h_upper^2) +
+         0.5 * rho_lower * (h_v2_lower^2 / h_lower + g * h_lower^2) +
+         g * rho_lower * h_lower * b + g * rho_upper * h_upper * (h_lower + b))
+    return e
 end

-
 # Calculate kinetic energy for a conservative state `cons`
 @inline function energy_kinetic(u, equations::ShallowWaterTwoLayerEquations1D)
-  h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u
-  return 0.5 * equations.rho_upper * h_v1_upper^2 / h_upper + 0.5 * equations.rho_lower * h_v2_lower^2 / h_lower
+    h_upper, h_v1_upper, h_lower, h_v2_lower, _ = u
+    return 0.5 * equations.rho_upper * h_v1_upper^2 / h_upper +
+           0.5 * equations.rho_lower * h_v2_lower^2 / h_lower
 end

-
 # Calculate potential energy for a conservative state `cons`
 @inline function energy_internal(cons, equations::ShallowWaterTwoLayerEquations1D)
-  return energy_total(cons, equations) - energy_kinetic(cons, equations)
+    return energy_total(cons, equations) - energy_kinetic(cons, equations)
 end

-
 # Calculate the error for the "lake-at-rest" test case where H = h_upper+h_lower+b should
 # be a constant value over time
 @inline function lake_at_rest_error(u, equations::ShallowWaterTwoLayerEquations1D)
-  h_upper, _, h_lower, _, b = u
-  return abs(equations.H0 - (h_upper + h_lower + b))
+    h_upper, _, h_lower, _, b = u
+    return abs(equations.H0 - (h_upper + h_lower + b))
 end

-
 end # @muladd
diff --git a/src/equations/shallow_water_two_layer_2d.jl b/src/equations/shallow_water_two_layer_2d.jl
index 60c389d8c4a..b5e52d636e4 100644
--- a/src/equations/shallow_water_two_layer_2d.jl
+++ b/src/equations/shallow_water_two_layer_2d.jl
@@ -3,6 +3,7 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
+#! format: noindent

 @doc raw"""
     ShallowWaterTwoLayerEquations2D(gravity, H0, rho_upper, rho_lower)

@@ -71,38 +72,41 @@ A good introduction for the 2LSWE is available in Chapter 12 of the book: \
   ISBN: 978-0-12-088759-0
 """
-struct ShallowWaterTwoLayerEquations2D{RealT<:Real} <: AbstractShallowWaterEquations{2, 7}
-  gravity::RealT # gravitational constant
-  H0::RealT # constant "lake-at-rest" total water height
-  rho_upper::RealT # lower layer density
-  rho_lower::RealT # upper layer density
-  r::RealT # ratio of rho_upper / rho_lower
+struct ShallowWaterTwoLayerEquations2D{RealT <: Real} <:
+       AbstractShallowWaterEquations{2, 7}
+    gravity::RealT # gravitational constant
+    H0::RealT # constant "lake-at-rest" total water height
+    rho_upper::RealT # upper layer density
+    rho_lower::RealT # lower layer density
+    r::RealT # ratio of rho_upper / rho_lower
 end

 # Allow for flexibility to set the gravitational constant within an elixir depending on the
 # application where `gravity_constant=1.0` or `gravity_constant=9.81` are common values.
 # The reference total water height H0 defaults to 0.0 but is used for the "lake-at-rest"
 # well-balancedness test cases. Densities must be specified such that rho_upper <= rho_lower.
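As a quick illustration of the `lake_at_rest_error` diagnostic reformatted above, here is a minimal sketch for a 1D two-layer state at rest; all numeric values are made up for illustration, and it assumes the helpers `prim2cons`, `lake_at_rest_error`, and `SVector` re-exported by Trixi:

```julia
using Trixi

equations = ShallowWaterTwoLayerEquations1D(gravity_constant = 9.81, H0 = 4.0,
                                            rho_upper = 0.9, rho_lower = 1.0)

# Primitive state (H_upper, v1_upper, H_lower, v1_lower, b): still water whose
# total height H_upper = h_upper + h_lower + b matches the reference H0
u = prim2cons(SVector(4.0, 0.0, 2.0, 0.0, 1.0), equations)

lake_at_rest_error(u, equations) # ≈ 0, since only deviations of H from H0 count
```

Because the diagnostic reads only the water heights and the bottom topography, any velocity perturbation leaves the reported error unchanged.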
-function ShallowWaterTwoLayerEquations2D(; gravity_constant, H0=zero(gravity_constant), rho_upper, rho_lower) - # Assign density ratio if rho_upper <= rho_lower - if rho_upper > rho_lower - error("Invalid input: Densities must be chosen such that rho_upper <= rho_lower") - else - r = rho_upper / rho_lower - end - ShallowWaterTwoLayerEquations2D(gravity_constant, H0, rho_upper, rho_lower, r) +function ShallowWaterTwoLayerEquations2D(; gravity_constant, + H0 = zero(gravity_constant), rho_upper, + rho_lower) + # Assign density ratio if rho_upper <= rho_lower + if rho_upper > rho_lower + error("Invalid input: Densities must be chosen such that rho_upper <= rho_lower") + else + r = rho_upper / rho_lower + end + ShallowWaterTwoLayerEquations2D(gravity_constant, H0, rho_upper, rho_lower, r) end - have_nonconservative_terms(::ShallowWaterTwoLayerEquations2D) = True() -varnames(::typeof(cons2cons), ::ShallowWaterTwoLayerEquations2D) = ( - "h_upper", "h_v1_upper", "h_v2_upper", "h_lower", "h_v1_lower", "h_v2_lower", "b") +function varnames(::typeof(cons2cons), ::ShallowWaterTwoLayerEquations2D) + ("h_upper", "h_v1_upper", "h_v2_upper", "h_lower", "h_v1_lower", "h_v2_lower", "b") +end # Note, we use the total water height, H_upper = h_upper + h_lower + b, and first layer total height # H_lower = h_lower + b as the first primitive variable for easier visualization and setting initial # conditions -varnames(::typeof(cons2prim), ::ShallowWaterTwoLayerEquations2D) = ( - "H_upper", "v1_upper", "v2_upper", "H_lower", "v1_lower", "v2_lower", "b") - +function varnames(::typeof(cons2prim), ::ShallowWaterTwoLayerEquations2D) + ("H_upper", "v1_upper", "v2_upper", "H_lower", "v1_lower", "v2_lower", "b") +end # Set initial conditions at physical location `x` for time `t` """ @@ -112,70 +116,111 @@ A smooth initial condition used for convergence tests in combination with [`source_terms_convergence_test`](@ref). Constants must be set to ``rho_{upper} = 0.9``, ``rho_{lower} = 1.0``, ``g = 10.0``. """ -function initial_condition_convergence_test(x, t, equations::ShallowWaterTwoLayerEquations2D) - # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)]^2] - ω = 2.0 * pi * sqrt(2.0) - - H_lower = 2.0 + 0.1 * sin(ω * x[1] + t) * cos(ω * x[2] + t) - H_upper = 4.0 + 0.1 * cos(ω * x[1] + t) * sin(ω * x[2] + t) - v1_lower = 1.0 - v1_upper = 0.9 - v2_lower = 0.9 - v2_upper = 1.0 - b = 1.0 + 0.1 * cos(0.5 * ω * x[1]) * sin(0.5 * ω * x[2]) - - return prim2cons(SVector(H_upper, v1_upper, v2_upper, H_lower, v1_lower, v2_lower, b), equations) +function initial_condition_convergence_test(x, t, + equations::ShallowWaterTwoLayerEquations2D) + # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)]^2] + ω = 2.0 * pi * sqrt(2.0) + + H_lower = 2.0 + 0.1 * sin(ω * x[1] + t) * cos(ω * x[2] + t) + H_upper = 4.0 + 0.1 * cos(ω * x[1] + t) * sin(ω * x[2] + t) + v1_lower = 1.0 + v1_upper = 0.9 + v2_lower = 0.9 + v2_upper = 1.0 + b = 1.0 + 0.1 * cos(0.5 * ω * x[1]) * sin(0.5 * ω * x[2]) + + return prim2cons(SVector(H_upper, v1_upper, v2_upper, H_lower, v1_lower, v2_lower, + b), equations) end - """ source_terms_convergence_test(u, x, t, equations::ShallowWaterTwoLayerEquations2D) Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref). """ -@inline function source_terms_convergence_test(u, x, t, equations::ShallowWaterTwoLayerEquations2D) - # Same settings as in `initial_condition_convergence_test`. 
- # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)]^2] - ω = 2.0 * pi * sqrt(2.0) - - # Source terms obtained with SymPy - du1 = 0.01*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.01*ω*sin(t + ω*x[1])*sin(t + ω*x[2]) - du2 = (5.0 * (-0.1*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) - 0.1*ω*sin(t + ω*x[1])*sin(t + - ω*x[2])) * (4.0 + 0.2cos(t + ω*x[1])*sin(t + ω*x[2]) - 0.2*sin(t + ω*x[1])*cos(t + - ω*x[2])) + 0.009*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.009*ω*sin(t + ω*x[1])*sin(t + - ω*x[2]) + 0.1*ω*(20.0 + cos(t + ω*x[1])*sin(t + ω*x[2]) - sin(t + ω*x[1])*cos(t + - ω*x[2])) * cos(t + ω*x[1])*cos(t + ω*x[2])) - du3 = (5.0 * (0.1*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.1*ω*sin(t + ω*x[1])*sin(t + - ω*x[2])) * (4.0 + 0.2*cos(t + ω*x[1])*sin(t + ω*x[2]) - 0.2*sin(t + ω*x[1])*cos(t + - ω*x[2])) + 0.01ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.01*ω*sin(t + ω*x[1])*sin(t + ω*x[2]) + - -0.1*ω*(20.0 + cos(t + ω*x[1])*sin(t + ω*x[2]) - sin(t + ω*x[1])*cos(t + ω*x[2]))*sin(t + - ω*x[1])*sin(t + ω*x[2])) - du4 = (0.1*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.1*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + - 0.05*ω*sin(0.5*ω*x[1])*sin(0.5*ω*x[2]) - 0.1*sin(t + ω*x[1])*sin(t + ω*x[2]) + - -0.045*ω*cos(0.5*ω*x[1])*cos(0.5*ω*x[2]) - 0.09*ω*sin(t + ω*x[1])*sin(t + ω*x[2])) - du5 = ((10.0 + sin(t + ω*x[1])*cos(t + ω*x[2]) - cos(0.5*ω*x[1])*sin(0.5*ω*x[2]))*(-0.09*ω*cos(t + - ω*x[1])*cos(t + ω*x[2]) - 0.09*ω*sin(t + ω*x[1])*sin(t + ω*x[2]) + - -0.05*ω*sin(0.5*ω*x[1])*sin(0.5*ω*x[2])) + 5.0 * (0.1*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + - 0.05*ω*sin(0.5*ω*x[1])*sin(0.5*ω*x[2])) * (2.0 + 0.2*sin(t + ω*x[1])*cos(t + ω*x[2]) + - -0.2*cos(0.5*ω*x[1])*sin(0.5*ω*x[2])) + 0.1*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.1*ω*cos(t + - ω*x[1])*cos(t + ω*x[2]) + 0.05*ω*sin(0.5*ω*x[1])*sin(0.5*ω*x[2]) - 0.1*sin(t + - ω*x[1])*sin(t + ω*x[2]) - 0.045*ω*cos(0.5*ω*x[1])*cos(0.5*ω*x[2]) - 0.09*ω*sin(t + - ω*x[1])*sin(t + ω*x[2])) - du6 = ((10.0 + sin(t + ω*x[1])*cos(t + ω*x[2]) + - -cos(0.5*ω*x[1])*sin(0.5*ω*x[2])) * (0.05*ω*cos(0.5*ω*x[1])*cos(0.5*ω*x[2]) + - 0.09*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.09*ω*sin(t + ω*x[1])*sin(t + ω*x[2])) + - 5.0 * (-0.05*ω*cos(0.5*ω*x[1])*cos(0.5*ω*x[2]) - 0.1*ω*sin(t + ω*x[1])*sin(t + - ω*x[2])) * (2.0 + 0.2*sin(t + ω*x[1])*cos(t + ω*x[2]) + - -0.2*cos(0.5*ω*x[1])*sin(0.5*ω*x[2])) + 0.09cos(t + ω*x[1])*cos(t + ω*x[2]) + - 0.09*ω*cos(t + ω*x[1])*cos(t + ω*x[2]) + 0.045*ω*sin(0.5*ω*x[1])*sin(0.5*ω*x[2]) + - -0.09*sin(t + ω*x[1])*sin(t + ω*x[2]) - 0.0405*ω*cos(0.5*ω*x[1])*cos(0.5*ω*x[2]) + - -0.081*ω*sin(t + ω*x[1])*sin(t + ω*x[2])) - - return SVector(du1, du2, du3, du4, du5, du6, zero(eltype(u))) +@inline function source_terms_convergence_test(u, x, t, + equations::ShallowWaterTwoLayerEquations2D) + # Same settings as in `initial_condition_convergence_test`. 
+ # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)]^2] + ω = 2.0 * pi * sqrt(2.0) + + # Source terms obtained with SymPy + du1 = 0.01 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.01 * ω * sin(t + ω * x[1]) * sin(t + ω * x[2]) + du2 = (5.0 * + (-0.1 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) - + 0.1 * ω * sin(t + ω * x[1]) * sin(t + + ω * x[2])) * + (4.0 + 0.2cos(t + ω * x[1]) * sin(t + ω * x[2]) - + 0.2 * sin(t + ω * x[1]) * cos(t + + ω * x[2])) + + 0.009 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.009 * ω * sin(t + ω * x[1]) * sin(t + + ω * x[2]) + + 0.1 * ω * + (20.0 + cos(t + ω * x[1]) * sin(t + ω * x[2]) - + sin(t + ω * x[1]) * cos(t + + ω * x[2])) * cos(t + ω * x[1]) * cos(t + ω * x[2])) + du3 = (5.0 * + (0.1 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.1 * ω * sin(t + ω * x[1]) * sin(t + + ω * x[2])) * + (4.0 + 0.2 * cos(t + ω * x[1]) * sin(t + ω * x[2]) - + 0.2 * sin(t + ω * x[1]) * cos(t + + ω * x[2])) + + 0.01ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.01 * ω * sin(t + ω * x[1]) * sin(t + ω * x[2]) + + -0.1 * ω * + (20.0 + cos(t + ω * x[1]) * sin(t + ω * x[2]) - + sin(t + ω * x[1]) * cos(t + ω * x[2])) * sin(t + + ω * x[1]) * sin(t + ω * x[2])) + du4 = (0.1 * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.1 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.05 * ω * sin(0.5 * ω * x[1]) * sin(0.5 * ω * x[2]) - + 0.1 * sin(t + ω * x[1]) * sin(t + ω * x[2]) + + -0.045 * ω * cos(0.5 * ω * x[1]) * cos(0.5 * ω * x[2]) - + 0.09 * ω * sin(t + ω * x[1]) * sin(t + ω * x[2])) + du5 = ((10.0 + sin(t + ω * x[1]) * cos(t + ω * x[2]) - + cos(0.5 * ω * x[1]) * sin(0.5 * ω * x[2])) * (-0.09 * ω * cos(t + + ω * x[1]) * cos(t + ω * x[2]) - + 0.09 * ω * sin(t + ω * x[1]) * sin(t + ω * x[2]) + + -0.05 * ω * sin(0.5 * ω * x[1]) * sin(0.5 * ω * x[2])) + + 5.0 * + (0.1 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.05 * ω * sin(0.5 * ω * x[1]) * sin(0.5 * ω * x[2])) * + (2.0 + 0.2 * sin(t + ω * x[1]) * cos(t + ω * x[2]) + + -0.2 * cos(0.5 * ω * x[1]) * sin(0.5 * ω * x[2])) + + 0.1 * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.1 * ω * cos(t + + ω * x[1]) * cos(t + ω * x[2]) + + 0.05 * ω * sin(0.5 * ω * x[1]) * sin(0.5 * ω * x[2]) - + 0.1 * sin(t + + ω * x[1]) * sin(t + ω * x[2]) - + 0.045 * ω * cos(0.5 * ω * x[1]) * cos(0.5 * ω * x[2]) - + 0.09 * ω * sin(t + + ω * x[1]) * sin(t + ω * x[2])) + du6 = ((10.0 + sin(t + ω * x[1]) * cos(t + ω * x[2]) + + -cos(0.5 * ω * x[1]) * sin(0.5 * ω * x[2])) * + (0.05 * ω * cos(0.5 * ω * x[1]) * cos(0.5 * ω * x[2]) + + 0.09 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.09 * ω * sin(t + ω * x[1]) * sin(t + ω * x[2])) + + 5.0 * + (-0.05 * ω * cos(0.5 * ω * x[1]) * cos(0.5 * ω * x[2]) - + 0.1 * ω * sin(t + ω * x[1]) * sin(t + + ω * x[2])) * + (2.0 + 0.2 * sin(t + ω * x[1]) * cos(t + ω * x[2]) + + -0.2 * cos(0.5 * ω * x[1]) * sin(0.5 * ω * x[2])) + + 0.09cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.09 * ω * cos(t + ω * x[1]) * cos(t + ω * x[2]) + + 0.045 * ω * sin(0.5 * ω * x[1]) * sin(0.5 * ω * x[2]) + + -0.09 * sin(t + ω * x[1]) * sin(t + ω * x[2]) - + 0.0405 * ω * cos(0.5 * ω * x[1]) * cos(0.5 * ω * x[2]) + + -0.081 * ω * sin(t + ω * x[1]) * sin(t + ω * x[2])) + + return SVector(du1, du2, du3, du4, du5, du6, zero(eltype(u))) end - """ boundary_condition_slip_wall(u_inner, normal_direction, x, t, surface_flux_function, equations::ShallowWaterTwoLayerEquations2D) @@ -193,85 +238,84 @@ For details see Section 9.2.5 of the book: @inline function boundary_condition_slip_wall(u_inner, normal_direction::AbstractVector, 
x, t, surface_flux_function, equations::ShallowWaterTwoLayerEquations2D) - # normalize the outward pointing direction - normal = normal_direction / norm(normal_direction) - - # compute the normal velocity - v_normal_upper = normal[1] * u_inner[2] + normal[2] * u_inner[3] - v_normal_lower = normal[1] * u_inner[5] + normal[2] * u_inner[6] - - # create the "external" boundary solution state - u_boundary = SVector(u_inner[1], - u_inner[2] - 2.0 * v_normal_upper * normal[1], - u_inner[3] - 2.0 * v_normal_upper * normal[2], - u_inner[4], - u_inner[5] - 2.0 * v_normal_lower * normal[1], - u_inner[6] - 2.0 * v_normal_lower * normal[2], - u_inner[7]) - - # calculate the boundary flux - flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) - return flux + # normalize the outward pointing direction + normal = normal_direction / norm(normal_direction) + + # compute the normal velocity + v_normal_upper = normal[1] * u_inner[2] + normal[2] * u_inner[3] + v_normal_lower = normal[1] * u_inner[5] + normal[2] * u_inner[6] + + # create the "external" boundary solution state + u_boundary = SVector(u_inner[1], + u_inner[2] - 2.0 * v_normal_upper * normal[1], + u_inner[3] - 2.0 * v_normal_upper * normal[2], + u_inner[4], + u_inner[5] - 2.0 * v_normal_lower * normal[1], + u_inner[6] - 2.0 * v_normal_lower * normal[2], + u_inner[7]) + + # calculate the boundary flux + flux = surface_flux_function(u_inner, u_boundary, normal_direction, equations) + return flux end - # Calculate 1D flux for a single point # Note, the bottom topography has no flux -@inline function flux(u, orientation::Integer, equations::ShallowWaterTwoLayerEquations2D) - h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, _ = u - - # Calculate velocities - v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations) - - # Calculate pressure - p1 = 0.5 * equations.gravity * h_upper^2 - p2 = 0.5 * equations.gravity * h_lower^2 - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = h_v1_upper - f2 = h_v1_upper * v1_upper + p1 - f3 = h_v1_upper * v2_upper - f4 = h_v1_lower - f5 = h_v1_lower * v1_lower + p2 - f6 = h_v1_lower * v2_lower - else - f1 = h_v2_upper - f2 = h_v2_upper * v1_upper - f3 = h_v2_upper * v2_upper + p1 - f4 = h_v2_lower - f5 = h_v2_lower * v1_lower - f6 = h_v2_lower * v2_lower + p2 - end - return SVector(f1, f2, f3, f4, f5 , f6, zero(eltype(u))) +@inline function flux(u, orientation::Integer, + equations::ShallowWaterTwoLayerEquations2D) + h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, _ = u + + # Calculate velocities + v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations) + + # Calculate pressure + p1 = 0.5 * equations.gravity * h_upper^2 + p2 = 0.5 * equations.gravity * h_lower^2 + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = h_v1_upper + f2 = h_v1_upper * v1_upper + p1 + f3 = h_v1_upper * v2_upper + f4 = h_v1_lower + f5 = h_v1_lower * v1_lower + p2 + f6 = h_v1_lower * v2_lower + else + f1 = h_v2_upper + f2 = h_v2_upper * v1_upper + f3 = h_v2_upper * v2_upper + p1 + f4 = h_v2_lower + f5 = h_v2_lower * v1_lower + f6 = h_v2_lower * v2_lower + p2 + end + return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u))) end # Calculate 1D flux for a single point in the normal direction # Note, this directional vector is not normalized and the bottom topography has no flux -@inline function flux(u, normal_direction::AbstractVector, +@inline function flux(u, normal_direction::AbstractVector, 
equations::ShallowWaterTwoLayerEquations2D) - h_upper, h_lower = waterheight(u, equations) - v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations) + h_upper, h_lower = waterheight(u, equations) + v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations) - v_normal_upper = v1_upper * normal_direction[1] + v2_upper * normal_direction[2] - v_normal_lower = v1_lower * normal_direction[1] + v2_lower * normal_direction[2] - h_v_upper_normal = h_upper * v_normal_upper - h_v_lower_normal = h_lower * v_normal_lower + v_normal_upper = v1_upper * normal_direction[1] + v2_upper * normal_direction[2] + v_normal_lower = v1_lower * normal_direction[1] + v2_lower * normal_direction[2] + h_v_upper_normal = h_upper * v_normal_upper + h_v_lower_normal = h_lower * v_normal_lower - p1 = 0.5 * equations.gravity * h_upper^2 - p2 = 0.5 * equations.gravity * h_lower^2 + p1 = 0.5 * equations.gravity * h_upper^2 + p2 = 0.5 * equations.gravity * h_lower^2 - f1 = h_v_upper_normal - f2 = h_v_upper_normal * v1_upper + p1 * normal_direction[1] - f3 = h_v_upper_normal * v2_upper + p1 * normal_direction[2] - f4 = h_v_lower_normal - f5 = h_v_lower_normal * v1_lower + p2 * normal_direction[1] - f6 = h_v_lower_normal * v2_lower + p2 * normal_direction[2] + f1 = h_v_upper_normal + f2 = h_v_upper_normal * v1_upper + p1 * normal_direction[1] + f3 = h_v_upper_normal * v2_upper + p1 * normal_direction[2] + f4 = h_v_lower_normal + f5 = h_v_lower_normal * v1_lower + p2 * normal_direction[1] + f6 = h_v_lower_normal * v2_lower + p2 * normal_direction[2] - return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u))) + return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u))) end - """ flux_nonconservative_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations2D) @@ -290,58 +334,61 @@ Further details are available in the paper: shallow water equations on unstructured curvilinear meshes with discontinuous bathymetry [DOI: 10.1016/j.jcp.2017.03.036](https://doi.org/10.1016/j.jcp.2017.03.036) """ -@inline function flux_nonconservative_wintermeyer_etal(u_ll, u_rr, +@inline function flux_nonconservative_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations2D) - # Pull the necessary left and right state information - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - b_rr = u_rr[7] - - z = zero(eltype(u_ll)) - - # Bottom gradient nonconservative term: (0, g*h_upper*(b + h_lower)_x, g*h_upper*(b + h_lower)_y , - # 0, g*h_lower*(b + r*h_upper)_x, - # g*h_lower*(b + r*h_upper)_y, 0) - if orientation == 1 - f = SVector(z, - equations.gravity * h_upper_ll * (b_rr + h_lower_rr), - z,z, - equations.gravity * h_lower_ll * (b_rr + equations.r * h_upper_rr), - z,z) - else # orientation == 2 - f = SVector(z, z, - equations.gravity * h_upper_ll * (b_rr + h_lower_rr), - z,z, - equations.gravity * h_lower_ll * (b_rr + equations.r * h_upper_rr), - z) - end - - return f + # Pull the necessary left and right state information + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + b_rr = u_rr[7] + + z = zero(eltype(u_ll)) + + # Bottom gradient nonconservative term: (0, g*h_upper*(b + h_lower)_x, g*h_upper*(b + h_lower)_y , + # 0, g*h_lower*(b + r*h_upper)_x, + # g*h_lower*(b + r*h_upper)_y, 0) + if orientation == 1 + f = SVector(z, + equations.gravity * h_upper_ll * (b_rr + h_lower_rr), + z, z, + equations.gravity * h_lower_ll * (b_rr + equations.r * 
h_upper_rr), + z, z) + else # orientation == 2 + f = SVector(z, z, + equations.gravity * h_upper_ll * (b_rr + h_lower_rr), + z, z, + equations.gravity * h_lower_ll * (b_rr + equations.r * h_upper_rr), + z) + end + + return f end @inline function flux_nonconservative_wintermeyer_etal(u_ll, u_rr, normal_direction_ll::AbstractVector, normal_direction_average::AbstractVector, equations::ShallowWaterTwoLayerEquations2D) - # Pull the necessary left and right state information - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - b_rr = u_rr[7] - - # Note this routine only uses the `normal_direction_average` and the average of the - # bottom topography to get a quadratic split form DG gradient on curved elements - return SVector(zero(eltype(u_ll)), - normal_direction_average[1] * equations.gravity * h_upper_ll * (b_rr + h_lower_rr), - normal_direction_average[2] * equations.gravity * h_upper_ll * (b_rr + h_lower_rr), - zero(eltype(u_ll)), - normal_direction_average[1] * equations.gravity * h_lower_ll * (b_rr + + # Pull the necessary left and right state information + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + b_rr = u_rr[7] + + # Note this routine only uses the `normal_direction_average` and the average of the + # bottom topography to get a quadratic split form DG gradient on curved elements + return SVector(zero(eltype(u_ll)), + normal_direction_average[1] * equations.gravity * h_upper_ll * + (b_rr + h_lower_rr), + normal_direction_average[2] * equations.gravity * h_upper_ll * + (b_rr + h_lower_rr), + zero(eltype(u_ll)), + normal_direction_average[1] * equations.gravity * h_lower_ll * + (b_rr + equations.r * h_upper_rr), - normal_direction_average[2] * equations.gravity * h_lower_ll * (b_rr + + normal_direction_average[2] * equations.gravity * h_lower_ll * + (b_rr + equations.r * h_upper_rr), - zero(eltype(u_ll))) - end - + zero(eltype(u_ll))) +end """ flux_nonconservative_fjordholm_etal(u_ll, u_rr, orientation::Integer, @@ -362,90 +409,99 @@ It should be noted that the equations are ordered differently and the designation of the upper and lower layer has been changed which leads to a slightly different formulation. 
""" -@inline function flux_nonconservative_fjordholm_etal(u_ll, u_rr, +@inline function flux_nonconservative_fjordholm_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations2D) - # Pull the necessary left and right state information - h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, b_ll = u_ll - h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, b_rr = u_rr - - # Create average and jump values - h_upper_average = 0.5 * (h_upper_ll + h_upper_rr) - h_lower_average = 0.5 * (h_lower_ll + h_lower_rr) - h_upper_jump = h_upper_rr - h_upper_ll - h_lower_jump = h_lower_rr - h_lower_ll - b_jump = b_rr - b_ll - - # Assign variables for constants for better readability - g = equations.gravity - - # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, g*h_upper*(b+h_lower)_y, 0, - # g*h_lower*(b+r*h_upper)_x, g*h_lower*(b+r*h_upper)_x, 0) - - # Includes two parts: - # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid - # cross-averaging across a discontinuous bottom topography - # (ii) True surface part that uses `h_average` and `b_jump` to handle discontinuous bathymetry - z = zero(eltype(u_ll)) - if orientation == 1 - f = SVector( - z, - g * h_upper_ll * (b_ll + h_lower_ll) + g * h_upper_average * (b_jump + h_lower_jump), - z,z, - g * h_lower_ll * (b_ll + equations.r * h_upper_ll) + g * h_lower_average * (b_jump + - equations.r * h_upper_jump), - z,z) - else # orientation == 2 - f = SVector( - z,z, - g * h_upper_ll * (b_ll + h_lower_ll) + g * h_upper_average * (b_jump + h_lower_jump), - z,z, - g * h_lower_ll * (b_ll + equations.r * h_upper_ll) + g * h_lower_average * (b_jump + - equations.r * h_upper_jump), - z) - end - - return f + # Pull the necessary left and right state information + h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, b_ll = u_ll + h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, b_rr = u_rr + + # Create average and jump values + h_upper_average = 0.5 * (h_upper_ll + h_upper_rr) + h_lower_average = 0.5 * (h_lower_ll + h_lower_rr) + h_upper_jump = h_upper_rr - h_upper_ll + h_lower_jump = h_lower_rr - h_lower_ll + b_jump = b_rr - b_ll + + # Assign variables for constants for better readability + g = equations.gravity + + # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, g*h_upper*(b+h_lower)_y, 0, + # g*h_lower*(b+r*h_upper)_x, g*h_lower*(b+r*h_upper)_x, 0) + + # Includes two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid + # cross-averaging across a discontinuous bottom topography + # (ii) True surface part that uses `h_average` and `b_jump` to handle discontinuous bathymetry + z = zero(eltype(u_ll)) + if orientation == 1 + f = SVector(z, + g * h_upper_ll * (b_ll + h_lower_ll) + + g * h_upper_average * (b_jump + h_lower_jump), + z, z, + g * h_lower_ll * (b_ll + equations.r * h_upper_ll) + + g * h_lower_average * (b_jump + + equations.r * h_upper_jump), + z, z) + else # orientation == 2 + f = SVector(z, z, + g * h_upper_ll * (b_ll + h_lower_ll) + + g * h_upper_average * (b_jump + h_lower_jump), + z, z, + g * h_lower_ll * (b_ll + equations.r * h_upper_ll) + + g * h_lower_average * (b_jump + + equations.r * h_upper_jump), + z) + end + + return f end @inline function flux_nonconservative_fjordholm_etal(u_ll, u_rr, normal_direction_ll::AbstractVector, normal_direction_average::AbstractVector, 
equations::ShallowWaterTwoLayerEquations2D) - # Pull the necessary left and right state information - h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, b_ll = u_ll - h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, b_rr = u_rr - - # Create average and jump values - h_upper_average = 0.5 * (h_upper_ll + h_upper_rr) - h_lower_average = 0.5 * (h_lower_ll + h_lower_rr) - h_upper_jump = h_upper_rr - h_upper_ll - h_lower_jump = h_lower_rr - h_lower_ll - b_jump = b_rr - b_ll - - # Comes in two parts: - # (i) Diagonal (consistent) term from the volume flux that uses `normal_direction_average` - # but we use `b_ll` to avoid cross-averaging across a discontinuous bottom topography - f2 = normal_direction_average[1] * equations.gravity*h_upper_ll*(b_ll + h_lower_ll) - f3 = normal_direction_average[2] * equations.gravity*h_upper_ll*(b_ll + h_lower_ll) - f5 = normal_direction_average[1] * equations.gravity*h_lower_ll*(b_ll + equations.r * h_upper_ll) - f6 = normal_direction_average[2] * equations.gravity*h_lower_ll*(b_ll + equations.r * h_upper_ll) - # (ii) True surface part that uses `normal_direction_ll`, `h_average` and `b_jump` - # to handle discontinuous bathymetry - f2 += normal_direction_ll[1] * equations.gravity*h_upper_average*(b_jump + h_lower_jump) - f3 += normal_direction_ll[2] * equations.gravity*h_upper_average*(b_jump + h_lower_jump) - f5 += normal_direction_ll[1] * equations.gravity*h_lower_average*(b_jump + - equations.r * h_upper_jump) - f6 += normal_direction_ll[2] * equations.gravity*h_lower_average*(b_jump + - equations.r * h_upper_jump) - - # Continuity equations do not have a nonconservative flux - f1 = f4 = zero(eltype(u_ll)) - -return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) + # Pull the necessary left and right state information + h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, b_ll = u_ll + h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, b_rr = u_rr + + # Create average and jump values + h_upper_average = 0.5 * (h_upper_ll + h_upper_rr) + h_lower_average = 0.5 * (h_lower_ll + h_lower_rr) + h_upper_jump = h_upper_rr - h_upper_ll + h_lower_jump = h_lower_rr - h_lower_ll + b_jump = b_rr - b_ll + + # Comes in two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `normal_direction_average` + # but we use `b_ll` to avoid cross-averaging across a discontinuous bottom topography + f2 = normal_direction_average[1] * equations.gravity * h_upper_ll * + (b_ll + h_lower_ll) + f3 = normal_direction_average[2] * equations.gravity * h_upper_ll * + (b_ll + h_lower_ll) + f5 = normal_direction_average[1] * equations.gravity * h_lower_ll * + (b_ll + equations.r * h_upper_ll) + f6 = normal_direction_average[2] * equations.gravity * h_lower_ll * + (b_ll + equations.r * h_upper_ll) + # (ii) True surface part that uses `normal_direction_ll`, `h_average` and `b_jump` + # to handle discontinuous bathymetry + f2 += normal_direction_ll[1] * equations.gravity * h_upper_average * + (b_jump + h_lower_jump) + f3 += normal_direction_ll[2] * equations.gravity * h_upper_average * + (b_jump + h_lower_jump) + f5 += normal_direction_ll[1] * equations.gravity * h_lower_average * + (b_jump + + equations.r * h_upper_jump) + f6 += normal_direction_ll[2] * equations.gravity * h_lower_average * + (b_jump + + equations.r * h_upper_jump) + + # Continuity equations do not have a nonconservative flux + f1 = f4 = zero(eltype(u_ll)) + + return 
SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) end - """ flux_fjordholm_etal(u_ll, u_rr, orientation, equations::ShallowWaterTwoLayerEquations2D) @@ -467,83 +523,86 @@ designation of the upper and lower layer has been changed which leads to a sligh formulation. """ @inline function flux_fjordholm_etal(u_ll, u_rr, - orientation::Integer, + orientation::Integer, equations::ShallowWaterTwoLayerEquations2D) - # Unpack left and right state - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - h_upper_avg = 0.5 * (h_upper_ll + h_upper_rr ) - h_lower_avg = 0.5 * (h_lower_ll + h_lower_rr ) - v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr ) - v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr ) - v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr ) - v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr ) - p1_avg = 0.25 * equations.gravity * (h_upper_ll^2 + h_upper_rr^2) - p2_avg = 0.25 * equations.gravity * (h_lower_ll^2 + h_lower_rr^2) - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = h_upper_avg * v1_upper_avg - f2 = f1 * v1_upper_avg + p1_avg - f3 = f1 * v2_upper_avg - f4 = h_lower_avg * v1_lower_avg - f5 = f4 * v1_lower_avg + p2_avg - f6 = f4 * v2_lower_avg - else - f1 = h_upper_avg * v2_upper_avg - f2 = f1 * v1_upper_avg - f3 = f1 * v2_upper_avg + p1_avg - f4 = h_lower_avg * v2_lower_avg - f5 = f4 * v1_lower_avg - f6 = f4 * v2_lower_avg + p2_avg - end - - return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) + # Unpack left and right state + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + h_upper_avg = 0.5 * (h_upper_ll + h_upper_rr) + h_lower_avg = 0.5 * (h_lower_ll + h_lower_rr) + v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr) + v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr) + v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr) + v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr) + p1_avg = 0.25 * equations.gravity * (h_upper_ll^2 + h_upper_rr^2) + p2_avg = 0.25 * equations.gravity * (h_lower_ll^2 + h_lower_rr^2) + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = h_upper_avg * v1_upper_avg + f2 = f1 * v1_upper_avg + p1_avg + f3 = f1 * v2_upper_avg + f4 = h_lower_avg * v1_lower_avg + f5 = f4 * v1_lower_avg + p2_avg + f6 = f4 * v2_lower_avg + else + f1 = h_upper_avg * v2_upper_avg + f2 = f1 * v1_upper_avg + f3 = f1 * v2_upper_avg + p1_avg + f4 = h_lower_avg * v2_lower_avg + f5 = f4 * v1_lower_avg + f6 = f4 * v2_lower_avg + p2_avg + end + + return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) end @inline function flux_fjordholm_etal(u_ll, u_rr, normal_direction::AbstractVector, equations::ShallowWaterTwoLayerEquations2D) - # Unpack left and right state - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) - - # Compute velocity in normal direction - v_upper_dot_n_ll = v1_upper_ll * 
normal_direction[1] + v2_upper_ll * normal_direction[2] - v_upper_dot_n_rr = v1_upper_rr * normal_direction[1] + v2_upper_rr * normal_direction[2] - v_lower_dot_n_ll = v1_lower_ll * normal_direction[1] + v2_lower_ll * normal_direction[2] - v_lower_dot_n_rr = v1_lower_rr * normal_direction[1] + v2_lower_rr * normal_direction[2] - - # Average each factor of products in flux - h_upper_avg = 0.5 * (h_upper_ll + h_upper_rr ) - h_lower_avg = 0.5 * (h_lower_ll + h_lower_rr ) - v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr ) - v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr ) - v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr ) - v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr ) - p1_avg = 0.25* equations.gravity * (h_upper_ll^2 + h_upper_rr^2) - p2_avg = 0.25* equations.gravity * (h_lower_ll^2 + h_lower_rr^2) - v_upper_dot_n_avg = 0.5 * (v_upper_dot_n_ll + v_upper_dot_n_rr) - v_lower_dot_n_avg = 0.5 * (v_lower_dot_n_ll + v_lower_dot_n_rr) - - # Calculate fluxes depending on normal_direction - f1 = h_upper_avg * v_upper_dot_n_avg - f2 = f1 * v1_upper_avg + p1_avg * normal_direction[1] - f3 = f1 * v2_upper_avg + p1_avg * normal_direction[2] - f4 = h_lower_avg * v_lower_dot_n_avg - f5 = f4 * v1_lower_avg + p2_avg * normal_direction[1] - f6 = f4 * v2_lower_avg + p2_avg * normal_direction[2] - - return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) + # Unpack left and right state + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) + + # Compute velocity in normal direction + v_upper_dot_n_ll = v1_upper_ll * normal_direction[1] + + v2_upper_ll * normal_direction[2] + v_upper_dot_n_rr = v1_upper_rr * normal_direction[1] + + v2_upper_rr * normal_direction[2] + v_lower_dot_n_ll = v1_lower_ll * normal_direction[1] + + v2_lower_ll * normal_direction[2] + v_lower_dot_n_rr = v1_lower_rr * normal_direction[1] + + v2_lower_rr * normal_direction[2] + + # Average each factor of products in flux + h_upper_avg = 0.5 * (h_upper_ll + h_upper_rr) + h_lower_avg = 0.5 * (h_lower_ll + h_lower_rr) + v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr) + v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr) + v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr) + v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr) + p1_avg = 0.25 * equations.gravity * (h_upper_ll^2 + h_upper_rr^2) + p2_avg = 0.25 * equations.gravity * (h_lower_ll^2 + h_lower_rr^2) + v_upper_dot_n_avg = 0.5 * (v_upper_dot_n_ll + v_upper_dot_n_rr) + v_lower_dot_n_avg = 0.5 * (v_lower_dot_n_ll + v_lower_dot_n_rr) + + # Calculate fluxes depending on normal_direction + f1 = h_upper_avg * v_upper_dot_n_avg + f2 = f1 * v1_upper_avg + p1_avg * normal_direction[1] + f3 = f1 * v2_upper_avg + p1_avg * normal_direction[2] + f4 = h_lower_avg * v_lower_dot_n_avg + f5 = f4 * v1_lower_avg + p2_avg * normal_direction[1] + f6 = f4 * v2_lower_avg + p2_avg * normal_direction[2] + + return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) end - """ flux_wintermeyer_etal(u_ll, u_rr, orientation, equations::ShallowWaterTwoLayerEquations2D) @@ -563,77 +622,76 @@ Further details are available in Theorem 1 of the paper: @inline function flux_wintermeyer_etal(u_ll, u_rr, orientation::Integer, equations::ShallowWaterTwoLayerEquations2D) - # Unpack left and right state - h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, _ = u_ll - 
h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, _ = u_rr - - # Get the velocities on either side - v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) - v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr ) - v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr ) - v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr ) - v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr ) - p1_avg = 0.5 * equations.gravity * h_upper_ll * h_upper_rr - p2_avg = 0.5 * equations.gravity * h_lower_ll * h_lower_rr - - # Calculate fluxes depending on orientation - if orientation == 1 - f1 = 0.5 * (h_v1_upper_ll + h_v1_upper_rr) - f2 = f1 * v1_upper_avg + p1_avg - f3 = f1 * v2_upper_avg - f4 = 0.5 * (h_v1_lower_ll + h_v1_lower_rr) - f5 = f4 * v1_lower_avg + p2_avg - f6 = f4 * v2_lower_avg - else - f1 = 0.5 * (h_v2_upper_ll + h_v2_upper_rr) - f2 = f1 * v1_upper_avg - f3 = f1 * v2_upper_avg + p1_avg - f4 = 0.5 * (h_v2_lower_ll + h_v2_lower_rr) - f5 = f4 * v1_lower_avg - f6 = f4 * v2_lower_avg + p2_avg - end - - return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) + # Unpack left and right state + h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, _ = u_ll + h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, _ = u_rr + + # Get the velocities on either side + v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) + v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr) + v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr) + v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr) + v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr) + p1_avg = 0.5 * equations.gravity * h_upper_ll * h_upper_rr + p2_avg = 0.5 * equations.gravity * h_lower_ll * h_lower_rr + + # Calculate fluxes depending on orientation + if orientation == 1 + f1 = 0.5 * (h_v1_upper_ll + h_v1_upper_rr) + f2 = f1 * v1_upper_avg + p1_avg + f3 = f1 * v2_upper_avg + f4 = 0.5 * (h_v1_lower_ll + h_v1_lower_rr) + f5 = f4 * v1_lower_avg + p2_avg + f6 = f4 * v2_lower_avg + else + f1 = 0.5 * (h_v2_upper_ll + h_v2_upper_rr) + f2 = f1 * v1_upper_avg + f3 = f1 * v2_upper_avg + p1_avg + f4 = 0.5 * (h_v2_lower_ll + h_v2_lower_rr) + f5 = f4 * v1_lower_avg + f6 = f4 * v2_lower_avg + p2_avg + end + + return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) end @inline function flux_wintermeyer_etal(u_ll, u_rr, normal_direction::AbstractVector, equations::ShallowWaterTwoLayerEquations2D) - # Unpack left and right state - h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, _ = u_ll - h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, _ = u_rr - - # Get the velocities on either side - v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) - v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) - - # Average each factor of products in flux - v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr ) - v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr ) - v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr ) - v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr ) - p1_avg = 0.5 * equations.gravity * h_upper_ll * h_upper_rr - p2_avg = 0.5 * equations.gravity * h_lower_ll * h_lower_rr - h_v1_upper_avg = 0.5 * 
(h_v1_upper_ll + h_v1_upper_rr ) - h_v2_upper_avg = 0.5 * (h_v2_upper_ll + h_v2_upper_rr ) - h_v1_lower_avg = 0.5 * (h_v1_lower_ll + h_v1_lower_rr ) - h_v2_lower_avg = 0.5 * (h_v2_lower_ll + h_v2_lower_rr ) - - # Calculate fluxes depending on normal_direction - f1 = h_v1_upper_avg * normal_direction[1] + h_v2_upper_avg * normal_direction[2] - f2 = f1 * v1_upper_avg + p1_avg * normal_direction[1] - f3 = f1 * v2_upper_avg + p1_avg * normal_direction[2] - f4 = h_v1_lower_avg * normal_direction[1] + h_v2_lower_avg * normal_direction[2] - f5 = f4 * v1_lower_avg + p2_avg * normal_direction[1] - f6 = f4 * v2_lower_avg + p2_avg * normal_direction[2] - - return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) + # Unpack left and right state + h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, _ = u_ll + h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, _ = u_rr + + # Get the velocities on either side + v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) + v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) + + # Average each factor of products in flux + v1_upper_avg = 0.5 * (v1_upper_ll + v1_upper_rr) + v1_lower_avg = 0.5 * (v1_lower_ll + v1_lower_rr) + v2_upper_avg = 0.5 * (v2_upper_ll + v2_upper_rr) + v2_lower_avg = 0.5 * (v2_lower_ll + v2_lower_rr) + p1_avg = 0.5 * equations.gravity * h_upper_ll * h_upper_rr + p2_avg = 0.5 * equations.gravity * h_lower_ll * h_lower_rr + h_v1_upper_avg = 0.5 * (h_v1_upper_ll + h_v1_upper_rr) + h_v2_upper_avg = 0.5 * (h_v2_upper_ll + h_v2_upper_rr) + h_v1_lower_avg = 0.5 * (h_v1_lower_ll + h_v1_lower_rr) + h_v2_lower_avg = 0.5 * (h_v2_lower_ll + h_v2_lower_rr) + + # Calculate fluxes depending on normal_direction + f1 = h_v1_upper_avg * normal_direction[1] + h_v2_upper_avg * normal_direction[2] + f2 = f1 * v1_upper_avg + p1_avg * normal_direction[1] + f3 = f1 * v2_upper_avg + p1_avg * normal_direction[2] + f4 = h_v1_lower_avg * normal_direction[1] + h_v2_lower_avg * normal_direction[2] + f5 = f4 * v1_lower_avg + p2_avg * normal_direction[1] + f6 = f4 * v2_lower_avg + p2_avg * normal_direction[2] + + return SVector(f1, f2, f3, f4, f5, f6, zero(eltype(u_ll))) end - """ flux_es_fjordholm_etal(u_ll, u_rr, orientation_or_normal_direction, equations::ShallowWaterTwoLayerEquations1D) @@ -650,85 +708,87 @@ designation of the upper and lower layer has been changed which leads to a sligh formulation. 
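In short, the flux computed here takes the form f_es = f_ec - 0.5 * λ * H * (q_rr - q_ll),
where f_ec is the entropy conservative flux of flux_fjordholm_etal, λ is the maximum wave
speed estimate from max_abs_speed_naive, H is the entropy Jacobian evaluated at the
arithmetic mean state, and q_ll, q_rr are the entropy variables of the left and right
state; see the implementation below.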
""" @inline function flux_es_fjordholm_etal(u_ll, u_rr, - orientation_or_normal_direction, - equations::ShallowWaterTwoLayerEquations2D) - # Compute entropy conservative flux but without the bottom topography - f_ec = flux_fjordholm_etal(u_ll, u_rr, - orientation_or_normal_direction, - equations) - - # Get maximum signal velocity - λ = max_abs_speed_naive(u_ll, u_rr, orientation_or_normal_direction, equations) - - # Get entropy variables but without the bottom topography - q_rr = cons2entropy(u_rr,equations) - q_ll = cons2entropy(u_ll,equations) - - # Average values from left and right - u_avg = (u_ll + u_rr)/2 - - # Introduce variables for better readability - rho_upper = equations.rho_upper - rho_lower = equations.rho_lower - g = equations.gravity - drho = rho_upper - rho_lower - - # Entropy Jacobian matrix - H = @SMatrix [ - [-rho_lower/(g*rho_upper*drho);; - -rho_lower*u_avg[2]/(g*rho_upper*u_avg[1]*drho);; - -rho_lower*u_avg[3]/(g*rho_upper*u_avg[1]*drho);; - 1.0/(g*drho);; - u_avg[5]/(g*u_avg[4]*drho);; - u_avg[6]/(g*u_avg[4]*drho);; - 0]; - [-rho_lower*u_avg[2]/(g*rho_upper*u_avg[1]*drho);; - (g*rho_upper*u_avg[1]^3 - g*rho_lower*u_avg[1]^3 + - -rho_lower*u_avg[2]^2)/(g*rho_upper*u_avg[1]^2*drho);; - -rho_lower*u_avg[2]*u_avg[3]/(g*rho_upper*u_avg[1]^2*drho);; - u_avg[2]/(g*u_avg[1]*drho);; - u_avg[2]*u_avg[5]/(g*u_avg[1]*u_avg[4]*drho);; - u_avg[2]*u_avg[6]/(g*u_avg[1]*u_avg[4]*drho);; - 0]; - [-rho_lower*u_avg[3]/(g*rho_upper*u_avg[1]*drho);; - -rho_lower*u_avg[2]*u_avg[3]/(g*rho_upper*u_avg[1]^2*drho);; - (g*rho_upper*u_avg[1]^3 - g*rho_lower*u_avg[1]^3 + - -rho_lower*u_avg[3]^2)/(g*rho_upper*u_avg[1]^2*drho);; - u_avg[3]/(g*u_avg[1]*drho);; - u_avg[3]*u_avg[5]/(g*u_avg[1]*u_avg[4]*drho);; - u_avg[3]*u_avg[6]/(g*u_avg[1]*u_avg[4]*drho);; - 0]; - [1.0/(g*drho);; - u_avg[2]/(g*u_avg[1]*drho);; - u_avg[3]/(g*u_avg[1]*drho);; - -1.0/(g*drho);; - -u_avg[5]/(g*u_avg[4]*drho);; - -u_avg[6]/(g*u_avg[4]*drho);; - 0]; - [u_avg[5]/(g*u_avg[4]*drho);; - u_avg[2]*u_avg[5]/(g*u_avg[1]*u_avg[4]*drho);; - u_avg[3]*u_avg[5]/(g*u_avg[1]*u_avg[4]*drho);; - -u_avg[5]/(g*u_avg[4]*drho);; - (g*rho_upper*u_avg[4]^3 - g*rho_lower*u_avg[4]^3 + - -rho_lower*u_avg[5]^2)/(g*rho_lower*u_avg[4]^2*drho);; - -u_avg[5]*u_avg[6]/(g*u_avg[4]^2*drho);; - 0]; - [u_avg[6]/(g*u_avg[4]*drho);; - u_avg[2]*u_avg[6]/(g*u_avg[1]*u_avg[4]*drho);; - u_avg[3]*u_avg[6]/(g*u_avg[1]*u_avg[4]*drho);; - -u_avg[6]/(g*u_avg[4]*drho);; - -u_avg[5]*u_avg[6]/(g*u_avg[4]^2*drho);; - (g*rho_upper*u_avg[4]^3 - g*rho_lower*u_avg[4]^3 + - -rho_lower*u_avg[6]^2)/(g*rho_lower*u_avg[4]^2*drho);;0]; - [0;;0;;0;;0;;0;;0;;0]] - - # Add dissipation to entropy conservative flux to obtain entropy stable flux - f_es = f_ec - 0.5 * λ * H * (q_rr - q_ll) - - return SVector(f_es[1], f_es[2], f_es[3], f_es[4], f_es[5], f_es[6], zero(eltype(u_ll))) + orientation_or_normal_direction, + equations::ShallowWaterTwoLayerEquations2D) + # Compute entropy conservative flux but without the bottom topography + f_ec = flux_fjordholm_etal(u_ll, u_rr, + orientation_or_normal_direction, + equations) + + # Get maximum signal velocity + λ = max_abs_speed_naive(u_ll, u_rr, orientation_or_normal_direction, equations) + + # Get entropy variables but without the bottom topography + q_rr = cons2entropy(u_rr, equations) + q_ll = cons2entropy(u_ll, equations) + + # Average values from left and right + u_avg = (u_ll + u_rr) / 2 + + # Introduce variables for better readability + rho_upper = equations.rho_upper + rho_lower = equations.rho_lower + g = equations.gravity + drho = 
rho_upper - rho_lower + + # Compute entropy Jacobian coefficients + h11 = -rho_lower / (g * rho_upper * drho) + h12 = -rho_lower * u_avg[2] / (g * rho_upper * u_avg[1] * drho) + h13 = -rho_lower * u_avg[3] / (g * rho_upper * u_avg[1] * drho) + h14 = 1.0 / (g * drho) + h15 = u_avg[5] / (g * u_avg[4] * drho) + h16 = u_avg[6] / (g * u_avg[4] * drho) + h21 = -rho_lower * u_avg[2] / (g * rho_upper * u_avg[1] * drho) + h22 = ((g * rho_upper * u_avg[1]^3 - g * rho_lower * u_avg[1]^3 + + -rho_lower * u_avg[2]^2) / (g * rho_upper * u_avg[1]^2 * drho)) + h23 = -rho_lower * u_avg[2] * u_avg[3] / (g * rho_upper * u_avg[1]^2 * drho) + h24 = u_avg[2] / (g * u_avg[1] * drho) + h25 = u_avg[2] * u_avg[5] / (g * u_avg[1] * u_avg[4] * drho) + h26 = u_avg[2] * u_avg[6] / (g * u_avg[1] * u_avg[4] * drho) + h31 = -rho_lower * u_avg[3] / (g * rho_upper * u_avg[1] * drho) + h32 = -rho_lower * u_avg[2] * u_avg[3] / (g * rho_upper * u_avg[1]^2 * drho) + h33 = ((g * rho_upper * u_avg[1]^3 - g * rho_lower * u_avg[1]^3 + + -rho_lower * u_avg[3]^2) / (g * rho_upper * u_avg[1]^2 * drho)) + h34 = u_avg[3] / (g * u_avg[1] * drho) + h35 = u_avg[3] * u_avg[5] / (g * u_avg[1] * u_avg[4] * drho) + h36 = u_avg[3] * u_avg[6] / (g * u_avg[1] * u_avg[4] * drho) + h41 = 1.0 / (g * drho) + h42 = u_avg[2] / (g * u_avg[1] * drho) + h43 = u_avg[3] / (g * u_avg[1] * drho) + h44 = -1.0 / (g * drho) + h45 = -u_avg[5] / (g * u_avg[4] * drho) + h46 = -u_avg[6] / (g * u_avg[4] * drho) + h51 = u_avg[5] / (g * u_avg[4] * drho) + h52 = u_avg[2] * u_avg[5] / (g * u_avg[1] * u_avg[4] * drho) + h53 = u_avg[3] * u_avg[5] / (g * u_avg[1] * u_avg[4] * drho) + h54 = -u_avg[5] / (g * u_avg[4] * drho) + h55 = ((g * rho_upper * u_avg[4]^3 - g * rho_lower * u_avg[4]^3 + + -rho_lower * u_avg[5]^2) / (g * rho_lower * u_avg[4]^2 * drho)) + h56 = -u_avg[5] * u_avg[6] / (g * u_avg[4]^2 * drho) + h61 = u_avg[6] / (g * u_avg[4] * drho) + h62 = u_avg[2] * u_avg[6] / (g * u_avg[1] * u_avg[4] * drho) + h63 = u_avg[3] * u_avg[6] / (g * u_avg[1] * u_avg[4] * drho) + h64 = -u_avg[6] / (g * u_avg[4] * drho) + h65 = -u_avg[5] * u_avg[6] / (g * u_avg[4]^2 * drho) + h66 = ((g * rho_upper * u_avg[4]^3 - g * rho_lower * u_avg[4]^3 + + -rho_lower * u_avg[6]^2) / (g * rho_lower * u_avg[4]^2 * drho)) + + # Entropy Jacobian matrix + H = @SMatrix [[h11;; h12;; h13;; h14;; h15;; h16;; 0]; + [h21;; h22;; h23;; h24;; h25;; h26;; 0]; + [h31;; h32;; h33;; h34;; h35;; h36;; 0]; + [h41;; h42;; h43;; h44;; h45;; h46;; 0]; + [h51;; h52;; h53;; h54;; h55;; h56;; 0]; + [h61;; h62;; h63;; h64;; h65;; h66;; 0]; + [0;; 0;; 0;; 0;; 0;; 0;; 0]] + + # Add dissipation to entropy conservative flux to obtain entropy stable flux + f_es = f_ec - 0.5 * λ * H * (q_rr - q_ll) + + return SVector(f_es[1], f_es[2], f_es[3], f_es[4], f_es[5], f_es[6], + zero(eltype(u_ll))) end - # Calculate approximation for maximum wave speed for local Lax-Friedrichs-type dissipation as the # maximum velocity magnitude plus the maximum speed of sound. This function uses approximate # eigenvalues using the speed of the barotropic mode as there is no simple way to calculate them @@ -738,199 +798,198 @@ end # - Jonas Nycander, Andrew McC. Hogg, Leela M. 
Frankcombe (2008) # Open boundary conditions for nonlinear channel Flows # [DOI: 10.1016/j.ocemod.2008.06.003](https://doi.org/10.1016/j.ocemod.2008.06.003) -@inline function max_abs_speed_naive(u_ll, u_rr, - orientation::Integer, +@inline function max_abs_speed_naive(u_ll, u_rr, + orientation::Integer, equations::ShallowWaterTwoLayerEquations2D) - # Unpack left and right state - h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, _ = u_ll - h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, _ = u_rr - - # Calculate averaged velocity of both layers - if orientation == 1 - v_m_ll = (h_v1_upper_ll + h_v1_lower_ll) / (h_upper_ll + h_lower_ll) - v_m_rr = (h_v1_upper_rr + h_v1_lower_rr) / (h_upper_rr + h_lower_rr) - else - v_m_ll = (h_v2_upper_ll + h_v2_lower_ll) / (h_upper_ll + h_lower_ll) - v_m_rr = (h_v2_upper_rr + h_v2_lower_rr) / (h_upper_rr + h_lower_rr) - end - - # Calculate the wave celerity on the left and right - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, equations) - - c_ll = sqrt(equations.gravity * (h_upper_ll + h_lower_ll) ) - c_rr = sqrt(equations.gravity * (h_upper_rr + h_lower_rr)) - - return (max(abs(v_m_ll),abs(v_m_rr)) + max(c_ll,c_rr)) + # Unpack left and right state + h_upper_ll, h_v1_upper_ll, h_v2_upper_ll, h_lower_ll, h_v1_lower_ll, h_v2_lower_ll, _ = u_ll + h_upper_rr, h_v1_upper_rr, h_v2_upper_rr, h_lower_rr, h_v1_lower_rr, h_v2_lower_rr, _ = u_rr + + # Calculate averaged velocity of both layers + if orientation == 1 + v_m_ll = (h_v1_upper_ll + h_v1_lower_ll) / (h_upper_ll + h_lower_ll) + v_m_rr = (h_v1_upper_rr + h_v1_lower_rr) / (h_upper_rr + h_lower_rr) + else + v_m_ll = (h_v2_upper_ll + h_v2_lower_ll) / (h_upper_ll + h_lower_ll) + v_m_rr = (h_v2_upper_rr + h_v2_lower_rr) / (h_upper_rr + h_lower_rr) + end + + # Calculate the wave celerity on the left and right + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + + c_ll = sqrt(equations.gravity * (h_upper_ll + h_lower_ll)) + c_rr = sqrt(equations.gravity * (h_upper_rr + h_lower_rr)) + + return (max(abs(v_m_ll), abs(v_m_rr)) + max(c_ll, c_rr)) end - -@inline function max_abs_speed_naive(u_ll, u_rr, +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations::ShallowWaterTwoLayerEquations2D) - # Unpack left and right state - h_upper_ll, _, _, h_lower_ll, _, _, _ = u_ll - h_upper_rr, _, _, h_lower_rr, _, _, _ = u_rr - - # Extract and compute the velocities in the normal direction - v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) - v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) - - v_upper_dot_n_ll = v1_upper_ll * normal_direction[1] + v2_upper_ll * normal_direction[2] - v_upper_dot_n_rr = v1_upper_rr * normal_direction[1] + v2_upper_rr * normal_direction[2] - v_lower_dot_n_ll = v1_lower_ll * normal_direction[1] + v2_lower_ll * normal_direction[2] - v_lower_dot_n_rr = v1_lower_rr * normal_direction[1] + v2_lower_rr * normal_direction[2] - - # Calculate averaged velocity of both layers - v_m_ll = (v_upper_dot_n_ll * h_upper_ll + v_lower_dot_n_ll * h_lower_ll) / (h_upper_ll + h_lower_ll) - v_m_rr = (v_upper_dot_n_rr * h_upper_rr + v_lower_dot_n_rr * h_lower_rr) / (h_upper_rr + h_lower_rr) - - # Compute the wave celerity on the left and right - h_upper_ll, h_lower_ll = waterheight(u_ll, equations) - h_upper_rr, h_lower_rr = waterheight(u_rr, 
equations) - - c_ll = sqrt(equations.gravity * (h_upper_ll + h_lower_ll)) - c_rr = sqrt(equations.gravity * (h_upper_rr + h_lower_rr)) - - # The normal velocities are already scaled by the norm - return max(abs(v_m_ll), abs(v_m_rr)) + max(c_ll, c_rr) * norm(normal_direction) + # Unpack left and right state + h_upper_ll, _, _, h_lower_ll, _, _, _ = u_ll + h_upper_rr, _, _, h_lower_rr, _, _, _ = u_rr + + # Extract and compute the velocities in the normal direction + v1_upper_ll, v2_upper_ll, v1_lower_ll, v2_lower_ll = velocity(u_ll, equations) + v1_upper_rr, v2_upper_rr, v1_lower_rr, v2_lower_rr = velocity(u_rr, equations) + + v_upper_dot_n_ll = v1_upper_ll * normal_direction[1] + + v2_upper_ll * normal_direction[2] + v_upper_dot_n_rr = v1_upper_rr * normal_direction[1] + + v2_upper_rr * normal_direction[2] + v_lower_dot_n_ll = v1_lower_ll * normal_direction[1] + + v2_lower_ll * normal_direction[2] + v_lower_dot_n_rr = v1_lower_rr * normal_direction[1] + + v2_lower_rr * normal_direction[2] + + # Calculate averaged velocity of both layers + v_m_ll = (v_upper_dot_n_ll * h_upper_ll + v_lower_dot_n_ll * h_lower_ll) / + (h_upper_ll + h_lower_ll) + v_m_rr = (v_upper_dot_n_rr * h_upper_rr + v_lower_dot_n_rr * h_lower_rr) / + (h_upper_rr + h_lower_rr) + + # Compute the wave celerity on the left and right + h_upper_ll, h_lower_ll = waterheight(u_ll, equations) + h_upper_rr, h_lower_rr = waterheight(u_rr, equations) + + c_ll = sqrt(equations.gravity * (h_upper_ll + h_lower_ll)) + c_rr = sqrt(equations.gravity * (h_upper_rr + h_lower_rr)) + + # The normal velocities are already scaled by the norm + return max(abs(v_m_ll), abs(v_m_rr)) + max(c_ll, c_rr) * norm(normal_direction) end - # Specialized `DissipationLocalLaxFriedrichs` to avoid spurious dissipation in the bottom topography -@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, - orientation_or_normal_direction, equations::ShallowWaterTwoLayerEquations2D) - λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, equations) - diss = -0.5 * λ * (u_rr - u_ll) - return SVector(diss[1], diss[2], diss[3], diss[4], diss[5], diss[6], zero(eltype(u_ll))) +@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, + orientation_or_normal_direction, + equations::ShallowWaterTwoLayerEquations2D) + λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + diss = -0.5 * λ * (u_rr - u_ll) + return SVector(diss[1], diss[2], diss[3], diss[4], diss[5], diss[6], + zero(eltype(u_ll))) end - # Absolute speed of the barotropic mode @inline function max_abs_speeds(u, equations::ShallowWaterTwoLayerEquations2D) - h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, _ = u + h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, _ = u - # Calculate averaged velocity of both layers - v1_m = (h_v1_upper + h_v1_lower) / (h_upper + h_lower) - v2_m = (h_v2_upper + h_v2_lower) / (h_upper + h_lower) + # Calculate averaged velocity of both layers + v1_m = (h_v1_upper + h_v1_lower) / (h_upper + h_lower) + v2_m = (h_v2_upper + h_v2_lower) / (h_upper + h_lower) - h_upper, h_lower = waterheight(u, equations) - v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations) + h_upper, h_lower = waterheight(u, equations) + v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations) - c = sqrt(equations.gravity * (h_upper + h_lower)) - return (max(abs(v1_m) + c, abs(v1_upper), abs(v1_lower)), - max(abs(v2_m) + c, abs(v2_upper), abs(v2_lower))) + c = sqrt(equations.gravity 
* (h_upper + h_lower))
+    return (max(abs(v1_m) + c, abs(v1_upper), abs(v1_lower)),
+            max(abs(v2_m) + c, abs(v2_upper), abs(v2_lower)))
 end

-
 # Helper function to extract the velocity vector from the conservative variables
 @inline function velocity(u, equations::ShallowWaterTwoLayerEquations2D)
-  h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, _ = u
+    h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, _ = u

-  v1_upper = h_v1_upper / h_upper
-  v2_upper = h_v2_upper / h_upper
-  v1_lower = h_v1_lower / h_lower
-  v2_lower = h_v2_lower / h_lower
+    v1_upper = h_v1_upper / h_upper
+    v2_upper = h_v2_upper / h_upper
+    v1_lower = h_v1_lower / h_lower
+    v2_lower = h_v2_lower / h_lower

-  return SVector(v1_upper, v2_upper, v1_lower, v2_lower)
+    return SVector(v1_upper, v2_upper, v1_lower, v2_lower)
 end

-
 # Convert conservative variables to primitive
 @inline function cons2prim(u, equations::ShallowWaterTwoLayerEquations2D)
-  h_upper, _, _, h_lower, _, _, b = u
+    h_upper, _, _, h_lower, _, _, b = u

-  H_lower = h_lower + b
-  H_upper = h_lower + h_upper + b
-  v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations)
+    H_lower = h_lower + b
+    H_upper = h_lower + h_upper + b
+    v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations)

-  return SVector(H_upper, v1_upper, v2_upper , H_lower, v1_lower, v2_lower, b)
+    return SVector(H_upper, v1_upper, v2_upper, H_lower, v1_lower, v2_lower, b)
 end

-
 # Convert conservative variables to entropy variables
 # Note, only the first six are the entropy variables, the seventh entry still just carries the bottom
 # topography values for convenience.
 # In contrast to general usage the entropy variables are denoted with q instead of w, because w is
 # already used for velocity in y-Direction
 @inline function cons2entropy(u, equations::ShallowWaterTwoLayerEquations2D)
-  h_upper, _, _, h_lower, _, _, b = u
-  # Assign new variables for better readability
-  rho_upper = equations.rho_upper
-  rho_lower = equations.rho_lower
-  v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations)
-
-  w1 = rho_upper * (equations.gravity * ( h_upper + h_lower + b) +
-        - 0.5 * (v1_upper^2 + v2_upper^2))
-  w2 = rho_upper * v1_upper
-  w3 = rho_upper * v2_upper
-  w4 = rho_lower * (equations.gravity * (equations.r * h_upper + h_lower + b) +
-        - 0.5 * (v1_lower^2 + v2_lower^2))
-  w5 = rho_lower * v1_lower
-  w6 = rho_lower * v2_lower
-  return SVector(w1, w2, w3, w4, w5, w6, b)
+    h_upper, _, _, h_lower, _, _, b = u
+    # Assign new variables for better readability
+    rho_upper = equations.rho_upper
+    rho_lower = equations.rho_lower
+    v1_upper, v2_upper, v1_lower, v2_lower = velocity(u, equations)
+
+    w1 = rho_upper * (equations.gravity * (h_upper + h_lower + b) +
+          -0.5 * (v1_upper^2 + v2_upper^2))
+    w2 = rho_upper * v1_upper
+    w3 = rho_upper * v2_upper
+    w4 = rho_lower * (equations.gravity * (equations.r * h_upper + h_lower + b) +
+          -0.5 * (v1_lower^2 + v2_lower^2))
+    w5 = rho_lower * v1_lower
+    w6 = rho_lower * v2_lower
+    return SVector(w1, w2, w3, w4, w5, w6, b)
 end

-
 # Convert primitive to conservative variables
 @inline function prim2cons(prim, equations::ShallowWaterTwoLayerEquations2D)
-  H_upper, v1_upper, v2_upper, H_lower, v1_lower, v2_lower, b = prim
+    H_upper, v1_upper, v2_upper, H_lower, v1_lower, v2_lower, b = prim
+
+    h_lower = H_lower - b
+    h_upper = H_upper - h_lower - b
+    h_v1_upper = h_upper * v1_upper
+    h_v2_upper = h_upper * v2_upper
+    h_v1_lower = h_lower * v1_lower
+    h_v2_lower = h_lower * v2_lower
+    return SVector(h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, b)
 end

-
 @inline function waterheight(u, equations::ShallowWaterTwoLayerEquations2D)
-  return SVector(u[1], u[4])
+    return SVector(u[1], u[4])
 end

-
 # Entropy function for the shallow water equations is the total energy
-@inline entropy(cons, equations::ShallowWaterTwoLayerEquations2D) = energy_total(cons, equations)
-
+@inline function entropy(cons, equations::ShallowWaterTwoLayerEquations2D)
+    energy_total(cons, equations)
+end

 # Calculate total energy for a conservative state `cons`
 @inline function energy_total(cons, equations::ShallowWaterTwoLayerEquations2D)
-  h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, b = cons
-  g = equations.gravity
-  rho_upper= equations.rho_upper
-  rho_lower= equations.rho_lower
-
-  e = (0.5 * rho_upper * (h_v1_upper^2 / h_upper + h_v2_upper^2 / h_upper + g * h_upper^2) +
-    0.5 * rho_lower * (h_v1_lower^2 / h_lower + h_v2_lower^2 / h_lower + g * h_lower^2) +
-    g*rho_lower*h_lower*b + g*rho_upper*h_upper*(h_lower + b))
-  return e
+    h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, b = cons
+    g = equations.gravity
+    rho_upper = equations.rho_upper
+    rho_lower = equations.rho_lower
+
+    e = (0.5 * rho_upper *
+         (h_v1_upper^2 / h_upper + h_v2_upper^2 / h_upper + g * h_upper^2) +
+         0.5 * rho_lower *
+         (h_v1_lower^2 / h_lower + h_v2_lower^2 / h_lower + g * h_lower^2) +
+         g * rho_lower * h_lower * b + g * rho_upper * h_upper * (h_lower + b))
+    return e
 end

-
 # Calculate kinetic energy for a conservative state `cons`
 @inline function energy_kinetic(u, equations::ShallowWaterTwoLayerEquations2D)
-  h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, _ = u
+    h_upper, h_v1_upper, h_v2_upper, h_lower, h_v1_lower, h_v2_lower, _ = u

-  return (0.5 * equations.rho_upper * h_v1_upper^2 / h_upper +
-          0.5 * equations.rho_upper * h_v2_upper^2 / h_upper +
-          0.5 * equations.rho_lower * h_v1_lower^2 / h_lower +
-          0.5 * equations.rho_lower * h_v2_lower^2 / h_lower)
+    return (0.5 * equations.rho_upper * h_v1_upper^2 / h_upper +
+            0.5 * equations.rho_upper * h_v2_upper^2 / h_upper +
+            0.5 * equations.rho_lower * h_v1_lower^2 / h_lower +
+            0.5 * equations.rho_lower * h_v2_lower^2 / h_lower)
 end

-
 # Calculate potential energy for a conservative state `cons`
 @inline function energy_internal(cons, equations::ShallowWaterTwoLayerEquations2D)
-  return energy_total(cons, equations) - energy_kinetic(cons, equations)
+    return energy_total(cons, equations) - energy_kinetic(cons, equations)
 end

-
 # Calculate the error for the "lake-at-rest" test case where H = h_upper+h_lower+b should
 # be a constant value over time
 @inline function lake_at_rest_error(u, equations::ShallowWaterTwoLayerEquations2D)
-  h_upper, _, _, h_lower, _, _, b = u
-  return abs(equations.H0 - (h_upper + h_lower + b))
+    h_upper, _, _, h_lower, _, _, b = u
+    return abs(equations.H0 - (h_upper + h_lower + b))
 end
-
 end # @muladd
diff --git a/src/meshes/abstract_tree.jl b/src/meshes/abstract_tree.jl
index 7d075d66c82..469189ff50c 100644
--- a/src/meshes/abstract_tree.jl
+++ b/src/meshes/abstract_tree.jl
@@ -3,20 +3,21 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#!
format: noindent abstract type AbstractTree{NDIMS} <: AbstractContainer end # Type traits to obtain dimension -@inline Base.ndims(::AbstractTree{NDIMS}) where NDIMS = NDIMS - +@inline Base.ndims(::AbstractTree{NDIMS}) where {NDIMS} = NDIMS # Auxiliary methods to allow semantic queries on the tree # Check whether cell has parent cell has_parent(t::AbstractTree, cell_id::Int) = t.parent_ids[cell_id] > 0 # Count number of children for a given cell -n_children(t::AbstractTree, cell_id::Int) = count(x -> (x > 0), @view t.child_ids[:, cell_id]) +function n_children(t::AbstractTree, cell_id::Int) + count(x -> (x > 0), @view t.child_ids[:, cell_id]) +end # Check whether cell has any child cell has_children(t::AbstractTree, cell_id::Int) = n_children(t, cell_id) > 0 @@ -28,16 +29,19 @@ is_leaf(t::AbstractTree, cell_id::Int) = !has_children(t, cell_id) has_child(t::AbstractTree, cell_id::Int, child::Int) = t.child_ids[child, cell_id] > 0 # Check if cell has a neighbor at the same refinement level in the given direction -has_neighbor(t::AbstractTree, cell_id::Int, direction::Int) = t.neighbor_ids[direction, cell_id] > 0 +function has_neighbor(t::AbstractTree, cell_id::Int, direction::Int) + t.neighbor_ids[direction, cell_id] > 0 +end # Check if cell has a coarse neighbor, i.e., with one refinement level lower function has_coarse_neighbor(t::AbstractTree, cell_id::Int, direction::Int) - return has_parent(t, cell_id) && has_neighbor(t, t.parent_ids[cell_id], direction) + return has_parent(t, cell_id) && has_neighbor(t, t.parent_ids[cell_id], direction) end # Check if cell has any neighbor (same-level or lower-level) function has_any_neighbor(t::AbstractTree, cell_id::Int, direction::Int) - return has_neighbor(t, cell_id, direction) || has_coarse_neighbor(t, cell_id, direction) + return has_neighbor(t, cell_id, direction) || + has_coarse_neighbor(t, cell_id, direction) end # Check if cell is own cell, i.e., belongs to this MPI rank @@ -59,10 +63,9 @@ maximum_level(t::AbstractTree) = maximum(t.levels[leaf_cells(t)]) isperiodic(t::AbstractTree) = all(t.periodicity) isperiodic(t::AbstractTree, dimension) = t.periodicity[dimension] - # Auxiliary methods for often-required calculations # Number of potential child cells -n_children_per_cell(::AbstractTree{NDIMS}) where NDIMS = 2^NDIMS +n_children_per_cell(::AbstractTree{NDIMS}) where {NDIMS} = 2^NDIMS # Number of directions # @@ -73,7 +76,7 @@ n_children_per_cell(::AbstractTree{NDIMS}) where NDIMS = 2^NDIMS # 4 -> +y # 5 -> -z # 6 -> +z -@inline n_directions(::AbstractTree{NDIMS}) where NDIMS = 2 * NDIMS +@inline n_directions(::AbstractTree{NDIMS}) where {NDIMS} = 2 * NDIMS # TODO: Taal performance, 1:n_directions(tree) vs. Base.OneTo(n_directions(tree)) vs. SOneTo(n_directions(tree)) """ eachdirection(tree::AbstractTree) @@ -121,7 +124,6 @@ const _child_signs = [-1 -1 -1; +1 +1 +1] child_sign(child::Int, dim::Int) = _child_signs[child, dim] - # For each child position (1 to 8) and a given direction (from 1 to 6), return # neighboring child position. 
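# As an illustrative reading of the two lookup tables (child signs above,
# adjacent children below): child 1 carries the signs (-1, -1, -1), i.e., it is
# the -x/-y/-z child of its parent, and its sibling in +x direction
# (direction 2) is child 2, so `adjacent_child(1, 2) == 2`.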
const _adjacent_child_ids = [2 2 3 3 5 5; @@ -134,310 +136,298 @@ const _adjacent_child_ids = [2 2 3 3 5 5; 7 7 6 6 4 4] adjacent_child(child::Int, direction::Int) = _adjacent_child_ids[child, direction] - # For each child position (1 to 8) and a given direction (from 1 to 6), return # if neighbor is a sibling function has_sibling(child::Int, direction::Int) - return (child_sign(child, div(direction + 1, 2)) * (-1)^(direction - 1)) > 0 + return (child_sign(child, div(direction + 1, 2)) * (-1)^(direction - 1)) > 0 end - # Obtain leaf cells that fulfill a given criterion. # # The function `f` is passed the cell id of each leaf cell # as an argument. function filter_leaf_cells(f, t::AbstractTree) - filtered = Vector{Int}(undef, length(t)) - count = 0 - for cell_id in 1:length(t) - if is_leaf(t, cell_id) && f(cell_id) - count += 1 - filtered[count] = cell_id + filtered = Vector{Int}(undef, length(t)) + count = 0 + for cell_id in 1:length(t) + if is_leaf(t, cell_id) && f(cell_id) + count += 1 + filtered[count] = cell_id + end end - end - return filtered[1:count] + return filtered[1:count] end - # Return an array with the ids of all leaf cells -leaf_cells(t::AbstractTree) = filter_leaf_cells((cell_id)->true, t) - +leaf_cells(t::AbstractTree) = filter_leaf_cells((cell_id) -> true, t) # Return an array with the ids of all leaf cells for a given rank leaf_cells_by_rank(t::AbstractTree, rank) = leaf_cells(t) - # Return an array with the ids of all local leaf cells local_leaf_cells(t::AbstractTree) = leaf_cells(t) - # Count the number of leaf cells. count_leaf_cells(t::AbstractTree) = length(leaf_cells(t)) - @inline function cell_coordinates(t::AbstractTree{NDIMS}, cell) where {NDIMS} - SVector(ntuple(d -> t.coordinates[d, cell], Val(NDIMS))) + SVector(ntuple(d -> t.coordinates[d, cell], Val(NDIMS))) end -@inline function set_cell_coordinates!(t::AbstractTree{NDIMS}, coords, cell) where {NDIMS} - for d in 1:NDIMS - t.coordinates[d, cell] = coords[d] - end +@inline function set_cell_coordinates!(t::AbstractTree{NDIMS}, coords, + cell) where {NDIMS} + for d in 1:NDIMS + t.coordinates[d, cell] = coords[d] + end end - # Determine if point is located inside cell function is_point_in_cell(t::AbstractTree, point_coordinates, cell_id) - cell_length = length_at_cell(t, cell_id) - cell_coordinates_ = cell_coordinates(t, cell_id) - min_coordinates = cell_coordinates_ .- cell_length / 2 - max_coordinates = cell_coordinates_ .+ cell_length / 2 + cell_length = length_at_cell(t, cell_id) + cell_coordinates_ = cell_coordinates(t, cell_id) + min_coordinates = cell_coordinates_ .- cell_length / 2 + max_coordinates = cell_coordinates_ .+ cell_length / 2 - return all(min_coordinates .<= point_coordinates .<= max_coordinates) + return all(min_coordinates .<= point_coordinates .<= max_coordinates) end - # Store cell id in each cell to use for post-AMR analysis function reset_original_cell_ids!(t::AbstractTree) - t.original_cell_ids[1:length(t)] .= 1:length(t) + t.original_cell_ids[1:length(t)] .= 1:length(t) end - # Efficiently perform uniform refinement up to a given level (works only on mesh with a single cell) function refine_uniformly!(t::AbstractTree, max_level) - @assert length(t) == 1 "efficient uniform refinement only works for a newly created tree" - @assert max_level >= 0 "the uniform refinement level must be non-zero" + @assert length(t)==1 "efficient uniform refinement only works for a newly created tree" + @assert max_level>=0 "the uniform refinement level must be non-zero" - # Calculate size of final tree 
and resize tree - total_length = 1 - for level in 1:max_level - total_length += n_children_per_cell(t)^level - end - resize!(t, total_length) + # Calculate size of final tree and resize tree + total_length = 1 + for level in 1:max_level + total_length += n_children_per_cell(t)^level + end + resize!(t, total_length) - # Traverse tree to set parent-child relationships - init_children!(t, 1, max_level) + # Traverse tree to set parent-child relationships + init_children!(t, 1, max_level) - # Set all neighbor relationships - init_neighbors!(t, max_level) + # Set all neighbor relationships + init_neighbors!(t, max_level) end - # Recursively initialize children up to level `max_level` in depth-first ordering, starting with # cell `cell_id` and set all information except neighbor relations (see `init_neighbors!`). # # Return the number of offspring of the initialized cell plus one function init_children!(t::AbstractTree, cell_id, max_level) - # Stop recursion if max_level has been reached - if t.levels[cell_id] >= max_level - return 1 - else - # Initialize each child cell, counting the total number of offspring - n_offspring = 0 - for child in 1:n_children_per_cell(t) - # Get cell id of child - child_id = cell_id + 1 + n_offspring - - # Initialize child cell (except neighbors) - init_child!(t, cell_id, child, child_id) - - # Recursively initialize child cell - n_offspring += init_children!(t, child_id, max_level) - end + # Stop recursion if max_level has been reached + if t.levels[cell_id] >= max_level + return 1 + else + # Initialize each child cell, counting the total number of offspring + n_offspring = 0 + for child in 1:n_children_per_cell(t) + # Get cell id of child + child_id = cell_id + 1 + n_offspring - return n_offspring + 1 - end -end + # Initialize child cell (except neighbors) + init_child!(t, cell_id, child, child_id) + # Recursively initialize child cell + n_offspring += init_children!(t, child_id, max_level) + end + + return n_offspring + 1 + end +end # Iteratively set all neighbor relations, starting at an initialized level 0 cell. Assume that # parent-child relations have already been initialized (see `init_children!`). 
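# Illustrative sketch of the two-stage setup performed by `refine_uniformly!` above:
#     init_children!(t, 1, max_level)   # parent-child relations, depth-first
#     init_neighbors!(t, max_level)     # neighbor relations, level by level
# where omitting `max_level` below defaults to `maximum_level(t)`.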
-function init_neighbors!(t::AbstractTree, max_level=maximum_level(t)) - @assert all(n >= 0 for n in t.neighbor_ids[:, 1]) "level 0 cell neighbors must be initialized" - - # Initialize neighbors level by level - for level in 1:max_level - # Walk entire tree, starting from level 0 cell - for cell_id in 1:length(t) - # Skip cells whose immediate children are already initialized *or* whose level is too high for this round - if t.levels[cell_id] != level - 1 - continue - end - - # Iterate over children and set neighbor information - for child in 1:n_children_per_cell(t) - child_id = t.child_ids[child, cell_id] - init_child_neighbors!(t, cell_id, child, child_id) - end +function init_neighbors!(t::AbstractTree, max_level = maximum_level(t)) + @assert all(n >= 0 for n in t.neighbor_ids[:, 1]) "level 0 cell neighbors must be initialized" + + # Initialize neighbors level by level + for level in 1:max_level + # Walk entire tree, starting from level 0 cell + for cell_id in 1:length(t) + # Skip cells whose immediate children are already initialized *or* whose level is too high for this round + if t.levels[cell_id] != level - 1 + continue + end + + # Iterate over children and set neighbor information + for child in 1:n_children_per_cell(t) + child_id = t.child_ids[child, cell_id] + init_child_neighbors!(t, cell_id, child, child_id) + end + end end - end - return nothing + return nothing end - # Initialize the neighbors of child cell `child_id` based on parent cell `cell_id` function init_child_neighbors!(t::AbstractTree, cell_id, child, child_id) - t.neighbor_ids[:, child_id] .= zero(eltype(t.neighbor_ids)) - for direction in eachdirection(t) - # If neighbor is a sibling, establish one-sided connectivity - # Note: two-sided is not necessary, as each sibling will do this - if has_sibling(child, direction) - adjacent = adjacent_child(child, direction) - neighbor_id = t.child_ids[adjacent, cell_id] - - t.neighbor_ids[direction, child_id] = neighbor_id - continue - end + t.neighbor_ids[:, child_id] .= zero(eltype(t.neighbor_ids)) + for direction in eachdirection(t) + # If neighbor is a sibling, establish one-sided connectivity + # Note: two-sided is not necessary, as each sibling will do this + if has_sibling(child, direction) + adjacent = adjacent_child(child, direction) + neighbor_id = t.child_ids[adjacent, cell_id] - # Skip if original cell does have no neighbor in direction - if !has_neighbor(t, cell_id, direction) - continue - end + t.neighbor_ids[direction, child_id] = neighbor_id + continue + end - # Otherwise, check if neighbor has children - if not, skip again - neighbor_id = t.neighbor_ids[direction, cell_id] - if !has_children(t, neighbor_id) - continue - end + # Skip if original cell does have no neighbor in direction + if !has_neighbor(t, cell_id, direction) + continue + end - # Check if neighbor has corresponding child and if yes, establish connectivity - adjacent = adjacent_child(child, direction) - if has_child(t, neighbor_id, adjacent) - neighbor_child_id = t.child_ids[adjacent, neighbor_id] - opposite = opposite_direction(direction) + # Otherwise, check if neighbor has children - if not, skip again + neighbor_id = t.neighbor_ids[direction, cell_id] + if !has_children(t, neighbor_id) + continue + end + + # Check if neighbor has corresponding child and if yes, establish connectivity + adjacent = adjacent_child(child, direction) + if has_child(t, neighbor_id, adjacent) + neighbor_child_id = t.child_ids[adjacent, neighbor_id] + opposite = opposite_direction(direction) - 
t.neighbor_ids[direction, child_id] = neighbor_child_id - t.neighbor_ids[opposite, neighbor_child_id] = child_id + t.neighbor_ids[direction, child_id] = neighbor_child_id + t.neighbor_ids[opposite, neighbor_child_id] = child_id + end end - end - return nothing + return nothing end - # Refine given cells without rebalancing tree. # # Note: After a call to this method the tree may be unbalanced! -function refine_unbalanced!(t::AbstractTree, cell_ids, sorted_unique_cell_ids=sort(unique(cell_ids))) - # Store actual ids refined cells (shifted due to previous insertions) - refined = zeros(Int, length(cell_ids)) - - # Loop over all cells that are to be refined - for (count, original_cell_id) in enumerate(sorted_unique_cell_ids) - # Determine actual cell id, taking into account previously inserted cells - n_children = n_children_per_cell(t) - cell_id = original_cell_id + (count - 1) * n_children - refined[count] = cell_id - - @assert !has_children(t, cell_id) "Non-leaf cell $cell_id cannot be refined" - - # Insert new cells directly behind parent (depth-first) - insert!(t, cell_id + 1, n_children) - - # Flip sign of refined cell such that we can easily find it later - t.original_cell_ids[cell_id] = -t.original_cell_ids[cell_id] - - # Initialize child cells (except neighbors) - for child in 1:n_children - child_id = cell_id + child - init_child!(t, cell_id, child, child_id) - end +function refine_unbalanced!(t::AbstractTree, cell_ids, + sorted_unique_cell_ids = sort(unique(cell_ids))) + # Store actual ids refined cells (shifted due to previous insertions) + refined = zeros(Int, length(cell_ids)) + + # Loop over all cells that are to be refined + for (count, original_cell_id) in enumerate(sorted_unique_cell_ids) + # Determine actual cell id, taking into account previously inserted cells + n_children = n_children_per_cell(t) + cell_id = original_cell_id + (count - 1) * n_children + refined[count] = cell_id + + @assert !has_children(t, cell_id) "Non-leaf cell $cell_id cannot be refined" + + # Insert new cells directly behind parent (depth-first) + insert!(t, cell_id + 1, n_children) + + # Flip sign of refined cell such that we can easily find it later + t.original_cell_ids[cell_id] = -t.original_cell_ids[cell_id] + + # Initialize child cells (except neighbors) + for child in 1:n_children + child_id = cell_id + child + init_child!(t, cell_id, child, child_id) + end - # Initialize child cells (only neighbors) - # This separate loop is required since init_child_neighbors requires initialized parent-child - # relationships - for child in 1:n_children - child_id = cell_id + child - init_child_neighbors!(t, cell_id, child, child_id) + # Initialize child cells (only neighbors) + # This separate loop is required since init_child_neighbors requires initialized parent-child + # relationships + for child in 1:n_children + child_id = cell_id + child + init_child_neighbors!(t, cell_id, child, child_id) + end end - end - return refined + return refined end - # Refine entire tree by one level function refine!(t::AbstractTree) - cells = @trixi_timeit timer() "collect all leaf cells" leaf_cells(t) - @trixi_timeit timer() "refine!" refine!(t, cells, cells) + cells = @trixi_timeit timer() "collect all leaf cells" leaf_cells(t) + @trixi_timeit timer() "refine!" refine!(t, cells, cells) end - # Refine given cells and rebalance tree. # # Note 1: Rebalancing is iterative, i.e., neighboring cells are refined if # otherwise the 2:1 rule would be violated, which can cause more # refinements. 
# Note 2: Rebalancing currently only considers *Cartesian* neighbors, not diagonal neighbors! -function refine!(t::AbstractTree, cell_ids, sorted_unique_cell_ids=sort(unique(cell_ids))) - # Reset original cell ids such that each cell knows its current id - reset_original_cell_ids!(t) - - # Refine all requested cells - refined = @trixi_timeit timer() "refine_unbalanced!" refine_unbalanced!(t, cell_ids, sorted_unique_cell_ids) - refinement_count = length(refined) - - # Iteratively rebalance the tree until it does not change anymore - while length(refined) > 0 - refined = @trixi_timeit timer() "rebalance!" rebalance!(t, refined) - refinement_count += length(refined) - end - - # Determine list of *original* cell ids that were refined - # Note: original_cell_ids contains the cell_id *before* refinement. At - # refinement, the refined cell's original_cell_ids value has its sign flipped - # to easily find it now. - refined_original_cells = @views( - -t.original_cell_ids[1:length(t)][t.original_cell_ids[1:length(t)] .< 0]) - - # Check if count of refinement cells matches information in original_cell_ids - @assert refinement_count == length(refined_original_cells) ( - "Mismatch in number of refined cells") - - return refined_original_cells -end +function refine!(t::AbstractTree, cell_ids, + sorted_unique_cell_ids = sort(unique(cell_ids))) + # Reset original cell ids such that each cell knows its current id + reset_original_cell_ids!(t) + + # Refine all requested cells + refined = @trixi_timeit timer() "refine_unbalanced!" refine_unbalanced!(t, cell_ids, + sorted_unique_cell_ids) + refinement_count = length(refined) + + # Iteratively rebalance the tree until it does not change anymore + while length(refined) > 0 + refined = @trixi_timeit timer() "rebalance!" rebalance!(t, refined) + refinement_count += length(refined) + end + + # Determine list of *original* cell ids that were refined + # Note: original_cell_ids contains the cell_id *before* refinement. At + # refinement, the refined cell's original_cell_ids value has its sign flipped + # to easily find it now. + refined_original_cells = @views(-t.original_cell_ids[1:length(t)][t.original_cell_ids[1:length(t)] .< 0]) + # Check if count of refinement cells matches information in original_cell_ids + @assert refinement_count==length(refined_original_cells) ("Mismatch in number of refined cells") + + return refined_original_cells +end # Refine all leaf cells with coordinates in a given rectangular box -function refine_box!(t::AbstractTree{NDIMS}, coordinates_min, coordinates_max) where NDIMS - for dim in 1:NDIMS - @assert coordinates_min[dim] < coordinates_max[dim] "Minimum coordinates are not minimum." 
- end - - # Find all leaf cells within box - cells = filter_leaf_cells(t) do cell_id - return (all(coordinates_min .< cell_coordinates(t, cell_id)) && - all(coordinates_max .> cell_coordinates(t, cell_id))) - end - - # Refine cells - refine!(t, cells) +function refine_box!(t::AbstractTree{NDIMS}, coordinates_min, + coordinates_max) where {NDIMS} + for dim in 1:NDIMS + @assert coordinates_min[dim] cell_coordinates(t, cell_id))) + end + + # Refine cells + refine!(t, cells) end # Convenience method for 1D function refine_box!(t::AbstractTree{1}, coordinates_min::Real, coordinates_max::Real) - return refine_box!(t, [convert(Float64, coordinates_min)], [convert(Float64, coordinates_max)]) + return refine_box!(t, [convert(Float64, coordinates_min)], + [convert(Float64, coordinates_max)]) end - # Refine all leaf cells with coordinates in a given sphere -function refine_sphere!(t::AbstractTree{NDIMS}, center::SVector{NDIMS}, radius) where NDIMS - @assert radius >= 0 "Radius must be positive." +function refine_sphere!(t::AbstractTree{NDIMS}, center::SVector{NDIMS}, + radius) where {NDIMS} + @assert radius>=0 "Radius must be positive." - # Find all leaf cells within sphere - cells = filter_leaf_cells(t) do cell_id - return sum(abs2, cell_coordinates(t, cell_id) - center) < radius^2 - end + # Find all leaf cells within sphere + cells = filter_leaf_cells(t) do cell_id + return sum(abs2, cell_coordinates(t, cell_id) - center) < radius^2 + end - # Refine cells - refine!(t, cells) + # Refine cells + refine!(t, cells) end # Convenience function to allow passing center as a tuple -function refine_sphere!(t::AbstractTree{NDIMS}, center::NTuple{NDIMS}, radius) where NDIMS - refine_sphere!(t, SVector(center), radius) +function refine_sphere!(t::AbstractTree{NDIMS}, center::NTuple{NDIMS}, + radius) where {NDIMS} + refine_sphere!(t, SVector(center), radius) end # For the given cell ids, check if neighbors need to be refined to restore a rebalanced tree. @@ -447,42 +437,41 @@ end # created level differences of at most 2. That is, before the previous # refinement step, the tree was balanced. 
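# Illustrative use: refining a single leaf cell next to a coarser neighbor can
# create a level difference of 2 across that interface. The loop in `refine!`
# above therefore calls, conceptually,
#     refined = rebalance!(t, refined)
# repeatedly until the returned list of newly refined cells is empty.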
function rebalance!(t::AbstractTree, refined_cell_ids)
-  # Create buffer for newly refined cells
-  to_refine = zeros(Int, n_directions(t) * length(refined_cell_ids))
-  count = 0
-
-  # Iterate over cell ids that have previously been refined
-  for cell_id in refined_cell_ids
-    # Go over all potential neighbors of child cell
-    for direction in eachdirection(t)
-      # Continue if refined cell has a neighbor in that direction
-      if has_neighbor(t, cell_id, direction)
-        continue
-      end
-
-      # Continue if refined cell has no coarse neighbor, since that would
-      # mean that there is no neighbor in that direction at all (domain
-      # boundary)
-      if !has_coarse_neighbor(t, cell_id, direction)
-        continue
-      end
-
-      # Otherwise, the coarse neighbor exists and is not refined, thus it must
-      # be marked for refinement
-      coarse_neighbor_id = t.neighbor_ids[direction, t.parent_ids[cell_id]]
-      count += 1
-      to_refine[count] = coarse_neighbor_id
+    # Create buffer for newly refined cells
+    to_refine = zeros(Int, n_directions(t) * length(refined_cell_ids))
+    count = 0
+
+    # Iterate over cell ids that have previously been refined
+    for cell_id in refined_cell_ids
+        # Go over all potential neighbors of child cell
+        for direction in eachdirection(t)
+            # Continue if refined cell has a neighbor in that direction
+            if has_neighbor(t, cell_id, direction)
+                continue
+            end
+
+            # Continue if refined cell has no coarse neighbor, since that would
+            # mean that there is no neighbor in that direction at all (domain
+            # boundary)
+            if !has_coarse_neighbor(t, cell_id, direction)
+                continue
+            end
+
+            # Otherwise, the coarse neighbor exists and is not refined, thus it must
+            # be marked for refinement
+            coarse_neighbor_id = t.neighbor_ids[direction, t.parent_ids[cell_id]]
+            count += 1
+            to_refine[count] = coarse_neighbor_id
+        end
     end
-  end
 
-  # Finally, refine all marked cells...
-  refined = refine_unbalanced!(t, unique(to_refine[1:count]))
+    # Finally, refine all marked cells...
+    refined = refine_unbalanced!(t, unique(to_refine[1:count]))
 
-  # ...and return list of refined cells
-  return refined
+    # ...and return list of refined cells
+    return refined
 end
 
-
 # Refine given cells without rebalancing tree.
 #
 # Note: After a call to this method the tree may be unbalanced!
@@ -491,19 +480,17 @@ end
 # Wrap single-cell refinements such that `sort(...)` does not complain
 refine_unbalanced!(t::AbstractTree, cell_id::Int) = refine_unbalanced!(t, [cell_id])
 
-
 # Coarsen entire tree by one level
 function coarsen!(t::AbstractTree)
-  # Special case: if there is only one cell (root), there is nothing to do
-  if length(t) == 1
-    return Int[]
-  end
-
-  # Get list of unique parent ids for all leaf cells
-  parent_ids = unique(t.parent_ids[leaf_cells(t)])
-  coarsen!(t, parent_ids)
-end
+    # Special case: if there is only one cell (root), there is nothing to do
+    if length(t) == 1
+        return Int[]
+    end
+    # Get list of unique parent ids for all leaf cells
+    parent_ids = unique(t.parent_ids[leaf_cells(t)])
+    coarsen!(t, parent_ids)
+end
 
 # Coarsen given *parent* cells (= these cells must have children who are all
 # leaf cells) while retaining a balanced tree.
@@ -513,165 +500,163 @@ end
 # coarsened without specifically asking for it, these cells will then *not* be
 # coarsened.
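# Illustrative aside (standalone Julia, not Trixi.jl code): the sign-flip
# bookkeeping shared by `refine!` above and `coarsen!` below. Cells to report
# are marked by negating their `original_cell_ids` entry; afterwards the
# marked ids are recovered by filtering for negative values, mirroring the
# `@views` expressions in both functions.
original_cell_ids = [1, 2, 3, 4, 5]
for id in (2, 4)                          # mark cells 2 and 4
    original_cell_ids[id] = -original_cell_ids[id]
end
marked = -original_cell_ids[original_cell_ids .< 0]
# marked == [2, 4]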
function coarsen!(t::AbstractTree, cell_ids::AbstractArray{Int}) - # Return early if array is empty - if length(cell_ids) == 0 - return Int[] - end - - # Reset original cell ids such that each cell knows its current id - reset_original_cell_ids!(t) - - # To maximize the number of cells that may be coarsened, start with the cells at the highest level - sorted_by_level = sort(cell_ids, by = i -> t.levels[i]) - - # Keep track of number of cells that were actually coarsened - n_coarsened = 0 - - # Local function to adjust cell ids after some cells have been removed - function adjust_cell_ids!(cell_ids, coarsened_cell_id, count) - for (id, cell_id) in enumerate(cell_ids) - if cell_id > coarsened_cell_id - cell_ids[id] = cell_id - count - end + # Return early if array is empty + if length(cell_ids) == 0 + return Int[] end - end - # Iterate backwards over cells to coarsen - while true - # Retrieve next cell or quit - if length(sorted_by_level) > 0 - coarse_cell_id = pop!(sorted_by_level) - else - break - end + # Reset original cell ids such that each cell knows its current id + reset_original_cell_ids!(t) - # Ensure that cell has children (violation is an error) - if !has_children(t, coarse_cell_id) - error("cell is leaf and cannot be coarsened to: $coarse_cell_id") - end + # To maximize the number of cells that may be coarsened, start with the cells at the highest level + sorted_by_level = sort(cell_ids, by = i -> t.levels[i]) - # Ensure that all child cells are leaf cells (violation is an error) - for child in 1:n_children_per_cell(t) - if has_child(t, coarse_cell_id, child) - if !is_leaf(t, t.child_ids[child, coarse_cell_id]) - error("cell $coarse_cell_id has child cell at position $child that is not a leaf cell") + # Keep track of number of cells that were actually coarsened + n_coarsened = 0 + + # Local function to adjust cell ids after some cells have been removed + function adjust_cell_ids!(cell_ids, coarsened_cell_id, count) + for (id, cell_id) in enumerate(cell_ids) + if cell_id > coarsened_cell_id + cell_ids[id] = cell_id - count + end end - end end - # Check if coarse cell has refined neighbors that would prevent coarsening - skip = false - # Iterate over all children (which are to be removed) - for child in 1:n_children_per_cell(t) - # Continue if child does not exist - if !has_child(t, coarse_cell_id, child) - continue - end - child_id = t.child_ids[child, coarse_cell_id] - - # Go over all neighbors of child cell. If it has a neighbor that is *not* - # a sibling and that is not a leaf cell, we cannot coarsen its parent - # without creating an unbalanced tree. 
-      for direction in eachdirection(t)
-        # Continue if neighbor would be a sibling
-        if has_sibling(child, direction)
-          continue
+    # Iterate backwards over cells to coarsen
+    while true
+        # Retrieve next cell or quit
+        if length(sorted_by_level) > 0
+            coarse_cell_id = pop!(sorted_by_level)
+        else
+            break
         end
-        # Continue if child cell has no neighbor in that direction
-        if !has_neighbor(t, child_id, direction)
-          continue
+        # Ensure that cell has children (violation is an error)
+        if !has_children(t, coarse_cell_id)
+            error("cell is leaf and cannot be coarsened to: $coarse_cell_id")
         end
-        neighbor_id = t.neighbor_ids[direction, child_id]
+        # Ensure that all child cells are leaf cells (violation is an error)
+        for child in 1:n_children_per_cell(t)
+            if has_child(t, coarse_cell_id, child)
+                if !is_leaf(t, t.child_ids[child, coarse_cell_id])
+                    error("cell $coarse_cell_id has child cell at position $child that is not a leaf cell")
+                end
+            end
         end
-        if !has_children(t, neighbor_id)
-          continue
+        # Check if coarse cell has refined neighbors that would prevent coarsening
+        skip = false
+        # Iterate over all children (which are to be removed)
+        for child in 1:n_children_per_cell(t)
+            # Continue if child does not exist
+            if !has_child(t, coarse_cell_id, child)
+                continue
+            end
+            child_id = t.child_ids[child, coarse_cell_id]
+
+            # Go over all neighbors of child cell. If it has a neighbor that is *not*
+            # a sibling and that is not a leaf cell, we cannot coarsen its parent
+            # without creating an unbalanced tree.
+            for direction in eachdirection(t)
+                # Continue if neighbor would be a sibling
+                if has_sibling(child, direction)
+                    continue
+                end
+
+                # Continue if child cell has no neighbor in that direction
+                if !has_neighbor(t, child_id, direction)
+                    continue
+                end
+                neighbor_id = t.neighbor_ids[direction, child_id]
+
+                if !has_children(t, neighbor_id)
+                    continue
+                end
+
+                # If the neighbor exists, is not a sibling, and has children, do not coarsen
+                skip = true
+                break
+            end
+        end
+        # Skip if a neighboring cell prevents coarsening
+        if skip
+            continue
+        end
 
-        # If the neighbor exists, is not a sibling, and has children, do not coarsen
-        skip = true
-        break
-      end
-    end
-    # Skip if a neighboring cell prevents coarsening
-    if skip
-      continue
-    end
 
-    # Flip sign of cell to be coarsened to such that we can easily find it
-    t.original_cell_ids[coarse_cell_id] = -t.original_cell_ids[coarse_cell_id]
+        # Flip sign of cell to be coarsened to such that we can easily find it
+        t.original_cell_ids[coarse_cell_id] = -t.original_cell_ids[coarse_cell_id]
 
-    # If a coarse cell has children that are all leaf cells, they must follow
-    # immediately due to depth-first ordering of the tree
-    count = n_children(t, coarse_cell_id)
-    @assert count == n_children_per_cell(t) "cell $coarse_cell_id does not have all child cells"
-    remove_shift!(t, coarse_cell_id + 1, coarse_cell_id + count)
+        # If a coarse cell has children that are all leaf cells, they must follow
+        # immediately due to depth-first ordering of the tree
+        count = n_children(t, coarse_cell_id)
+        @assert count==n_children_per_cell(t) "cell $coarse_cell_id does not have all child cells"
+        remove_shift!(t, coarse_cell_id + 1, coarse_cell_id + count)
 
-    # Take into account shifts in tree that alter cell ids
-    adjust_cell_ids!(sorted_by_level, coarse_cell_id, count)
+        # Take into account shifts in tree that alter cell ids
+        adjust_cell_ids!(sorted_by_level, coarse_cell_id, count)
 
-    # Keep track of number of coarsened cells
-    n_coarsened += 1
-  end
+        # Keep track of number 
of coarsened cells
+        n_coarsened += 1
+    end
 
-  # Determine list of *original* cell ids that were coarsened to
-  # Note: original_cell_ids contains the cell_id *before* coarsening. At
-  # coarsening, the coarsened parent cell's original_cell_ids value has its sign flipped
-  # to easily find it now.
-  @views coarsened_original_cells = (
-    -t.original_cell_ids[1:length(t)][t.original_cell_ids[1:length(t)] .< 0])
+    # Determine list of *original* cell ids that were coarsened to
+    # Note: original_cell_ids contains the cell_id *before* coarsening. At
+    # coarsening, the coarsened parent cell's original_cell_ids value has its sign flipped
+    # to easily find it now.
+    @views coarsened_original_cells = (-t.original_cell_ids[1:length(t)][t.original_cell_ids[1:length(t)] .< 0])
 
-  # Check if count of coarsened cells matches information in original_cell_ids
-  @assert n_coarsened == length(coarsened_original_cells) (
-    "Mismatch in number of coarsened cells")
+    # Check if count of coarsened cells matches information in original_cell_ids
+    @assert n_coarsened==length(coarsened_original_cells) ("Mismatch in number of coarsened cells")
 
-  return coarsened_original_cells
+    return coarsened_original_cells
 end
 
 # Wrap single-cell coarsening such that `sort(...)` does not complain
 coarsen!(t::AbstractTree, cell_id::Int) = coarsen!(t::AbstractTree, [cell_id])
 
-
 # Coarsen all viable parent cells with coordinates in a given rectangular box
 function coarsen_box!(t::AbstractTree{NDIMS}, coordinates_min::AbstractArray{Float64},
-                      coordinates_max::AbstractArray{Float64}) where NDIMS
-  for dim in 1:NDIMS
-    @assert coordinates_min[dim] < coordinates_max[dim] "Minimum coordinates are not minimum."
-  end
-
-  # Find all leaf cells within box
-  leaves = filter_leaf_cells(t) do cell_id
-    return (all(coordinates_min .< cell_coordinates(t, cell_id)) &&
-            all(coordinates_max .> cell_coordinates(t, cell_id)))
-  end
-
-  # Get list of unique parent ids for all leaf cells
-  parent_ids = unique(t.parent_ids[leaves])
-
-  # Filter parent ids to be within box
-  parents = filter(parent_ids) do cell_id
-    return (all(coordinates_min .< cell_coordinates(t, cell_id)) &&
-            all(coordinates_max .> cell_coordinates(t, cell_id)))
-  end
-
-  # Coarsen cells
-  coarsen!(t, parents)
+                      coordinates_max::AbstractArray{Float64}) where {NDIMS}
+    for dim in 1:NDIMS
+        @assert coordinates_min[dim]<coordinates_max[dim] "Minimum coordinates are not minimum."
+    end
+
+    # Find all leaf cells within box
+    leaves = filter_leaf_cells(t) do cell_id
+        return (all(coordinates_min .< cell_coordinates(t, cell_id)) &&
+                all(coordinates_max .> cell_coordinates(t, cell_id)))
+    end
+
+    # Get list of unique parent ids for all leaf cells
+    parent_ids = unique(t.parent_ids[leaves])
+
+    # Filter parent ids to be within box
+    parents = filter(parent_ids) do cell_id
+        return (all(coordinates_min .< cell_coordinates(t, cell_id)) &&
+                all(coordinates_max .> cell_coordinates(t, cell_id)))
+    end
+
+    # Coarsen cells
+    coarsen!(t, parents)
 end
 
 # Convenience method for 1D
 function coarsen_box!(t::AbstractTree{1}, coordinates_min::Real, coordinates_max::Real)
-  return coarsen_box!(t, [convert(Float64, coordinates_min)], [convert(Float64, coordinates_max)])
+    return coarsen_box!(t, [convert(Float64, coordinates_min)],
+                        [convert(Float64, coordinates_max)])
 end
 
-
 # Return coordinates of a child cell based on its relative position to the parent.
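# Worked aside for the formula implemented next (standalone, not Trixi.jl
# code): each child center is offset from the parent center by a quarter of
# the parent length per dimension, with the sign given by the child position
# (`child_sign` in Trixi.jl). In 1D, a parent at 0.0 with length 2.0 yields:
parent_coordinate, parent_length = 0.0, 2.0
child_length = parent_length / 2
left_child_center = parent_coordinate - child_length / 2    # == -0.5
right_child_center = parent_coordinate + child_length / 2   # == +0.5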
-function child_coordinates(::AbstractTree{NDIMS}, parent_coordinates, parent_length::Number, child::Int) where NDIMS
-  # Calculate length of child cells
-  child_length = parent_length / 2
-  return SVector(ntuple(d -> parent_coordinates[d] + child_sign(child, d) * child_length / 2, Val(NDIMS)))
+function child_coordinates(::AbstractTree{NDIMS}, parent_coordinates,
+                           parent_length::Number, child::Int) where {NDIMS}
+    # Calculate length of child cells
+    child_length = parent_length / 2
+    return SVector(ntuple(d -> parent_coordinates[d] +
+                               child_sign(child, d) * child_length / 2, Val(NDIMS)))
 end
 
-
 # Reset range of cells to values that are prone to cause errors as soon as they are used.
 #
 # Rationale: If an invalid cell is accidentally used, we want to know it as soon as possible.
@@ -679,121 +664,116 @@ end
 invalidate!(t::AbstractTree, id::Int) = invalidate!(t, id, id)
 invalidate!(t::AbstractTree) = invalidate!(t, 1, length(t))
 
-
 # Delete connectivity with parents/children/neighbors before cells are erased
 function delete_connectivity!(t::AbstractTree, first::Int, last::Int)
-  @assert first > 0
-  @assert first <= last
-  @assert last <= t.capacity + 1
-
-  # Iterate over all cells
-  for cell_id in first:last
-    # Delete connectivity from parent cell
-    if has_parent(t, cell_id)
-      parent_id = t.parent_ids[cell_id]
-      for child in 1:n_children_per_cell(t)
-        if t.child_ids[child, parent_id] == cell_id
-          t.child_ids[child, parent_id] = 0
-          break
+    @assert first > 0
+    @assert first <= last
+    @assert last <= t.capacity + 1
+
+    # Iterate over all cells
+    for cell_id in first:last
+        # Delete connectivity from parent cell
+        if has_parent(t, cell_id)
+            parent_id = t.parent_ids[cell_id]
+            for child in 1:n_children_per_cell(t)
+                if t.child_ids[child, parent_id] == cell_id
+                    t.child_ids[child, parent_id] = 0
+                    break
+                end
+            end
+        end
-        end
-      end
-    end
 
-    # Delete connectivity from child cells
-    for child in 1:n_children_per_cell(t)
-      if has_child(t, cell_id, child)
-        t.parent_ids[t.child_ids[child, cell_id]] = 0
-      end
-    end
+        # Delete connectivity from child cells
+        for child in 1:n_children_per_cell(t)
+            if has_child(t, cell_id, child)
+                t.parent_ids[t.child_ids[child, cell_id]] = 0
+            end
+        end
 
-    # Delete connectivity from neighboring cells
-    for direction in eachdirection(t)
-      if has_neighbor(t, cell_id, direction)
-        t.neighbor_ids[opposite_direction(direction), t.neighbor_ids[direction, cell_id]] = 0
-      end
+        # Delete connectivity from neighboring cells
+        for direction in eachdirection(t)
+            if has_neighbor(t, cell_id, direction)
+                t.neighbor_ids[opposite_direction(direction), t.neighbor_ids[direction, cell_id]] = 0
+            end
+        end
     end
-  end
 end
 
-
 # Move connectivity with parents/children/neighbors after cells have been moved
 function move_connectivity!(t::AbstractTree, first::Int, last::Int, destination::Int)
-  @assert first > 0
-  @assert first <= last
-  @assert last <= t.capacity + 1
-  @assert destination > 0
-  @assert destination <= t.capacity + 1
-
-  # Strategy
-  # 1) Loop over moved cells (at target location)
-  # 2) Check if parent/children/neighbors connections are to a cell that was moved
-  #    a) if cell was moved: apply offset to current cell
-  #    b) if cell was not moved: go to connected cell and update connectivity there
-
-  offset = destination - first
-  has_moved(n) = (first <= n <= last)
-
-  for source in first:last
-    target = source + offset
-
-    # Update parent
-    if has_parent(t, target)
-      # Get parent cell
-      parent_id = t.parent_ids[target]
-      if has_moved(parent_id)
-        # If parent itself was moved, just 
update parent id accordingly - t.parent_ids[target] += offset - else - # If parent was not moved, update its corresponding child id - for child in 1:n_children_per_cell(t) - if t.child_ids[child, parent_id] == source - t.child_ids[child, parent_id] = target - end + @assert first > 0 + @assert first <= last + @assert last <= t.capacity + 1 + @assert destination > 0 + @assert destination <= t.capacity + 1 + + # Strategy + # 1) Loop over moved cells (at target location) + # 2) Check if parent/children/neighbors connections are to a cell that was moved + # a) if cell was moved: apply offset to current cell + # b) if cell was not moved: go to connected cell and update connectivity there + + offset = destination - first + has_moved(n) = (first <= n <= last) + + for source in first:last + target = source + offset + + # Update parent + if has_parent(t, target) + # Get parent cell + parent_id = t.parent_ids[target] + if has_moved(parent_id) + # If parent itself was moved, just update parent id accordingly + t.parent_ids[target] += offset + else + # If parent was not moved, update its corresponding child id + for child in 1:n_children_per_cell(t) + if t.child_ids[child, parent_id] == source + t.child_ids[child, parent_id] = target + end + end + end end - end - end - # Update children - for child in 1:n_children_per_cell(t) - if has_child(t, target, child) - # Get child cell - child_id = t.child_ids[child, target] - if has_moved(child_id) - # If child itself was moved, just update child id accordingly - t.child_ids[child, target] += offset - else - # If child was not moved, update its parent id - t.parent_ids[child_id] = target + # Update children + for child in 1:n_children_per_cell(t) + if has_child(t, target, child) + # Get child cell + child_id = t.child_ids[child, target] + if has_moved(child_id) + # If child itself was moved, just update child id accordingly + t.child_ids[child, target] += offset + else + # If child was not moved, update its parent id + t.parent_ids[child_id] = target + end + end end - end - end - # Update neighbors - for direction in eachdirection(t) - if has_neighbor(t, target, direction) - # Get neighbor cell - neighbor_id = t.neighbor_ids[direction, target] - if has_moved(neighbor_id) - # If neighbor itself was moved, just update neighbor id accordingly - t.neighbor_ids[direction, target] += offset - else - # If neighbor was not moved, update its opposing neighbor id - t.neighbor_ids[opposite_direction(direction), neighbor_id] = target + # Update neighbors + for direction in eachdirection(t) + if has_neighbor(t, target, direction) + # Get neighbor cell + neighbor_id = t.neighbor_ids[direction, target] + if has_moved(neighbor_id) + # If neighbor itself was moved, just update neighbor id accordingly + t.neighbor_ids[direction, target] += offset + else + # If neighbor was not moved, update its opposing neighbor id + t.neighbor_ids[opposite_direction(direction), neighbor_id] = target + end + end end - end end - end end - # Raw copy operation for ranges of cells. 
# # This method is used by the higher-level copy operations for AbstractContainer # function raw_copy!(target::AbstractTree, source::AbstractTree, first::Int, last::Int, destination::Int) end - # Reset data structures by recreating all internal storage containers and invalidating all elements # function reset_data_structures!(t::AbstractTree{NDIMS}) where NDIMS end - end # @muladd diff --git a/src/meshes/dgmulti_meshes.jl b/src/meshes/dgmulti_meshes.jl index c41f03abcbf..7ae7c0f904e 100644 --- a/src/meshes/dgmulti_meshes.jl +++ b/src/meshes/dgmulti_meshes.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent """ DGMultiMesh{NDIMS, ...} @@ -12,33 +13,39 @@ dispatchable type. This is intended to store geometric data and connectivities f mesh (Cartesian, affine, curved, structured/unstructured). """ struct DGMultiMesh{NDIMS, MeshType, MeshDataT <: MeshData{NDIMS}, BoundaryFaceT} - md::MeshDataT - boundary_faces::BoundaryFaceT + md::MeshDataT + boundary_faces::BoundaryFaceT end # enable use of @set and setproperties(...) for DGMultiMesh -ConstructionBase.constructorof(::Type{DGMultiMesh{T1, T2, T3, T4}}) where {T1, T2, T3, T4} = DGMultiMesh{T1, T2, T3, T4} +function ConstructionBase.constructorof(::Type{DGMultiMesh{T1, T2, T3, T4}}) where {T1, + T2, + T3, + T4} + DGMultiMesh{T1, T2, T3, T4} +end Base.ndims(::DGMultiMesh{NDIMS}) where {NDIMS} = NDIMS function Base.show(io::IO, mesh::DGMultiMesh{NDIMS, MeshType}) where {NDIMS, MeshType} - @nospecialize mesh # reduce precompilation time - print(io, "$MeshType DGMultiMesh with NDIMS = $NDIMS.") + @nospecialize mesh # reduce precompilation time + print(io, "$MeshType DGMultiMesh with NDIMS = $NDIMS.") end -function Base.show(io::IO, ::MIME"text/plain", mesh::DGMultiMesh{NDIMS, MeshType}) where {NDIMS, MeshType} - @nospecialize mesh # reduce precompilation time - if get(io, :compact, false) - show(io, mesh) - else - summary_header(io, "DGMultiMesh{$NDIMS, $MeshType}, ") - summary_line(io, "number of elements", mesh.md.num_elements) - summary_line(io, "number of boundaries", length(mesh.boundary_faces)) - for (boundary_name, faces) in mesh.boundary_faces - summary_line(increment_indent(io), "nfaces on $boundary_name", length(faces)) +function Base.show(io::IO, ::MIME"text/plain", + mesh::DGMultiMesh{NDIMS, MeshType}) where {NDIMS, MeshType} + @nospecialize mesh # reduce precompilation time + if get(io, :compact, false) + show(io, mesh) + else + summary_header(io, "DGMultiMesh{$NDIMS, $MeshType}, ") + summary_line(io, "number of elements", mesh.md.num_elements) + summary_line(io, "number of boundaries", length(mesh.boundary_faces)) + for (boundary_name, faces) in mesh.boundary_faces + summary_line(increment_indent(io), "nfaces on $boundary_name", + length(faces)) + end + summary_footer(io) end - summary_footer(io) - end end - end # @muladd diff --git a/src/meshes/face_interpolant.jl b/src/meshes/face_interpolant.jl index be2f2ddbd76..201cef9a062 100644 --- a/src/meshes/face_interpolant.jl +++ b/src/meshes/face_interpolant.jl @@ -3,50 +3,52 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # CurvedFace{RealT<:Real} # # Contains the data needed to represent a curved face with data points (x,y,z) as a Lagrange polynomial # interpolant written in barycentric form at a given set of nodes. 
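# Illustrative aside (standalone Julia, not Trixi.jl code): the 1D barycentric
# Lagrange interpolation that `lagrange_interpolation_2d` below applies along
# each line of nodes. This sketch assumes the evaluation point does not
# coincide with a node; Trixi.jl's own `lagrange_interpolation` handles the
# general case and remains the authoritative implementation.
function demo_barycentric_weights(nodes)
    weights = ones(eltype(nodes), length(nodes))
    for j in eachindex(nodes), k in eachindex(nodes)
        k == j && continue
        weights[j] /= (nodes[j] - nodes[k])
    end
    return weights
end

function demo_barycentric_interpolation(x, nodes, values, weights)
    numerator = zero(x)
    denominator = zero(x)
    for j in eachindex(nodes)
        term = weights[j] / (x - nodes[j])
        numerator += term * values[j]
        denominator += term
    end
    return numerator / denominator
end

nodes = [-1.0, 0.0, 1.0]
weights = demo_barycentric_weights(nodes)
values = nodes .^ 2    # samples of f(x) = x^2
demo_barycentric_interpolation(0.5, nodes, values, weights)  # == 0.25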
-struct CurvedFace{RealT<:Real} - nodes ::Vector{RealT} - barycentric_weights ::Vector{RealT} - coordinates ::Array{RealT, 3} #[ndims, nnodes, nnodes] +struct CurvedFace{RealT <: Real} + nodes::Vector{RealT} + barycentric_weights::Vector{RealT} + coordinates::Array{RealT, 3} #[ndims, nnodes, nnodes] end - # evaluate the Gamma face interpolant at a particular point s = (s_1, s_2) and return the (x,y,z) coordinate function evaluate_at(s, boundary_face::CurvedFace) - - @unpack nodes, barycentric_weights, coordinates = boundary_face - - x_coordinate_at_s_on_boundary_face = lagrange_interpolation_2d(s, nodes, view(coordinates, 1, :, :), - barycentric_weights) - y_coordinate_at_s_on_boundary_face = lagrange_interpolation_2d(s, nodes, view(coordinates, 2, :, :), - barycentric_weights) - z_coordinate_at_s_on_boundary_face = lagrange_interpolation_2d(s, nodes, view(coordinates, 3, :, :), - barycentric_weights) - - return x_coordinate_at_s_on_boundary_face, - y_coordinate_at_s_on_boundary_face, - z_coordinate_at_s_on_boundary_face + @unpack nodes, barycentric_weights, coordinates = boundary_face + + x_coordinate_at_s_on_boundary_face = lagrange_interpolation_2d(s, nodes, + view(coordinates, 1, + :, :), + barycentric_weights) + y_coordinate_at_s_on_boundary_face = lagrange_interpolation_2d(s, nodes, + view(coordinates, 2, + :, :), + barycentric_weights) + z_coordinate_at_s_on_boundary_face = lagrange_interpolation_2d(s, nodes, + view(coordinates, 3, + :, :), + barycentric_weights) + + return x_coordinate_at_s_on_boundary_face, + y_coordinate_at_s_on_boundary_face, + z_coordinate_at_s_on_boundary_face end - # Calculate a 2D Lagrange interpolating polynomial in barycentric 2 form # of a function f(x,y) at a given coordinate (x,y) for a given node distribution. function lagrange_interpolation_2d(x, nodes, function_values, barycentric_weights) - - f_intermediate = zeros(eltype(function_values), length(nodes)) - for j in eachindex(nodes) - f_intermediate[j] = lagrange_interpolation(x[2], nodes, view(function_values, j, :), - barycentric_weights) - end - point_value = lagrange_interpolation(x[1], nodes, f_intermediate, barycentric_weights) - - return point_value + f_intermediate = zeros(eltype(function_values), length(nodes)) + for j in eachindex(nodes) + f_intermediate[j] = lagrange_interpolation(x[2], nodes, + view(function_values, j, :), + barycentric_weights) + end + point_value = lagrange_interpolation(x[1], nodes, f_intermediate, + barycentric_weights) + + return point_value end - - end # @muladd diff --git a/src/meshes/mesh_io.jl b/src/meshes/mesh_io.jl index beef5341e26..b9c462fa15a 100644 --- a/src/meshes/mesh_io.jl +++ b/src/meshes/mesh_io.jl @@ -3,463 +3,464 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Save current mesh with some context information as an HDF5 file. 
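# Illustrative aside (standalone sketch using the same HDF5.jl calls as this
# file; the file name is hypothetical): metadata is stored as attributes,
# bulk data as datasets, and reading mirrors `load_mesh_serial` further below.
using HDF5

h5open("example_mesh.h5", "w") do file
    attributes(file)["ndims"] = 2                 # scalar attribute
    attributes(file)["periodicity"] = [true, true]
    file["levels"] = [0, 1, 1, 1, 1]              # dataset
end

ndims_ = h5open("example_mesh.h5", "r") do file
    read(attributes(file)["ndims"])
end
# ndims_ == 2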
-function save_mesh_file(mesh::Union{TreeMesh, P4estMesh}, output_directory, timestep=0) - save_mesh_file(mesh, output_directory, timestep, mpi_parallel(mesh)) +function save_mesh_file(mesh::Union{TreeMesh, P4estMesh}, output_directory, + timestep = 0) + save_mesh_file(mesh, output_directory, timestep, mpi_parallel(mesh)) end function save_mesh_file(mesh::TreeMesh, output_directory, timestep, mpi_parallel::False) - # Create output directory (if it does not exist) - mkpath(output_directory) + # Create output directory (if it does not exist) + mkpath(output_directory) - # Determine file name based on existence of meaningful time step - if timestep > 0 - filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep)) - else - filename = joinpath(output_directory, "mesh.h5") - end - - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - n_cells = length(mesh.tree) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["n_cells"] = n_cells - attributes(file)["n_leaf_cells"] = count_leaf_cells(mesh.tree) - attributes(file)["minimum_level"] = minimum_level(mesh.tree) - attributes(file)["maximum_level"] = maximum_level(mesh.tree) - attributes(file)["center_level_0"] = mesh.tree.center_level_0 - attributes(file)["length_level_0"] = mesh.tree.length_level_0 - attributes(file)["periodicity"] = collect(mesh.tree.periodicity) - - # Add tree data - file["parent_ids"] = @view mesh.tree.parent_ids[1:n_cells] - file["child_ids"] = @view mesh.tree.child_ids[:, 1:n_cells] - file["neighbor_ids"] = @view mesh.tree.neighbor_ids[:, 1:n_cells] - file["levels"] = @view mesh.tree.levels[1:n_cells] - file["coordinates"] = @view mesh.tree.coordinates[:, 1:n_cells] - end - - return filename + # Determine file name based on existence of meaningful time step + if timestep > 0 + filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep)) + else + filename = joinpath(output_directory, "mesh.h5") + end + + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + n_cells = length(mesh.tree) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["n_cells"] = n_cells + attributes(file)["n_leaf_cells"] = count_leaf_cells(mesh.tree) + attributes(file)["minimum_level"] = minimum_level(mesh.tree) + attributes(file)["maximum_level"] = maximum_level(mesh.tree) + attributes(file)["center_level_0"] = mesh.tree.center_level_0 + attributes(file)["length_level_0"] = mesh.tree.length_level_0 + attributes(file)["periodicity"] = collect(mesh.tree.periodicity) + + # Add tree data + file["parent_ids"] = @view mesh.tree.parent_ids[1:n_cells] + file["child_ids"] = @view mesh.tree.child_ids[:, 1:n_cells] + file["neighbor_ids"] = @view mesh.tree.neighbor_ids[:, 1:n_cells] + file["levels"] = @view mesh.tree.levels[1:n_cells] + file["coordinates"] = @view mesh.tree.coordinates[:, 1:n_cells] + end + + return filename end # Save current mesh with some context information as an HDF5 file. 
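# Illustrative aside (standalone sketch built from the MPI.jl calls already
# used in this file): the "root rank writes, every rank returns the same path"
# pattern that the parallel `save_mesh_file` methods implement. The barrier is
# an extra safety assumption of this sketch, not something the functions below
# rely on.
using MPI

MPI.Init()
comm = MPI.COMM_WORLD
filename = "mesh.h5"                      # identical on all ranks
if MPI.Comm_rank(comm) == 0
    # ... write the replicated mesh data once, on the root rank only ...
end
MPI.Barrier(comm)                         # ensure the file exists everywhere
# every rank can now use/return `filename`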
function save_mesh_file(mesh::TreeMesh, output_directory, timestep, mpi_parallel::True) - # Create output directory (if it does not exist) - mpi_isroot() && mkpath(output_directory) + # Create output directory (if it does not exist) + mpi_isroot() && mkpath(output_directory) - # Determine file name based on existence of meaningful time step - if timestep >= 0 - filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep)) - else - filename = joinpath(output_directory, "mesh.h5") - end + # Determine file name based on existence of meaningful time step + if timestep >= 0 + filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep)) + else + filename = joinpath(output_directory, "mesh.h5") + end + + # Since the mesh is replicated on all ranks, only save from MPI root + if !mpi_isroot() + return filename + end + + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + n_cells = length(mesh.tree) + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["n_cells"] = n_cells + attributes(file)["n_leaf_cells"] = count_leaf_cells(mesh.tree) + attributes(file)["minimum_level"] = minimum_level(mesh.tree) + attributes(file)["maximum_level"] = maximum_level(mesh.tree) + attributes(file)["center_level_0"] = mesh.tree.center_level_0 + attributes(file)["length_level_0"] = mesh.tree.length_level_0 + attributes(file)["periodicity"] = collect(mesh.tree.periodicity) + + # Add tree data + file["parent_ids"] = @view mesh.tree.parent_ids[1:n_cells] + file["child_ids"] = @view mesh.tree.child_ids[:, 1:n_cells] + file["neighbor_ids"] = @view mesh.tree.neighbor_ids[:, 1:n_cells] + file["levels"] = @view mesh.tree.levels[1:n_cells] + file["coordinates"] = @view mesh.tree.coordinates[:, 1:n_cells] + end - # Since the mesh is replicated on all ranks, only save from MPI root - if !mpi_isroot() return filename - end - - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - n_cells = length(mesh.tree) - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["n_cells"] = n_cells - attributes(file)["n_leaf_cells"] = count_leaf_cells(mesh.tree) - attributes(file)["minimum_level"] = minimum_level(mesh.tree) - attributes(file)["maximum_level"] = maximum_level(mesh.tree) - attributes(file)["center_level_0"] = mesh.tree.center_level_0 - attributes(file)["length_level_0"] = mesh.tree.length_level_0 - attributes(file)["periodicity"] = collect(mesh.tree.periodicity) - - # Add tree data - file["parent_ids"] = @view mesh.tree.parent_ids[1:n_cells] - file["child_ids"] = @view mesh.tree.child_ids[:, 1:n_cells] - file["neighbor_ids"] = @view mesh.tree.neighbor_ids[:, 1:n_cells] - file["levels"] = @view mesh.tree.levels[1:n_cells] - file["coordinates"] = @view mesh.tree.coordinates[:, 1:n_cells] - end - - return filename end - # Does not save the mesh itself to an HDF5 file. Instead saves important attributes # of the mesh, like its size and the type of boundary mapping function. 
# Then, within Trixi2Vtk, the StructuredMesh and its node coordinates are reconstructed from
# these attributes for plotting purposes
function save_mesh_file(mesh::StructuredMesh, output_directory)
-  # Create output directory (if it does not exist)
-  mkpath(output_directory)
+    # Create output directory (if it does not exist)
+    mkpath(output_directory)
 
-  filename = joinpath(output_directory, "mesh.h5")
+    filename = joinpath(output_directory, "mesh.h5")
 
-  # Open file (clobber existing content)
-  h5open(filename, "w") do file
-    # Add context information as attributes
-    attributes(file)["mesh_type"] = get_name(mesh)
-    attributes(file)["ndims"] = ndims(mesh)
-    attributes(file)["size"] = collect(size(mesh))
-    attributes(file)["mapping"] = mesh.mapping_as_string
-  end
+    # Open file (clobber existing content)
+    h5open(filename, "w") do file
+        # Add context information as attributes
+        attributes(file)["mesh_type"] = get_name(mesh)
+        attributes(file)["ndims"] = ndims(mesh)
+        attributes(file)["size"] = collect(size(mesh))
+        attributes(file)["mapping"] = mesh.mapping_as_string
+    end
 
-  return filename
+    return filename
 end
 
-
 # Does not save the mesh itself to an HDF5 file. Instead saves important attributes
 # of the mesh, like its size and the corresponding `.mesh` file used to construct the mesh.
 # Then, within Trixi2Vtk, the UnstructuredMesh2D and its node coordinates are reconstructed
 # from these attributes for plotting purposes
 function save_mesh_file(mesh::UnstructuredMesh2D, output_directory)
-  # Create output directory (if it does not exist)
-  mkpath(output_directory)
-
-  filename = joinpath(output_directory, "mesh.h5")
-
-  # Open file (clobber existing content)
-  h5open(filename, "w") do file
-    # Add context information as attributes
-    attributes(file)["mesh_type"] = get_name(mesh)
-    attributes(file)["ndims"] = ndims(mesh)
-    attributes(file)["size"] = length(mesh)
-    attributes(file)["mesh_filename"] = mesh.filename
-    attributes(file)["periodicity"] = collect(mesh.periodicity)
-  end
-
-  return filename
-end
+    # Create output directory (if it does not exist)
+    mkpath(output_directory)
+
+    filename = joinpath(output_directory, "mesh.h5")
+    # Open file (clobber existing content)
+    h5open(filename, "w") do file
+        # Add context information as attributes
+        attributes(file)["mesh_type"] = get_name(mesh)
+        attributes(file)["ndims"] = ndims(mesh)
+        attributes(file)["size"] = length(mesh)
+        attributes(file)["mesh_filename"] = mesh.filename
+        attributes(file)["periodicity"] = collect(mesh.periodicity)
+    end
+
+    return filename
+end
 
 # Does not save the mesh itself to an HDF5 file. Instead saves important attributes
 # of the mesh, like its size and the type of boundary mapping function.
# Then, within Trixi2Vtk, the P4estMesh and its node coordinates are reconstructed from
# these attributes for plotting purposes
-function save_mesh_file(mesh::P4estMesh, output_directory, timestep, mpi_parallel::False)
-  # Create output directory (if it does not exist)
-  mkpath(output_directory)
-
-  # Determine file name based on existence of meaningful time step
-  if timestep > 0
-    filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep))
-    p4est_filename = @sprintf("p4est_data_%06d", timestep)
-  else
-    filename = joinpath(output_directory, "mesh.h5")
-    p4est_filename = "p4est_data"
-  end
+function save_mesh_file(mesh::P4estMesh, output_directory, timestep,
+                        mpi_parallel::False)
+    # Create output directory (if it does not exist)
+    mkpath(output_directory)
 
-  p4est_file = joinpath(output_directory, p4est_filename)
+    # Determine file name based on existence of meaningful time step
+    if timestep > 0
+        filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep))
+        p4est_filename = @sprintf("p4est_data_%06d", timestep)
+    else
+        filename = joinpath(output_directory, "mesh.h5")
+        p4est_filename = "p4est_data"
+    end
 
-  # Save the complete connectivity and `p4est` data to disk.
-  save_p4est!(p4est_file, mesh.p4est)
+    p4est_file = joinpath(output_directory, p4est_filename)
 
-  # Open file (clobber existing content)
-  h5open(filename, "w") do file
-    # Add context information as attributes
-    attributes(file)["mesh_type"] = get_name(mesh)
-    attributes(file)["ndims"] = ndims(mesh)
-    attributes(file)["p4est_file"] = p4est_filename
+    # Save the complete connectivity and `p4est` data to disk.
+    save_p4est!(p4est_file, mesh.p4est)
 
-    file["tree_node_coordinates"] = mesh.tree_node_coordinates
-    file["nodes"] = Vector(mesh.nodes) # the mesh uses `SVector`s for the nodes
-                                       # to increase the runtime performance
-                                       # but HDF5 can only handle plain arrays
-    file["boundary_names"] = mesh.boundary_names .|> String
-  end
+    # Open file (clobber existing content)
+    h5open(filename, "w") do file
+        # Add context information as attributes
+        attributes(file)["mesh_type"] = get_name(mesh)
+        attributes(file)["ndims"] = ndims(mesh)
+        attributes(file)["p4est_file"] = p4est_filename
 
-  return filename
+        file["tree_node_coordinates"] = mesh.tree_node_coordinates
+        file["nodes"] = Vector(mesh.nodes) # the mesh uses `SVector`s for the nodes
+        # to increase the runtime performance
+        # but HDF5 can only handle plain arrays
+        file["boundary_names"] = mesh.boundary_names .|> String
+    end
+
+    return filename
 end
 
 function save_mesh_file(mesh::P4estMesh, output_directory, timestep, mpi_parallel::True)
-  # Create output directory (if it does not exist)
-  mpi_isroot() && mkpath(output_directory)
-
-  # Determine file name based on existence of meaningful time step
-  if timestep > 0
-    filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep))
-    p4est_filename = @sprintf("p4est_data_%06d", timestep)
-  else
-    filename = joinpath(output_directory, "mesh.h5")
-    p4est_filename = "p4est_data"
-  end
+    # Create output directory (if it does not exist)
+    mpi_isroot() && mkpath(output_directory)
 
-  p4est_file = joinpath(output_directory, p4est_filename)
+    # Determine file name based on existence of meaningful time step
+    if timestep > 0
+        filename = joinpath(output_directory, @sprintf("mesh_%06d.h5", timestep))
+        p4est_filename = @sprintf("p4est_data_%06d", timestep)
+    else
+        filename = joinpath(output_directory, "mesh.h5")
+        p4est_filename = "p4est_data"
+    end
+
+    p4est_file = joinpath(output_directory, 
p4est_filename) - # Save the complete connectivity/p4est data to disk. - save_p4est!(p4est_file, mesh.p4est) + # Save the complete connectivity/p4est data to disk. + save_p4est!(p4est_file, mesh.p4est) + + # Since the mesh attributes are replicated on all ranks, only save from MPI root + if !mpi_isroot() + return filename + end + + # Open file (clobber existing content) + h5open(filename, "w") do file + # Add context information as attributes + attributes(file)["mesh_type"] = get_name(mesh) + attributes(file)["ndims"] = ndims(mesh) + attributes(file)["p4est_file"] = p4est_filename + + file["tree_node_coordinates"] = mesh.tree_node_coordinates + file["nodes"] = Vector(mesh.nodes) # the mesh uses `SVector`s for the nodes + # to increase the runtime performance + # but HDF5 can only handle plain arrays + file["boundary_names"] = mesh.boundary_names .|> String + end - # Since the mesh attributes are replicated on all ranks, only save from MPI root - if !mpi_isroot() return filename - end - - # Open file (clobber existing content) - h5open(filename, "w") do file - # Add context information as attributes - attributes(file)["mesh_type"] = get_name(mesh) - attributes(file)["ndims"] = ndims(mesh) - attributes(file)["p4est_file"] = p4est_filename - - file["tree_node_coordinates"] = mesh.tree_node_coordinates - file["nodes"] = Vector(mesh.nodes) # the mesh uses `SVector`s for the nodes - # to increase the runtime performance - # but HDF5 can only handle plain arrays - file["boundary_names"] = mesh.boundary_names .|> String - end - - return filename end - """ load_mesh(restart_file::AbstractString; n_cells_max) Load the mesh from the `restart_file`. """ -function load_mesh(restart_file::AbstractString; n_cells_max=0, RealT=Float64) - if mpi_isparallel() - mesh_file = get_restart_mesh_filename(restart_file, True()) - return load_mesh_parallel(mesh_file; n_cells_max=n_cells_max, RealT=RealT) - else - mesh_file = get_restart_mesh_filename(restart_file, False()) - load_mesh_serial(mesh_file; n_cells_max=n_cells_max, RealT=RealT) - end +function load_mesh(restart_file::AbstractString; n_cells_max = 0, RealT = Float64) + if mpi_isparallel() + mesh_file = get_restart_mesh_filename(restart_file, True()) + return load_mesh_parallel(mesh_file; n_cells_max = n_cells_max, RealT = RealT) + else + mesh_file = get_restart_mesh_filename(restart_file, False()) + load_mesh_serial(mesh_file; n_cells_max = n_cells_max, RealT = RealT) + end end function load_mesh_serial(mesh_file::AbstractString; n_cells_max, RealT) - ndims, mesh_type = h5open(mesh_file, "r") do file - return read(attributes(file)["ndims"]), - read(attributes(file)["mesh_type"]) - end - - if mesh_type == "TreeMesh" - n_cells = h5open(mesh_file, "r") do file - return read(attributes(file)["n_cells"]) - end - mesh = TreeMesh(SerialTree{ndims}, max(n_cells, n_cells_max)) - load_mesh!(mesh, mesh_file) - elseif mesh_type == "StructuredMesh" - size_, mapping_as_string = h5open(mesh_file, "r") do file - return read(attributes(file)["size"]), - read(attributes(file)["mapping"]) - end - - size = Tuple(size_) - - # TODO: `@eval` is evil - # A temporary workaround to evaluate the code that defines the domain mapping in a local scope. - # This prevents errors when multiple restart elixirs are executed in one session, where one - # defines `mapping` as a variable, while the other defines it as a function. - # - # This should be replaced with something more robust and secure, - # see https://github.com/trixi-framework/Trixi.jl/issues/541). 
- expr = Meta.parse(mapping_as_string) - if expr.head == :toplevel - expr.head = :block - end - - if ndims == 1 - mapping = @eval function(xi) - $expr - mapping(xi) - end - elseif ndims == 2 - mapping = @eval function(xi, eta) - $expr - mapping(xi, eta) - end - else # ndims == 3 - mapping = @eval function(xi, eta, zeta) - $expr - mapping(xi, eta, zeta) - end + ndims, mesh_type = h5open(mesh_file, "r") do file + return read(attributes(file)["ndims"]), + read(attributes(file)["mesh_type"]) end - mesh = StructuredMesh(size, mapping; RealT=RealT, unsaved_changes=false, - mapping_as_string=mapping_as_string) - elseif mesh_type == "UnstructuredMesh2D" - mesh_filename, periodicity_ = h5open(mesh_file, "r") do file - return read(attributes(file)["mesh_filename"]), - read(attributes(file)["periodicity"]) - end - mesh = UnstructuredMesh2D(mesh_filename; RealT=RealT, periodicity=periodicity_, - unsaved_changes=false) - elseif mesh_type == "P4estMesh" - p4est_filename, tree_node_coordinates, + if mesh_type == "TreeMesh" + n_cells = h5open(mesh_file, "r") do file + return read(attributes(file)["n_cells"]) + end + mesh = TreeMesh(SerialTree{ndims}, max(n_cells, n_cells_max)) + load_mesh!(mesh, mesh_file) + elseif mesh_type == "StructuredMesh" + size_, mapping_as_string = h5open(mesh_file, "r") do file + return read(attributes(file)["size"]), + read(attributes(file)["mapping"]) + end + + size = Tuple(size_) + + # TODO: `@eval` is evil + # A temporary workaround to evaluate the code that defines the domain mapping in a local scope. + # This prevents errors when multiple restart elixirs are executed in one session, where one + # defines `mapping` as a variable, while the other defines it as a function. + # + # This should be replaced with something more robust and secure, + # see https://github.com/trixi-framework/Trixi.jl/issues/541). 
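# ------------------------------------------------------------------------
# Illustrative aside (standalone, top-level Julia with a hypothetical
# mapping string; not part of this function): the `Meta.parse` + `@eval`
# mechanism used in the lines that follow.
demo_mapping_string = "mapping(xi) = 2 * xi"
demo_expr = Meta.parse(demo_mapping_string)
demo_mapping = @eval begin
    $demo_expr
    mapping
end
demo_mapping(0.5)  # == 1.0
# ------------------------------------------------------------------------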
+ expr = Meta.parse(mapping_as_string) + if expr.head == :toplevel + expr.head = :block + end + + if ndims == 1 + mapping = @eval function (xi) + $expr + mapping(xi) + end + elseif ndims == 2 + mapping = @eval function (xi, eta) + $expr + mapping(xi, eta) + end + else # ndims == 3 + mapping = @eval function (xi, eta, zeta) + $expr + mapping(xi, eta, zeta) + end + end + + mesh = StructuredMesh(size, mapping; RealT = RealT, unsaved_changes = false, + mapping_as_string = mapping_as_string) + elseif mesh_type == "UnstructuredMesh2D" + mesh_filename, periodicity_ = h5open(mesh_file, "r") do file + return read(attributes(file)["mesh_filename"]), + read(attributes(file)["periodicity"]) + end + mesh = UnstructuredMesh2D(mesh_filename; RealT = RealT, + periodicity = periodicity_, + unsaved_changes = false) + elseif mesh_type == "P4estMesh" + p4est_filename, tree_node_coordinates, nodes, boundary_names_ = h5open(mesh_file, "r") do file - return read(attributes(file)["p4est_file"]), - read(file["tree_node_coordinates"]), - read(file["nodes"]), - read(file["boundary_names"]) - end + return read(attributes(file)["p4est_file"]), + read(file["tree_node_coordinates"]), + read(file["nodes"]), + read(file["boundary_names"]) + end - boundary_names = boundary_names_ .|> Symbol + boundary_names = boundary_names_ .|> Symbol - p4est_file = joinpath(dirname(mesh_file), p4est_filename) - # Prevent Julia crashes when `p4est` can't find the file - @assert isfile(p4est_file) + p4est_file = joinpath(dirname(mesh_file), p4est_filename) + # Prevent Julia crashes when `p4est` can't find the file + @assert isfile(p4est_file) - p4est = load_p4est(p4est_file, Val(ndims)) + p4est = load_p4est(p4est_file, Val(ndims)) - mesh = P4estMesh{ndims}(p4est, tree_node_coordinates, - nodes, boundary_names, "", false, true) - else - error("Unknown mesh type!") - end + mesh = P4estMesh{ndims}(p4est, tree_node_coordinates, + nodes, boundary_names, "", false, true) + else + error("Unknown mesh type!") + end - return mesh + return mesh end function load_mesh!(mesh::SerialTreeMesh, mesh_file::AbstractString) - mesh.current_filename = mesh_file - mesh.unsaved_changes = false - - # Read mesh file - h5open(mesh_file, "r") do file - # Set domain information - mesh.tree.center_level_0 = read(attributes(file)["center_level_0"]) - mesh.tree.length_level_0 = read(attributes(file)["length_level_0"]) - mesh.tree.periodicity = Tuple(read(attributes(file)["periodicity"])) - - # Set length - n_cells = read(attributes(file)["n_cells"]) - resize!(mesh.tree, n_cells) - - # Read in data - mesh.tree.parent_ids[1:n_cells] = read(file["parent_ids"]) - mesh.tree.child_ids[:, 1:n_cells] = read(file["child_ids"]) - mesh.tree.neighbor_ids[:, 1:n_cells] = read(file["neighbor_ids"]) - mesh.tree.levels[1:n_cells] = read(file["levels"]) - mesh.tree.coordinates[:, 1:n_cells] = read(file["coordinates"]) - end - - return mesh -end + mesh.current_filename = mesh_file + mesh.unsaved_changes = false + # Read mesh file + h5open(mesh_file, "r") do file + # Set domain information + mesh.tree.center_level_0 = read(attributes(file)["center_level_0"]) + mesh.tree.length_level_0 = read(attributes(file)["length_level_0"]) + mesh.tree.periodicity = Tuple(read(attributes(file)["periodicity"])) + + # Set length + n_cells = read(attributes(file)["n_cells"]) + resize!(mesh.tree, n_cells) + + # Read in data + mesh.tree.parent_ids[1:n_cells] = read(file["parent_ids"]) + mesh.tree.child_ids[:, 1:n_cells] = read(file["child_ids"]) + mesh.tree.neighbor_ids[:, 1:n_cells] = 
read(file["neighbor_ids"]) + mesh.tree.levels[1:n_cells] = read(file["levels"]) + mesh.tree.coordinates[:, 1:n_cells] = read(file["coordinates"]) + end + + return mesh +end function load_mesh_parallel(mesh_file::AbstractString; n_cells_max, RealT) - if mpi_isroot() - ndims_, mesh_type = h5open(mesh_file, "r") do file - return read(attributes(file)["ndims"]), - read(attributes(file)["mesh_type"]) - end - MPI.Bcast!(Ref(ndims_), mpi_root(), mpi_comm()) - MPI.bcast(mesh_type, mpi_root(), mpi_comm()) - else - ndims_ = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] - mesh_type = MPI.bcast(nothing, mpi_root(), mpi_comm()) - end - - if mesh_type == "TreeMesh" if mpi_isroot() - n_cells = h5open(mesh_file, "r") do file - read(attributes(file)["n_cells"]) - end - MPI.Bcast!(Ref(ndims_), mpi_root(), mpi_comm()) - MPI.Bcast!(Ref(n_cells), mpi_root(), mpi_comm()) + ndims_, mesh_type = h5open(mesh_file, "r") do file + return read(attributes(file)["ndims"]), + read(attributes(file)["mesh_type"]) + end + MPI.Bcast!(Ref(ndims_), mpi_root(), mpi_comm()) + MPI.bcast(mesh_type, mpi_root(), mpi_comm()) else - ndims_ = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] - n_cells = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] + ndims_ = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] + mesh_type = MPI.bcast(nothing, mpi_root(), mpi_comm()) end - mesh = TreeMesh(ParallelTree{ndims_}, max(n_cells, n_cells_max)) - load_mesh!(mesh, mesh_file) - elseif mesh_type == "P4estMesh" - if mpi_isroot() - p4est_filename, tree_node_coordinates, - nodes, boundary_names_ = h5open(mesh_file, "r") do file - return read(attributes(file)["p4est_file"]), - read(file["tree_node_coordinates"]), - read(file["nodes"]), - read(file["boundary_names"]) - end - - boundary_names = boundary_names_ .|> Symbol - - p4est_file = joinpath(dirname(mesh_file), p4est_filename) - - data = (p4est_file, tree_node_coordinates, nodes, boundary_names) - MPI.bcast(data, mpi_root(), mpi_comm()) + if mesh_type == "TreeMesh" + if mpi_isroot() + n_cells = h5open(mesh_file, "r") do file + read(attributes(file)["n_cells"]) + end + MPI.Bcast!(Ref(ndims_), mpi_root(), mpi_comm()) + MPI.Bcast!(Ref(n_cells), mpi_root(), mpi_comm()) + else + ndims_ = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] + n_cells = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] + end + + mesh = TreeMesh(ParallelTree{ndims_}, max(n_cells, n_cells_max)) + load_mesh!(mesh, mesh_file) + elseif mesh_type == "P4estMesh" + if mpi_isroot() + p4est_filename, tree_node_coordinates, + nodes, boundary_names_ = h5open(mesh_file, "r") do file + return read(attributes(file)["p4est_file"]), + read(file["tree_node_coordinates"]), + read(file["nodes"]), + read(file["boundary_names"]) + end + + boundary_names = boundary_names_ .|> Symbol + + p4est_file = joinpath(dirname(mesh_file), p4est_filename) + + data = (p4est_file, tree_node_coordinates, nodes, boundary_names) + MPI.bcast(data, mpi_root(), mpi_comm()) + else + data = MPI.bcast(nothing, mpi_root(), mpi_comm()) + p4est_file, tree_node_coordinates, nodes, boundary_names = data + end + + # Prevent Julia crashes when `p4est` can't find the file + @assert isfile(p4est_file) + + p4est = load_p4est(p4est_file, Val(ndims_)) + + mesh = P4estMesh{ndims_}(p4est, tree_node_coordinates, + nodes, boundary_names, "", false, true) else - data = MPI.bcast(nothing, mpi_root(), mpi_comm()) - p4est_file, tree_node_coordinates, nodes, boundary_names = data + error("Unknown mesh type!") end - # Prevent Julia crashes when `p4est` can't find the file - @assert isfile(p4est_file) - - p4est = 
load_p4est(p4est_file, Val(ndims_)) - - mesh = P4estMesh{ndims_}(p4est, tree_node_coordinates, - nodes, boundary_names, "", false, true) - else - error("Unknown mesh type!") - end - - return mesh + return mesh end function load_mesh!(mesh::ParallelTreeMesh, mesh_file::AbstractString) - mesh.current_filename = mesh_file - mesh.unsaved_changes = false + mesh.current_filename = mesh_file + mesh.unsaved_changes = false - if mpi_isroot() - h5open(mesh_file, "r") do file - # Set domain information - mesh.tree.center_level_0 = read(attributes(file)["center_level_0"]) - mesh.tree.length_level_0 = read(attributes(file)["length_level_0"]) - mesh.tree.periodicity = Tuple(read(attributes(file)["periodicity"])) - MPI.Bcast!(collect(mesh.tree.center_level_0), mpi_root(), mpi_comm()) - MPI.Bcast!(collect(mesh.tree.length_level_0), mpi_root(), mpi_comm()) - MPI.Bcast!(collect(mesh.tree.periodicity), mpi_root(), mpi_comm()) - - # Set length - n_cells = read(attributes(file)["n_cells"]) - MPI.Bcast!(Ref(n_cells), mpi_root(), mpi_comm()) - resize!(mesh.tree, n_cells) - - # Read in data - mesh.tree.parent_ids[1:n_cells] = read(file["parent_ids"]) - mesh.tree.child_ids[:, 1:n_cells] = read(file["child_ids"]) - mesh.tree.neighbor_ids[:, 1:n_cells] = read(file["neighbor_ids"]) - mesh.tree.levels[1:n_cells] = read(file["levels"]) - mesh.tree.coordinates[:, 1:n_cells] = read(file["coordinates"]) - @views MPI.Bcast!(mesh.tree.parent_ids[1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.child_ids[:, 1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.neighbor_ids[:, 1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.levels[1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.coordinates[:, 1:n_cells], mpi_root(), mpi_comm()) + if mpi_isroot() + h5open(mesh_file, "r") do file + # Set domain information + mesh.tree.center_level_0 = read(attributes(file)["center_level_0"]) + mesh.tree.length_level_0 = read(attributes(file)["length_level_0"]) + mesh.tree.periodicity = Tuple(read(attributes(file)["periodicity"])) + MPI.Bcast!(collect(mesh.tree.center_level_0), mpi_root(), mpi_comm()) + MPI.Bcast!(collect(mesh.tree.length_level_0), mpi_root(), mpi_comm()) + MPI.Bcast!(collect(mesh.tree.periodicity), mpi_root(), mpi_comm()) + + # Set length + n_cells = read(attributes(file)["n_cells"]) + MPI.Bcast!(Ref(n_cells), mpi_root(), mpi_comm()) + resize!(mesh.tree, n_cells) + + # Read in data + mesh.tree.parent_ids[1:n_cells] = read(file["parent_ids"]) + mesh.tree.child_ids[:, 1:n_cells] = read(file["child_ids"]) + mesh.tree.neighbor_ids[:, 1:n_cells] = read(file["neighbor_ids"]) + mesh.tree.levels[1:n_cells] = read(file["levels"]) + mesh.tree.coordinates[:, 1:n_cells] = read(file["coordinates"]) + @views MPI.Bcast!(mesh.tree.parent_ids[1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.child_ids[:, 1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.neighbor_ids[:, 1:n_cells], mpi_root(), + mpi_comm()) + @views MPI.Bcast!(mesh.tree.levels[1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.coordinates[:, 1:n_cells], mpi_root(), + mpi_comm()) + end + else # non-root ranks + # Set domain information + mesh.tree.center_level_0 = MPI.Bcast!(collect(mesh.tree.center_level_0), + mpi_root(), mpi_comm()) + mesh.tree.length_level_0 = MPI.Bcast!(collect(mesh.tree.length_level_0), + mpi_root(), mpi_comm())[1] + mesh.tree.periodicity = Tuple(MPI.Bcast!(collect(mesh.tree.periodicity), + mpi_root(), mpi_comm())) + + # Set length + n_cells = 
MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] + resize!(mesh.tree, n_cells) + + # Read in data + @views MPI.Bcast!(mesh.tree.parent_ids[1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.child_ids[:, 1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.neighbor_ids[:, 1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.levels[1:n_cells], mpi_root(), mpi_comm()) + @views MPI.Bcast!(mesh.tree.coordinates[:, 1:n_cells], mpi_root(), mpi_comm()) end - else # non-root ranks - # Set domain information - mesh.tree.center_level_0 = MPI.Bcast!(collect(mesh.tree.center_level_0), mpi_root(), mpi_comm()) - mesh.tree.length_level_0 = MPI.Bcast!(collect(mesh.tree.length_level_0), mpi_root(), mpi_comm())[1] - mesh.tree.periodicity = Tuple(MPI.Bcast!(collect(mesh.tree.periodicity), mpi_root(), mpi_comm())) - - # Set length - n_cells = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm())[] - resize!(mesh.tree, n_cells) - - # Read in data - @views MPI.Bcast!(mesh.tree.parent_ids[1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.child_ids[:, 1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.neighbor_ids[:, 1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.levels[1:n_cells], mpi_root(), mpi_comm()) - @views MPI.Bcast!(mesh.tree.coordinates[:, 1:n_cells], mpi_root(), mpi_comm()) - end - - # Partition mesh - partition!(mesh) - - return mesh -end + # Partition mesh + partition!(mesh) + return mesh +end end # @muladd diff --git a/src/meshes/meshes.jl b/src/meshes/meshes.jl index a6dcbe132d8..2716aa2007b 100644 --- a/src/meshes/meshes.jl +++ b/src/meshes/meshes.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent include("tree_mesh.jl") include("structured_mesh.jl") @@ -14,6 +14,4 @@ include("transfinite_mappings_3d.jl") include("p4est_mesh.jl") include("mesh_io.jl") include("dgmulti_meshes.jl") - - end # @muladd diff --git a/src/meshes/p4est_mesh.jl b/src/meshes/p4est_mesh.jl index 2a9777f2a11..ddd6cf473e4 100644 --- a/src/meshes/p4est_mesh.jl +++ b/src/meshes/p4est_mesh.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ P4estMesh{NDIMS} <: AbstractMesh{NDIMS} @@ -11,100 +11,107 @@ An unstructured curved mesh based on trees that uses the C library `p4est` to manage trees and mesh refinement. """ -mutable struct P4estMesh{NDIMS, RealT<:Real, IsParallel, P, Ghost, NDIMSP2, NNODES} <: AbstractMesh{NDIMS} - p4est ::P # Either Ptr{p4est_t} or Ptr{p8est_t} - is_parallel ::IsParallel - ghost ::Ghost # Either Ptr{p4est_ghost_t} or Ptr{p8est_ghost_t} - # Coordinates at the nodes specified by the tensor product of `nodes` (NDIMS times). - # This specifies the geometry interpolation for each tree. 
- tree_node_coordinates ::Array{RealT, NDIMSP2} # [dimension, i, j, k, tree] - nodes ::SVector{NNODES, RealT} - boundary_names ::Array{Symbol, 2} # [face direction, tree] - current_filename ::String - unsaved_changes ::Bool - p4est_partition_allow_for_coarsening::Bool - - function P4estMesh{NDIMS}(p4est, tree_node_coordinates, nodes, boundary_names, - current_filename, unsaved_changes, p4est_partition_allow_for_coarsening) where NDIMS - if NDIMS == 2 - @assert p4est isa Ptr{p4est_t} - elseif NDIMS == 3 - @assert p4est isa Ptr{p8est_t} - end +mutable struct P4estMesh{NDIMS, RealT <: Real, IsParallel, P, Ghost, NDIMSP2, NNODES} <: + AbstractMesh{NDIMS} + p4est::P # Either Ptr{p4est_t} or Ptr{p8est_t} + is_parallel::IsParallel + ghost::Ghost # Either Ptr{p4est_ghost_t} or Ptr{p8est_ghost_t} + # Coordinates at the nodes specified by the tensor product of `nodes` (NDIMS times). + # This specifies the geometry interpolation for each tree. + tree_node_coordinates::Array{RealT, NDIMSP2} # [dimension, i, j, k, tree] + nodes::SVector{NNODES, RealT} + boundary_names::Array{Symbol, 2} # [face direction, tree] + current_filename::String + unsaved_changes::Bool + p4est_partition_allow_for_coarsening::Bool + + function P4estMesh{NDIMS}(p4est, tree_node_coordinates, nodes, boundary_names, + current_filename, unsaved_changes, + p4est_partition_allow_for_coarsening) where {NDIMS} + if NDIMS == 2 + @assert p4est isa Ptr{p4est_t} + elseif NDIMS == 3 + @assert p4est isa Ptr{p8est_t} + end - if mpi_isparallel() - if !P4est.uses_mpi() - error("p4est library does not support MPI") - end - is_parallel = True() - else - is_parallel = False() - end + if mpi_isparallel() + if !P4est.uses_mpi() + error("p4est library does not support MPI") + end + is_parallel = True() + else + is_parallel = False() + end - ghost = ghost_new_p4est(p4est) + ghost = ghost_new_p4est(p4est) - mesh = new{NDIMS, eltype(tree_node_coordinates), typeof(is_parallel), typeof(p4est), typeof(ghost), NDIMS+2, length(nodes)}( - p4est, is_parallel, ghost, tree_node_coordinates, nodes, boundary_names, current_filename, unsaved_changes, - p4est_partition_allow_for_coarsening) + mesh = new{NDIMS, eltype(tree_node_coordinates), typeof(is_parallel), + typeof(p4est), typeof(ghost), NDIMS + 2, length(nodes)}(p4est, + is_parallel, + ghost, + tree_node_coordinates, + nodes, + boundary_names, + current_filename, + unsaved_changes, + p4est_partition_allow_for_coarsening) - # Destroy `p4est` structs when the mesh is garbage collected - finalizer(destroy_mesh, mesh) + # Destroy `p4est` structs when the mesh is garbage collected + finalizer(destroy_mesh, mesh) - return mesh - end + return mesh + end end -const SerialP4estMesh{NDIMS} = P4estMesh{NDIMS, <:Real, <:False} +const SerialP4estMesh{NDIMS} = P4estMesh{NDIMS, <:Real, <:False} const ParallelP4estMesh{NDIMS} = P4estMesh{NDIMS, <:Real, <:True} @inline mpi_parallel(mesh::SerialP4estMesh) = False() @inline mpi_parallel(mesh::ParallelP4estMesh) = True() - function destroy_mesh(mesh::P4estMesh{2}) - connectivity = unsafe_load(mesh.p4est).connectivity - p4est_ghost_destroy(mesh.ghost) - p4est_destroy(mesh.p4est) - p4est_connectivity_destroy(connectivity) + connectivity = unsafe_load(mesh.p4est).connectivity + p4est_ghost_destroy(mesh.ghost) + p4est_destroy(mesh.p4est) + p4est_connectivity_destroy(connectivity) end function destroy_mesh(mesh::P4estMesh{3}) - connectivity = unsafe_load(mesh.p4est).connectivity - p8est_ghost_destroy(mesh.ghost) - p8est_destroy(mesh.p4est) - p8est_connectivity_destroy(connectivity) + 
connectivity = unsafe_load(mesh.p4est).connectivity + p8est_ghost_destroy(mesh.ghost) + p8est_destroy(mesh.p4est) + p8est_connectivity_destroy(connectivity) end - -@inline Base.ndims(::P4estMesh{NDIMS}) where NDIMS = NDIMS +@inline Base.ndims(::P4estMesh{NDIMS}) where {NDIMS} = NDIMS @inline Base.real(::P4estMesh{NDIMS, RealT}) where {NDIMS, RealT} = RealT @inline function ntrees(mesh::P4estMesh) - trees = unsafe_load(mesh.p4est).trees - return unsafe_load(trees).elem_count + trees = unsafe_load(mesh.p4est).trees + return unsafe_load(trees).elem_count end # returns Int32 by default which causes a weird method error when creating the cache @inline ncells(mesh::P4estMesh) = Int(unsafe_load(mesh.p4est).local_num_quadrants) - function Base.show(io::IO, mesh::P4estMesh) - print(io, "P4estMesh{", ndims(mesh), ", ", real(mesh), "}") + print(io, "P4estMesh{", ndims(mesh), ", ", real(mesh), "}") end function Base.show(io::IO, ::MIME"text/plain", mesh::P4estMesh) - if get(io, :compact, false) - show(io, mesh) - else - setup = [ - "#trees" => ntrees(mesh), - "current #cells" => ncells(mesh), - "polydeg" => length(mesh.nodes) - 1, - ] - summary_box(io, "P4estMesh{" * string(ndims(mesh)) * ", " * string(real(mesh)) * "}", setup) - end + if get(io, :compact, false) + show(io, mesh) + else + setup = [ + "#trees" => ntrees(mesh), + "current #cells" => ncells(mesh), + "polydeg" => length(mesh.nodes) - 1, + ] + summary_box(io, + "P4estMesh{" * string(ndims(mesh)) * ", " * string(real(mesh)) * + "}", setup) + end end - """ P4estMesh(trees_per_dimension; polydeg, mapping=nothing, faces=nothing, coordinates_min=nothing, coordinates_max=nothing, @@ -153,128 +160,129 @@ Non-periodic boundaries will be called `:x_neg`, `:x_pos`, `:y_neg`, `:y_pos`, ` to permit more fine-grained partitioning. 
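
A short usage sketch (the keyword values are illustrative only, not taken from an
existing elixir):

```julia
mesh = P4estMesh((4, 4); polydeg = 3,
                 coordinates_min = (-1.0, -1.0), coordinates_max = (1.0, 1.0),
                 periodicity = false)
```

This creates 4 × 4 trees on `[-1, 1]^2` with non-periodic boundaries named as
described above.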
""" function P4estMesh(trees_per_dimension; polydeg, - mapping=nothing, faces=nothing, coordinates_min=nothing, coordinates_max=nothing, - RealT=Float64, initial_refinement_level=0, periodicity=true, unsaved_changes=true, - p4est_partition_allow_for_coarsening=true) - - @assert ( - (coordinates_min === nothing) === (coordinates_max === nothing) - ) "Either both or none of coordinates_min and coordinates_max must be specified" - - @assert count(i -> i !== nothing, - (mapping, faces, coordinates_min) - ) == 1 "Exactly one of mapping, faces and coordinates_min/max must be specified" - - # Extract mapping - if faces !== nothing - validate_faces(faces) - mapping = transfinite_mapping(faces) - elseif coordinates_min !== nothing - mapping = coordinates2mapping(coordinates_min, coordinates_max) - end - - NDIMS = length(trees_per_dimension) - - # Convert periodicity to a Tuple of a Bool for every dimension - if all(periodicity) - # Also catches case where periodicity = true - periodicity = ntuple(_->true, NDIMS) - elseif !any(periodicity) - # Also catches case where periodicity = false - periodicity = ntuple(_->false, NDIMS) - else - # Default case if periodicity is an iterable - periodicity = Tuple(periodicity) - end - - basis = LobattoLegendreBasis(RealT, polydeg) - nodes = basis.nodes - tree_node_coordinates = Array{RealT, NDIMS+2}(undef, NDIMS, - ntuple(_ -> length(nodes), NDIMS)..., - prod(trees_per_dimension)) - calc_tree_node_coordinates!(tree_node_coordinates, nodes, mapping, trees_per_dimension) - - # p4est_connectivity_new_brick has trees in Z-order, so use our own function for this - connectivity = connectivity_structured(trees_per_dimension..., periodicity) - - p4est = new_p4est(connectivity, initial_refinement_level) - - # Non-periodic boundaries - boundary_names = fill(Symbol("---"), 2 * NDIMS, prod(trees_per_dimension)) - - structured_boundary_names!(boundary_names, trees_per_dimension, periodicity) - - return P4estMesh{NDIMS}(p4est, tree_node_coordinates, nodes, - boundary_names, "", unsaved_changes, - p4est_partition_allow_for_coarsening) -end + mapping = nothing, faces = nothing, coordinates_min = nothing, + coordinates_max = nothing, + RealT = Float64, initial_refinement_level = 0, periodicity = true, + unsaved_changes = true, + p4est_partition_allow_for_coarsening = true) + @assert ((coordinates_min === nothing)===(coordinates_max === nothing)) "Either both or none of coordinates_min and coordinates_max must be specified" + + @assert count(i -> i !== nothing, + (mapping, faces, coordinates_min))==1 "Exactly one of mapping, faces and coordinates_min/max must be specified" + + # Extract mapping + if faces !== nothing + validate_faces(faces) + mapping = transfinite_mapping(faces) + elseif coordinates_min !== nothing + mapping = coordinates2mapping(coordinates_min, coordinates_max) + end -# 2D version -function structured_boundary_names!(boundary_names, trees_per_dimension::NTuple{2}, periodicity) - linear_indices = LinearIndices(trees_per_dimension) + NDIMS = length(trees_per_dimension) + + # Convert periodicity to a Tuple of a Bool for every dimension + if all(periodicity) + # Also catches case where periodicity = true + periodicity = ntuple(_ -> true, NDIMS) + elseif !any(periodicity) + # Also catches case where periodicity = false + periodicity = ntuple(_ -> false, NDIMS) + else + # Default case if periodicity is an iterable + periodicity = Tuple(periodicity) + end + + basis = LobattoLegendreBasis(RealT, polydeg) + nodes = basis.nodes + tree_node_coordinates = Array{RealT, NDIMS 
+ 2}(undef, NDIMS, + ntuple(_ -> length(nodes), + NDIMS)..., + prod(trees_per_dimension)) + calc_tree_node_coordinates!(tree_node_coordinates, nodes, mapping, + trees_per_dimension) + + # p4est_connectivity_new_brick has trees in Z-order, so use our own function for this + connectivity = connectivity_structured(trees_per_dimension..., periodicity) + + p4est = new_p4est(connectivity, initial_refinement_level) - # Boundaries in x-direction - if !periodicity[1] - for cell_y in 1:trees_per_dimension[2] - tree = linear_indices[1, cell_y] - boundary_names[1, tree] = :x_neg + # Non-periodic boundaries + boundary_names = fill(Symbol("---"), 2 * NDIMS, prod(trees_per_dimension)) - tree = linear_indices[end, cell_y] - boundary_names[2, tree] = :x_pos + structured_boundary_names!(boundary_names, trees_per_dimension, periodicity) + + return P4estMesh{NDIMS}(p4est, tree_node_coordinates, nodes, + boundary_names, "", unsaved_changes, + p4est_partition_allow_for_coarsening) +end + +# 2D version +function structured_boundary_names!(boundary_names, trees_per_dimension::NTuple{2}, + periodicity) + linear_indices = LinearIndices(trees_per_dimension) + + # Boundaries in x-direction + if !periodicity[1] + for cell_y in 1:trees_per_dimension[2] + tree = linear_indices[1, cell_y] + boundary_names[1, tree] = :x_neg + + tree = linear_indices[end, cell_y] + boundary_names[2, tree] = :x_pos + end end - end - # Boundaries in y-direction - if !periodicity[2] - for cell_x in 1:trees_per_dimension[1] - tree = linear_indices[cell_x, 1] - boundary_names[3, tree] = :y_neg + # Boundaries in y-direction + if !periodicity[2] + for cell_x in 1:trees_per_dimension[1] + tree = linear_indices[cell_x, 1] + boundary_names[3, tree] = :y_neg - tree = linear_indices[cell_x, end] - boundary_names[4, tree] = :y_pos + tree = linear_indices[cell_x, end] + boundary_names[4, tree] = :y_pos + end end - end end # 3D version -function structured_boundary_names!(boundary_names, trees_per_dimension::NTuple{3}, periodicity) - linear_indices = LinearIndices(trees_per_dimension) - - # Boundaries in x-direction - if !periodicity[1] - for cell_z in 1:trees_per_dimension[3], cell_y in 1:trees_per_dimension[2] - tree = linear_indices[1, cell_y, cell_z] - boundary_names[1, tree] = :x_neg - - tree = linear_indices[end, cell_y, cell_z] - boundary_names[2, tree] = :x_pos +function structured_boundary_names!(boundary_names, trees_per_dimension::NTuple{3}, + periodicity) + linear_indices = LinearIndices(trees_per_dimension) + + # Boundaries in x-direction + if !periodicity[1] + for cell_z in 1:trees_per_dimension[3], cell_y in 1:trees_per_dimension[2] + tree = linear_indices[1, cell_y, cell_z] + boundary_names[1, tree] = :x_neg + + tree = linear_indices[end, cell_y, cell_z] + boundary_names[2, tree] = :x_pos + end end - end - # Boundaries in y-direction - if !periodicity[2] - for cell_z in 1:trees_per_dimension[3], cell_x in 1:trees_per_dimension[1] - tree = linear_indices[cell_x, 1, cell_z] - boundary_names[3, tree] = :y_neg + # Boundaries in y-direction + if !periodicity[2] + for cell_z in 1:trees_per_dimension[3], cell_x in 1:trees_per_dimension[1] + tree = linear_indices[cell_x, 1, cell_z] + boundary_names[3, tree] = :y_neg - tree = linear_indices[cell_x, end, cell_z] - boundary_names[4, tree] = :y_pos + tree = linear_indices[cell_x, end, cell_z] + boundary_names[4, tree] = :y_pos + end end - end - # Boundaries in z-direction - if !periodicity[3] - for cell_y in 1:trees_per_dimension[2], cell_x in 1:trees_per_dimension[1] - tree = linear_indices[cell_x, 
cell_y, 1] - boundary_names[5, tree] = :z_neg + # Boundaries in z-direction + if !periodicity[3] + for cell_y in 1:trees_per_dimension[2], cell_x in 1:trees_per_dimension[1] + tree = linear_indices[cell_x, cell_y, 1] + boundary_names[5, tree] = :z_neg - tree = linear_indices[cell_x, cell_y, end] - boundary_names[6, tree] = :z_pos + tree = linear_indices[cell_x, cell_y, end] + boundary_names[6, tree] = :z_pos + end end - end end - """ P4estMesh{NDIMS}(meshfile::String; mapping=nothing, polydeg=1, RealT=Float64, @@ -337,125 +345,134 @@ For example, if a two-dimensional base mesh contains 25 elements then setting to permit more fine-grained partitioning. """ function P4estMesh{NDIMS}(meshfile::String; - mapping=nothing, polydeg=1, RealT=Float64, - initial_refinement_level=0, unsaved_changes=true, - p4est_partition_allow_for_coarsening=true) where NDIMS - # Prevent `p4est` from crashing Julia if the file doesn't exist - @assert isfile(meshfile) - - # Read in the Header of the meshfile to determine which constructor is appropriate - header = open(meshfile, "r") do io - readline(io) # *Header of the Abaqus file; discarded - readline(io) # Readin the actual header information - end - - # Check if the meshfile was generated using HOHQMesh - if header == " File created by HOHQMesh" - # Mesh curvature and boundary naming is handled with additional information available in meshfile - p4est, tree_node_coordinates, nodes, boundary_names = p4est_mesh_from_hohqmesh_abaqus(meshfile, initial_refinement_level, - NDIMS, RealT) - else - # Mesh curvature is handled directly by applying the mapping keyword argument - p4est, tree_node_coordinates, nodes, boundary_names = p4est_mesh_from_standard_abaqus(meshfile, mapping, polydeg, - initial_refinement_level, - NDIMS, RealT) - end - - return P4estMesh{NDIMS}(p4est, tree_node_coordinates, nodes, - boundary_names, "", unsaved_changes, - p4est_partition_allow_for_coarsening) -end + mapping = nothing, polydeg = 1, RealT = Float64, + initial_refinement_level = 0, unsaved_changes = true, + p4est_partition_allow_for_coarsening = true) where {NDIMS} + # Prevent `p4est` from crashing Julia if the file doesn't exist + @assert isfile(meshfile) + + # Read in the Header of the meshfile to determine which constructor is appropriate + header = open(meshfile, "r") do io + readline(io) # *Header of the Abaqus file; discarded + readline(io) # Readin the actual header information + end + # Check if the meshfile was generated using HOHQMesh + if header == " File created by HOHQMesh" + # Mesh curvature and boundary naming is handled with additional information available in meshfile + p4est, tree_node_coordinates, nodes, boundary_names = p4est_mesh_from_hohqmesh_abaqus(meshfile, + initial_refinement_level, + NDIMS, + RealT) + else + # Mesh curvature is handled directly by applying the mapping keyword argument + p4est, tree_node_coordinates, nodes, boundary_names = p4est_mesh_from_standard_abaqus(meshfile, + mapping, + polydeg, + initial_refinement_level, + NDIMS, + RealT) + end + + return P4estMesh{NDIMS}(p4est, tree_node_coordinates, nodes, + boundary_names, "", unsaved_changes, + p4est_partition_allow_for_coarsening) +end # Create the mesh connectivity, mapped node coordinates within each tree, reference nodes in [-1,1] # and a list of boundary names for the `P4estMesh`. High-order boundary curve information as well as # the boundary names on each tree are provided by the `meshfile` created by # [`HOHQMesh.jl`](https://github.com/trixi-framework/HOHQMesh.jl). 
-function p4est_mesh_from_hohqmesh_abaqus(meshfile, initial_refinement_level, n_dimensions, RealT) - # Create the mesh connectivity using `p4est` - connectivity = read_inp_p4est(meshfile, Val(n_dimensions)) - connectivity_obj = unsafe_load(connectivity) - - # These need to be of the type Int for unsafe_wrap below to work - n_trees::Int = connectivity_obj.num_trees - n_vertices::Int = connectivity_obj.num_vertices - - # Extract a copy of the element vertices to compute the tree node coordinates - vertices = unsafe_wrap(Array, connectivity_obj.vertices, (3, n_vertices)) - - # Readin all the information from the mesh file into a string array - file_lines = readlines(open(meshfile)) +function p4est_mesh_from_hohqmesh_abaqus(meshfile, initial_refinement_level, + n_dimensions, RealT) + # Create the mesh connectivity using `p4est` + connectivity = read_inp_p4est(meshfile, Val(n_dimensions)) + connectivity_obj = unsafe_load(connectivity) - # Get the file index where the mesh polynomial degree is given in the meshfile - file_idx = findfirst(contains("** mesh polynomial degree"), file_lines) + # These need to be of the type Int for unsafe_wrap below to work + n_trees::Int = connectivity_obj.num_trees + n_vertices::Int = connectivity_obj.num_vertices - # Get the polynomial order of the mesh boundary information - current_line = split(file_lines[file_idx]) - mesh_polydeg = parse(Int, current_line[6]) - mesh_nnodes = mesh_polydeg + 1 + # Extract a copy of the element vertices to compute the tree node coordinates + vertices = unsafe_wrap(Array, connectivity_obj.vertices, (3, n_vertices)) - # Create the Chebyshev-Gauss-Lobatto nodes used by HOHQMesh to represent the boundaries - cheby_nodes, _ = chebyshev_gauss_lobatto_nodes_weights(mesh_nnodes) - nodes = SVector{mesh_nnodes}(cheby_nodes) + # Readin all the information from the mesh file into a string array + file_lines = readlines(open(meshfile)) - # Allocate the memory for the tree node coordinates - tree_node_coordinates = Array{RealT, n_dimensions+2}(undef, n_dimensions, - ntuple(_ -> length(nodes), n_dimensions)..., - n_trees) + # Get the file index where the mesh polynomial degree is given in the meshfile + file_idx = findfirst(contains("** mesh polynomial degree"), file_lines) - # Compute the tree node coordinates and return the updated file index - file_idx = calc_tree_node_coordinates!(tree_node_coordinates, file_lines, nodes, vertices, RealT) - - # Allocate the memory for the boundary labels - boundary_names = Array{Symbol}(undef, (2 * n_dimensions, n_trees)) - - # Read in the boundary names from the last portion of the meshfile - # Note here the boundary names where "---" means an internal connection - for tree in 1:n_trees + # Get the polynomial order of the mesh boundary information current_line = split(file_lines[file_idx]) - boundary_names[:, tree] = map(Symbol, current_line[2:end]) - file_idx += 1 - end + mesh_polydeg = parse(Int, current_line[6]) + mesh_nnodes = mesh_polydeg + 1 + + # Create the Chebyshev-Gauss-Lobatto nodes used by HOHQMesh to represent the boundaries + cheby_nodes, _ = chebyshev_gauss_lobatto_nodes_weights(mesh_nnodes) + nodes = SVector{mesh_nnodes}(cheby_nodes) + + # Allocate the memory for the tree node coordinates + tree_node_coordinates = Array{RealT, n_dimensions + 2}(undef, n_dimensions, + ntuple(_ -> length(nodes), + n_dimensions)..., + n_trees) + + # Compute the tree node coordinates and return the updated file index + file_idx = calc_tree_node_coordinates!(tree_node_coordinates, file_lines, nodes, + vertices, 
RealT) + + # Allocate the memory for the boundary labels + boundary_names = Array{Symbol}(undef, (2 * n_dimensions, n_trees)) + + # Read in the boundary names from the last portion of the meshfile + # Note here the boundary names where "---" means an internal connection + for tree in 1:n_trees + current_line = split(file_lines[file_idx]) + boundary_names[:, tree] = map(Symbol, current_line[2:end]) + file_idx += 1 + end - p4est = new_p4est(connectivity, initial_refinement_level) + p4est = new_p4est(connectivity, initial_refinement_level) - return p4est, tree_node_coordinates, nodes, boundary_names + return p4est, tree_node_coordinates, nodes, boundary_names end - # Create the mesh connectivity, mapped node coordinates within each tree, reference nodes in [-1,1] # and a list of boundary names for the `P4estMesh`. The tree node coordinates are computed according to # the `mapping` passed to this function using polynomial interpolants of degree `polydeg`. All boundary # names are given the name `:all`. -function p4est_mesh_from_standard_abaqus(meshfile, mapping, polydeg, initial_refinement_level, n_dimensions, RealT) - # Create the mesh connectivity using `p4est` - connectivity = read_inp_p4est(meshfile, Val(n_dimensions)) - connectivity_obj = unsafe_load(connectivity) +function p4est_mesh_from_standard_abaqus(meshfile, mapping, polydeg, + initial_refinement_level, n_dimensions, RealT) + # Create the mesh connectivity using `p4est` + connectivity = read_inp_p4est(meshfile, Val(n_dimensions)) + connectivity_obj = unsafe_load(connectivity) - # These need to be of the type Int for unsafe_wrap below to work - n_trees::Int = connectivity_obj.num_trees - n_vertices::Int = connectivity_obj.num_vertices + # These need to be of the type Int for unsafe_wrap below to work + n_trees::Int = connectivity_obj.num_trees + n_vertices::Int = connectivity_obj.num_vertices - vertices = unsafe_wrap(Array, connectivity_obj.vertices, (3, n_vertices)) - tree_to_vertex = unsafe_wrap(Array, connectivity_obj.tree_to_vertex, (2^n_dimensions, n_trees)) + vertices = unsafe_wrap(Array, connectivity_obj.vertices, (3, n_vertices)) + tree_to_vertex = unsafe_wrap(Array, connectivity_obj.tree_to_vertex, + (2^n_dimensions, n_trees)) - basis = LobattoLegendreBasis(RealT, polydeg) - nodes = basis.nodes + basis = LobattoLegendreBasis(RealT, polydeg) + nodes = basis.nodes - tree_node_coordinates = Array{RealT, n_dimensions+2}(undef, n_dimensions, - ntuple(_ -> length(nodes), n_dimensions)..., - n_trees) - calc_tree_node_coordinates!(tree_node_coordinates, nodes, mapping, vertices, tree_to_vertex) + tree_node_coordinates = Array{RealT, n_dimensions + 2}(undef, n_dimensions, + ntuple(_ -> length(nodes), + n_dimensions)..., + n_trees) + calc_tree_node_coordinates!(tree_node_coordinates, nodes, mapping, vertices, + tree_to_vertex) - p4est = new_p4est(connectivity, initial_refinement_level) + p4est = new_p4est(connectivity, initial_refinement_level) - # There's no simple and generic way to distinguish boundaries. Name all of them :all. - boundary_names = fill(:all, 2 * n_dimensions, n_trees) + # There's no simple and generic way to distinguish boundaries. Name all of them :all. 
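+    # Since every boundary of such a mesh is named `:all`, a downstream setup can
+    # attach a single boundary condition to the entire boundary. A minimal sketch
+    # (`initial_condition` is a hypothetical placeholder, not defined here):
+    #     boundary_conditions = Dict(:all => BoundaryConditionDirichlet(initial_condition))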
+ boundary_names = fill(:all, 2 * n_dimensions, n_trees) - return p4est, tree_node_coordinates, nodes, boundary_names + return p4est, tree_node_coordinates, nodes, boundary_names end - """ P4estMeshCubedSphere(trees_per_face_dimension, layers, inner_radius, thickness; polydeg, RealT=Float64, @@ -485,545 +502,562 @@ The mesh will have two boundaries, `:inside` and `:outside`. to permit more fine-grained partitioning. """ function P4estMeshCubedSphere(trees_per_face_dimension, layers, inner_radius, thickness; - polydeg, RealT=Float64, - initial_refinement_level=0, unsaved_changes=true, - p4est_partition_allow_for_coarsening=true) - connectivity = connectivity_cubed_sphere(trees_per_face_dimension, layers) + polydeg, RealT = Float64, + initial_refinement_level = 0, unsaved_changes = true, + p4est_partition_allow_for_coarsening = true) + connectivity = connectivity_cubed_sphere(trees_per_face_dimension, layers) - n_trees = 6 * trees_per_face_dimension^2 * layers + n_trees = 6 * trees_per_face_dimension^2 * layers - basis = LobattoLegendreBasis(RealT, polydeg) - nodes = basis.nodes + basis = LobattoLegendreBasis(RealT, polydeg) + nodes = basis.nodes - tree_node_coordinates = Array{RealT, 5}(undef, 3, - ntuple(_ -> length(nodes), 3)..., - n_trees) - calc_tree_node_coordinates!(tree_node_coordinates, nodes, trees_per_face_dimension, layers, - inner_radius, thickness) + tree_node_coordinates = Array{RealT, 5}(undef, 3, + ntuple(_ -> length(nodes), 3)..., + n_trees) + calc_tree_node_coordinates!(tree_node_coordinates, nodes, trees_per_face_dimension, + layers, + inner_radius, thickness) - p4est = new_p4est(connectivity, initial_refinement_level) + p4est = new_p4est(connectivity, initial_refinement_level) - boundary_names = fill(Symbol("---"), 2 * 3, n_trees) - boundary_names[5, :] .= Symbol("inside") - boundary_names[6, :] .= Symbol("outside") + boundary_names = fill(Symbol("---"), 2 * 3, n_trees) + boundary_names[5, :] .= Symbol("inside") + boundary_names[6, :] .= Symbol("outside") - return P4estMesh{3}(p4est, tree_node_coordinates, nodes, - boundary_names, "", unsaved_changes, - p4est_partition_allow_for_coarsening) + return P4estMesh{3}(p4est, tree_node_coordinates, nodes, + boundary_names, "", unsaved_changes, + p4est_partition_allow_for_coarsening) end - # Create a new p4est_connectivity that represents a structured rectangle. # Similar to p4est_connectivity_new_brick, but doesn't use Morton order. # This order makes `calc_tree_node_coordinates!` below and the calculation # of `boundary_names` above easier but is irrelevant otherwise. # 2D version function connectivity_structured(n_cells_x, n_cells_y, periodicity) - linear_indices = LinearIndices((n_cells_x, n_cells_y)) - - # Vertices represent the coordinates of the forest. This is used by `p4est` - # to write VTK files. - # Trixi.jl doesn't use the coordinates from `p4est`, so the vertices can be empty. 
- n_vertices = 0 - n_trees = n_cells_x * n_cells_y - # No corner connectivity is needed - n_corners = 0 - vertices = C_NULL - tree_to_vertex = C_NULL - - tree_to_tree = Array{p4est_topidx_t, 2}(undef, 4, n_trees) - tree_to_face = Array{Int8, 2}(undef, 4, n_trees) - - for cell_y in 1:n_cells_y, cell_x in 1:n_cells_x - tree = linear_indices[cell_x, cell_y] - - # Subtract 1 because `p4est` uses zero-based indexing - # Negative x-direction - if cell_x > 1 - tree_to_tree[1, tree] = linear_indices[cell_x - 1, cell_y] - 1 - tree_to_face[1, tree] = 1 - elseif periodicity[1] - tree_to_tree[1, tree] = linear_indices[n_cells_x, cell_y] - 1 - tree_to_face[1, tree] = 1 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[1, tree] = tree - 1 - tree_to_face[1, tree] = 0 - end + linear_indices = LinearIndices((n_cells_x, n_cells_y)) + + # Vertices represent the coordinates of the forest. This is used by `p4est` + # to write VTK files. + # Trixi.jl doesn't use the coordinates from `p4est`, so the vertices can be empty. + n_vertices = 0 + n_trees = n_cells_x * n_cells_y + # No corner connectivity is needed + n_corners = 0 + vertices = C_NULL + tree_to_vertex = C_NULL + + tree_to_tree = Array{p4est_topidx_t, 2}(undef, 4, n_trees) + tree_to_face = Array{Int8, 2}(undef, 4, n_trees) + + for cell_y in 1:n_cells_y, cell_x in 1:n_cells_x + tree = linear_indices[cell_x, cell_y] + + # Subtract 1 because `p4est` uses zero-based indexing + # Negative x-direction + if cell_x > 1 + tree_to_tree[1, tree] = linear_indices[cell_x - 1, cell_y] - 1 + tree_to_face[1, tree] = 1 + elseif periodicity[1] + tree_to_tree[1, tree] = linear_indices[n_cells_x, cell_y] - 1 + tree_to_face[1, tree] = 1 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[1, tree] = tree - 1 + tree_to_face[1, tree] = 0 + end - # Positive x-direction - if cell_x < n_cells_x - tree_to_tree[2, tree] = linear_indices[cell_x + 1, cell_y] - 1 - tree_to_face[2, tree] = 0 - elseif periodicity[1] - tree_to_tree[2, tree] = linear_indices[1, cell_y] - 1 - tree_to_face[2, tree] = 0 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[2, tree] = tree - 1 - tree_to_face[2, tree] = 1 - end + # Positive x-direction + if cell_x < n_cells_x + tree_to_tree[2, tree] = linear_indices[cell_x + 1, cell_y] - 1 + tree_to_face[2, tree] = 0 + elseif periodicity[1] + tree_to_tree[2, tree] = linear_indices[1, cell_y] - 1 + tree_to_face[2, tree] = 0 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[2, tree] = tree - 1 + tree_to_face[2, tree] = 1 + end - # Negative y-direction - if cell_y > 1 - tree_to_tree[3, tree] = linear_indices[cell_x, cell_y - 1] - 1 - tree_to_face[3, tree] = 3 - elseif periodicity[2] - tree_to_tree[3, tree] = linear_indices[cell_x, n_cells_y] - 1 - tree_to_face[3, tree] = 3 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[3, tree] = tree - 1 - tree_to_face[3, tree] = 2 - end + # Negative y-direction + if cell_y > 1 + tree_to_tree[3, tree] = linear_indices[cell_x, cell_y - 1] - 1 + tree_to_face[3, tree] = 3 + elseif periodicity[2] + tree_to_tree[3, tree] = linear_indices[cell_x, n_cells_y] - 1 + tree_to_face[3, tree] = 3 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[3, tree] = tree - 1 + tree_to_face[3, tree] = 2 + end - # Positive y-direction - if cell_y 
< n_cells_y - tree_to_tree[4, tree] = linear_indices[cell_x, cell_y + 1] - 1 - tree_to_face[4, tree] = 2 - elseif periodicity[2] - tree_to_tree[4, tree] = linear_indices[cell_x, 1] - 1 - tree_to_face[4, tree] = 2 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[4, tree] = tree - 1 - tree_to_face[4, tree] = 3 + # Positive y-direction + if cell_y < n_cells_y + tree_to_tree[4, tree] = linear_indices[cell_x, cell_y + 1] - 1 + tree_to_face[4, tree] = 2 + elseif periodicity[2] + tree_to_tree[4, tree] = linear_indices[cell_x, 1] - 1 + tree_to_face[4, tree] = 2 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[4, tree] = tree - 1 + tree_to_face[4, tree] = 3 + end end - end - tree_to_corner = C_NULL - # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." - # We don't need corner connectivity, so this is a trivial case. - ctt_offset = zeros(p4est_topidx_t, 1) + tree_to_corner = C_NULL + # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." + # We don't need corner connectivity, so this is a trivial case. + ctt_offset = zeros(p4est_topidx_t, 1) - corner_to_tree = C_NULL - corner_to_corner = C_NULL + corner_to_tree = C_NULL + corner_to_corner = C_NULL - connectivity = p4est_connectivity_new_copy(n_vertices, n_trees, n_corners, - vertices, tree_to_vertex, - tree_to_tree, tree_to_face, - tree_to_corner, ctt_offset, - corner_to_tree, corner_to_corner) + connectivity = p4est_connectivity_new_copy(n_vertices, n_trees, n_corners, + vertices, tree_to_vertex, + tree_to_tree, tree_to_face, + tree_to_corner, ctt_offset, + corner_to_tree, corner_to_corner) - @assert p4est_connectivity_is_valid(connectivity) == 1 + @assert p4est_connectivity_is_valid(connectivity) == 1 - return connectivity + return connectivity end # 3D version function connectivity_structured(n_cells_x, n_cells_y, n_cells_z, periodicity) - linear_indices = LinearIndices((n_cells_x, n_cells_y, n_cells_z)) - - # Vertices represent the coordinates of the forest. This is used by `p4est` - # to write VTK files. - # Trixi.jl doesn't use the coordinates from `p4est`, so the vertices can be empty. - n_vertices = 0 - n_trees = n_cells_x * n_cells_y * n_cells_z - # No edge connectivity is needed - n_edges = 0 - # No corner connectivity is needed - n_corners = 0 - vertices = C_NULL - tree_to_vertex = C_NULL - - tree_to_tree = Array{p4est_topidx_t, 2}(undef, 6, n_trees) - tree_to_face = Array{Int8, 2}(undef, 6, n_trees) - - for cell_z in 1:n_cells_z, cell_y in 1:n_cells_y, cell_x in 1:n_cells_x - tree = linear_indices[cell_x, cell_y, cell_z] - - # Subtract 1 because `p4est` uses zero-based indexing - # Negative x-direction - if cell_x > 1 - tree_to_tree[1, tree] = linear_indices[cell_x - 1, cell_y, cell_z] - 1 - tree_to_face[1, tree] = 1 - elseif periodicity[1] - tree_to_tree[1, tree] = linear_indices[n_cells_x, cell_y, cell_z] - 1 - tree_to_face[1, tree] = 1 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[1, tree] = tree - 1 - tree_to_face[1, tree] = 0 - end + linear_indices = LinearIndices((n_cells_x, n_cells_y, n_cells_z)) + + # Vertices represent the coordinates of the forest. This is used by `p4est` + # to write VTK files. + # Trixi.jl doesn't use the coordinates from `p4est`, so the vertices can be empty. 
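+    # Quick worked example of the tree connectivity encoding filled in below
+    # (`p4est` tree ids and faces are zero-based; faces are ordered
+    # -x, +x, -y, +y, -z, +z): for n_cells_x = 2, n_cells_y = n_cells_z = 1 with
+    # full periodicity, tree 1 is connected in -x to tree 2, so
+    # tree_to_tree[1, 1] == 1 and tree_to_face[1, 1] == 1 (tree 2's +x face).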
+ n_vertices = 0 + n_trees = n_cells_x * n_cells_y * n_cells_z + # No edge connectivity is needed + n_edges = 0 + # No corner connectivity is needed + n_corners = 0 + vertices = C_NULL + tree_to_vertex = C_NULL + + tree_to_tree = Array{p4est_topidx_t, 2}(undef, 6, n_trees) + tree_to_face = Array{Int8, 2}(undef, 6, n_trees) - # Positive x-direction - if cell_x < n_cells_x - tree_to_tree[2, tree] = linear_indices[cell_x + 1, cell_y, cell_z] - 1 - tree_to_face[2, tree] = 0 - elseif periodicity[1] - tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z] - 1 - tree_to_face[2, tree] = 0 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[2, tree] = tree - 1 - tree_to_face[2, tree] = 1 - end + for cell_z in 1:n_cells_z, cell_y in 1:n_cells_y, cell_x in 1:n_cells_x + tree = linear_indices[cell_x, cell_y, cell_z] + + # Subtract 1 because `p4est` uses zero-based indexing + # Negative x-direction + if cell_x > 1 + tree_to_tree[1, tree] = linear_indices[cell_x - 1, cell_y, cell_z] - 1 + tree_to_face[1, tree] = 1 + elseif periodicity[1] + tree_to_tree[1, tree] = linear_indices[n_cells_x, cell_y, cell_z] - 1 + tree_to_face[1, tree] = 1 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[1, tree] = tree - 1 + tree_to_face[1, tree] = 0 + end - # Negative y-direction - if cell_y > 1 - tree_to_tree[3, tree] = linear_indices[cell_x, cell_y - 1, cell_z] - 1 - tree_to_face[3, tree] = 3 - elseif periodicity[2] - tree_to_tree[3, tree] = linear_indices[cell_x, n_cells_y, cell_z] - 1 - tree_to_face[3, tree] = 3 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[3, tree] = tree - 1 - tree_to_face[3, tree] = 2 - end + # Positive x-direction + if cell_x < n_cells_x + tree_to_tree[2, tree] = linear_indices[cell_x + 1, cell_y, cell_z] - 1 + tree_to_face[2, tree] = 0 + elseif periodicity[1] + tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z] - 1 + tree_to_face[2, tree] = 0 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[2, tree] = tree - 1 + tree_to_face[2, tree] = 1 + end - # Positive y-direction - if cell_y < n_cells_y - tree_to_tree[4, tree] = linear_indices[cell_x, cell_y + 1, cell_z] - 1 - tree_to_face[4, tree] = 2 - elseif periodicity[2] - tree_to_tree[4, tree] = linear_indices[cell_x, 1, cell_z] - 1 - tree_to_face[4, tree] = 2 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[4, tree] = tree - 1 - tree_to_face[4, tree] = 3 - end + # Negative y-direction + if cell_y > 1 + tree_to_tree[3, tree] = linear_indices[cell_x, cell_y - 1, cell_z] - 1 + tree_to_face[3, tree] = 3 + elseif periodicity[2] + tree_to_tree[3, tree] = linear_indices[cell_x, n_cells_y, cell_z] - 1 + tree_to_face[3, tree] = 3 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[3, tree] = tree - 1 + tree_to_face[3, tree] = 2 + end - # Negative z-direction - if cell_z > 1 - tree_to_tree[5, tree] = linear_indices[cell_x, cell_y, cell_z - 1] - 1 - tree_to_face[5, tree] = 5 - elseif periodicity[3] - tree_to_tree[5, tree] = linear_indices[cell_x, cell_y, n_cells_z] - 1 - tree_to_face[5, tree] = 5 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[5, tree] = tree - 1 - tree_to_face[5, tree] = 4 - end + # Positive y-direction + if cell_y < n_cells_y + tree_to_tree[4, tree] = 
linear_indices[cell_x, cell_y + 1, cell_z] - 1 + tree_to_face[4, tree] = 2 + elseif periodicity[2] + tree_to_tree[4, tree] = linear_indices[cell_x, 1, cell_z] - 1 + tree_to_face[4, tree] = 2 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[4, tree] = tree - 1 + tree_to_face[4, tree] = 3 + end + + # Negative z-direction + if cell_z > 1 + tree_to_tree[5, tree] = linear_indices[cell_x, cell_y, cell_z - 1] - 1 + tree_to_face[5, tree] = 5 + elseif periodicity[3] + tree_to_tree[5, tree] = linear_indices[cell_x, cell_y, n_cells_z] - 1 + tree_to_face[5, tree] = 5 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[5, tree] = tree - 1 + tree_to_face[5, tree] = 4 + end - # Positive z-direction - if cell_z < n_cells_z - tree_to_tree[6, tree] = linear_indices[cell_x, cell_y, cell_z + 1] - 1 - tree_to_face[6, tree] = 4 - elseif periodicity[3] - tree_to_tree[6, tree] = linear_indices[cell_x, cell_y, 1] - 1 - tree_to_face[6, tree] = 4 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[6, tree] = tree - 1 - tree_to_face[6, tree] = 5 + # Positive z-direction + if cell_z < n_cells_z + tree_to_tree[6, tree] = linear_indices[cell_x, cell_y, cell_z + 1] - 1 + tree_to_face[6, tree] = 4 + elseif periodicity[3] + tree_to_tree[6, tree] = linear_indices[cell_x, cell_y, 1] - 1 + tree_to_face[6, tree] = 4 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[6, tree] = tree - 1 + tree_to_face[6, tree] = 5 + end end - end - - tree_to_edge = C_NULL - # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." - # We don't need edge connectivity, so this is a trivial case. - ett_offset = zeros(p4est_topidx_t, 1) - edge_to_tree = C_NULL - edge_to_edge = C_NULL - - tree_to_corner = C_NULL - # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." - # We don't need corner connectivity, so this is a trivial case. - ctt_offset = zeros(p4est_topidx_t, 1) - - corner_to_tree = C_NULL - corner_to_corner = C_NULL - - connectivity = p8est_connectivity_new_copy(n_vertices, n_trees, n_corners, n_edges, - vertices, tree_to_vertex, - tree_to_tree, tree_to_face, - tree_to_edge, ett_offset, - edge_to_tree, edge_to_edge, - tree_to_corner, ctt_offset, - corner_to_tree, corner_to_corner) - - @assert p8est_connectivity_is_valid(connectivity) == 1 - - return connectivity -end + tree_to_edge = C_NULL + # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." + # We don't need edge connectivity, so this is a trivial case. + ett_offset = zeros(p4est_topidx_t, 1) + edge_to_tree = C_NULL + edge_to_edge = C_NULL + + tree_to_corner = C_NULL + # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." + # We don't need corner connectivity, so this is a trivial case. 
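+    # In general, `ctt_offset` holds `n_corners + 1` cumulative offsets into
+    # `corner_to_tree`; with `n_corners == 0` this reduces to the single zero
+    # allocated below (our reading of the `p4est` docs quoted above).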
+ ctt_offset = zeros(p4est_topidx_t, 1) + + corner_to_tree = C_NULL + corner_to_corner = C_NULL + + connectivity = p8est_connectivity_new_copy(n_vertices, n_trees, n_corners, n_edges, + vertices, tree_to_vertex, + tree_to_tree, tree_to_face, + tree_to_edge, ett_offset, + edge_to_tree, edge_to_edge, + tree_to_corner, ctt_offset, + corner_to_tree, corner_to_corner) + + @assert p8est_connectivity_is_valid(connectivity) == 1 + + return connectivity +end function connectivity_cubed_sphere(trees_per_face_dimension, layers) - n_cells_x = n_cells_y = trees_per_face_dimension - n_cells_z = layers - - linear_indices = LinearIndices((trees_per_face_dimension, trees_per_face_dimension, layers, 6)) - - # Vertices represent the coordinates of the forest. This is used by `p4est` - # to write VTK files. - # Trixi.jl doesn't use the coordinates from `p4est`, so the vertices can be empty. - n_vertices = 0 - n_trees = 6 * n_cells_x * n_cells_y * n_cells_z - # No edge connectivity is needed - n_edges = 0 - # No corner connectivity is needed - n_corners = 0 - vertices = C_NULL - tree_to_vertex = C_NULL - - tree_to_tree = Array{p4est_topidx_t, 2}(undef, 6, n_trees) - tree_to_face = Array{Int8, 2}(undef, 6, n_trees) - - # Illustration of the local coordinates of each face. ξ and η are the first - # local coordinates of each face. The third local coordinate ζ is always - # pointing outwards, which yields a right-handed coordinate system for each face. - # ┌────────────────────────────────────────────────────┐ - # ╱│ ╱│ - # ╱ │ ξ <───┐ ╱ │ - # ╱ │ ╱ ╱ │ - # ╱ │ 4 (+y) V ╱ │ - # ╱ │ η ╱ │ - # ╱ │ ╱ │ - # ╱ │ ╱ │ - # ╱ │ ╱ │ - # ╱ │ ╱ │ - # ╱ │ 5 (-z) η ╱ │ - # ╱ │ ↑ ╱ │ - # ╱ │ │ ╱ │ - # ╱ │ ξ <───┘ ╱ │ - # ┌────────────────────────────────────────────────────┐ 2 (+x) │ - # │ │ │ │ - # │ │ │ ξ │ - # │ │ │ ↑ │ - # │ 1 (-x) │ │ │ │ - # │ │ │ │ │ - # │ ╱│ │ │ ╱ │ - # │ V │ │ │ V │ - # │ η ↓ │ │ η │ - # │ ξ └──────────────────────────────────────│─────────────┘ - # │ ╱ η 6 (+z) │ ╱ - # │ ╱ ↑ │ ╱ - # │ ╱ │ │ ╱ - # │ ╱ └───> ξ │ ╱ - # │ ╱ │ ╱ - # │ ╱ │ ╱ Global coordinates: - # │ ╱ │ ╱ y - # │ ╱ ┌───> ξ │ ╱ ↑ - # │ ╱ ╱ │ ╱ │ - # │ ╱ V 3 (-y) │ ╱ │ - # │ ╱ η │ ╱ └─────> x - # │ ╱ │ ╱ ╱ - # │╱ │╱ V - # └────────────────────────────────────────────────────┘ z - for direction in 1:6 - for cell_z in 1:n_cells_z, cell_y in 1:n_cells_y, cell_x in 1:n_cells_x - tree = linear_indices[cell_x, cell_y, cell_z, direction] - - # Subtract 1 because `p4est` uses zero-based indexing - # Negative x-direction - if cell_x > 1 # Connect to tree at the same face - tree_to_tree[1, tree] = linear_indices[cell_x - 1, cell_y, cell_z, direction] - 1 - tree_to_face[1, tree] = 1 - elseif direction == 1 # This is the -x face - target = 4 - tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 - tree_to_face[1, tree] = 1 - elseif direction == 2 # This is the +x face - target = 3 - tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 - tree_to_face[1, tree] = 1 - elseif direction == 3 # This is the -y face - target = 1 - tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 - tree_to_face[1, tree] = 1 - elseif direction == 4 # This is the +y face - target = 2 - tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 - tree_to_face[1, tree] = 1 - elseif direction == 5 # This is the -z face - target = 2 - tree_to_tree[1, tree] = linear_indices[cell_y, 1, cell_z, target] - 1 - tree_to_face[1, tree] = 2 - else # direction == 6, this is the +z face - target = 1 - tree_to_tree[1, tree] = 
linear_indices[end - cell_y + 1, end, cell_z, target] - 1 - tree_to_face[1, tree] = 9 # first face dimensions are oppositely oriented, add 6 - end - - # Positive x-direction - if cell_x < n_cells_x # Connect to tree at the same face - tree_to_tree[2, tree] = linear_indices[cell_x + 1, cell_y, cell_z, direction] - 1 - tree_to_face[2, tree] = 0 - elseif direction == 1 # This is the -x face - target = 3 - tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 - tree_to_face[2, tree] = 0 - elseif direction == 2 # This is the +x face - target = 4 - tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 - tree_to_face[2, tree] = 0 - elseif direction == 3 # This is the -y face - target = 2 - tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 - tree_to_face[2, tree] = 0 - elseif direction == 4 # This is the +y face - target = 1 - tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 - tree_to_face[2, tree] = 0 - elseif direction == 5 # This is the -z face - target = 1 - tree_to_tree[2, tree] = linear_indices[end - cell_y + 1, 1, cell_z, target] - 1 - tree_to_face[2, tree] = 8 # first face dimensions are oppositely oriented, add 6 - else # direction == 6, this is the +z face - target = 2 - tree_to_tree[2, tree] = linear_indices[cell_y, end, cell_z, target] - 1 - tree_to_face[2, tree] = 3 - end - - # Negative y-direction - if cell_y > 1 # Connect to tree at the same face - tree_to_tree[3, tree] = linear_indices[cell_x, cell_y - 1, cell_z, direction] - 1 - tree_to_face[3, tree] = 3 - elseif direction == 1 - target = 5 - tree_to_tree[3, tree] = linear_indices[end, end - cell_x + 1, cell_z, target] - 1 - tree_to_face[3, tree] = 7 # first face dimensions are oppositely oriented, add 6 - elseif direction == 2 - target = 5 - tree_to_tree[3, tree] = linear_indices[1, cell_x, cell_z, target] - 1 - tree_to_face[3, tree] = 0 - elseif direction == 3 - target = 5 - tree_to_tree[3, tree] = linear_indices[end - cell_x + 1, 1, cell_z, target] - 1 - tree_to_face[3, tree] = 8 # first face dimensions are oppositely oriented, add 6 - elseif direction == 4 - target = 5 - tree_to_tree[3, tree] = linear_indices[cell_x, end, cell_z, target] - 1 - tree_to_face[3, tree] = 3 - elseif direction == 5 - target = 3 - tree_to_tree[3, tree] = linear_indices[end - cell_x + 1, 1, cell_z, target] - 1 - tree_to_face[3, tree] = 8 # first face dimensions are oppositely oriented, add 6 - else # direction == 6 - target = 3 - tree_to_tree[3, tree] = linear_indices[cell_x, end, cell_z, target] - 1 - tree_to_face[3, tree] = 3 - end - - # Positive y-direction - if cell_y < n_cells_y # Connect to tree at the same face - tree_to_tree[4, tree] = linear_indices[cell_x, cell_y + 1, cell_z, direction] - 1 - tree_to_face[4, tree] = 2 - elseif direction == 1 - target = 6 - tree_to_tree[4, tree] = linear_indices[1, end - cell_x + 1, cell_z, target] - 1 - tree_to_face[4, tree] = 6 # first face dimensions are oppositely oriented, add 6 - elseif direction == 2 - target = 6 - tree_to_tree[4, tree] = linear_indices[end, cell_x, cell_z, target] - 1 - tree_to_face[4, tree] = 1 - elseif direction == 3 - target = 6 - tree_to_tree[4, tree] = linear_indices[cell_x, 1, cell_z, target] - 1 - tree_to_face[4, tree] = 2 - elseif direction == 4 - target = 6 - tree_to_tree[4, tree] = linear_indices[end - cell_x + 1, end, cell_z, target] - 1 - tree_to_face[4, tree] = 9 # first face dimensions are oppositely oriented, add 6 - elseif direction == 5 - target = 4 - tree_to_tree[4, tree] = 
linear_indices[cell_x, 1, cell_z, target] - 1 - tree_to_face[4, tree] = 2 - else # direction == 6 - target = 4 - tree_to_tree[4, tree] = linear_indices[end - cell_x + 1, end, cell_z, target] - 1 - tree_to_face[4, tree] = 9 # first face dimensions are oppositely oriented, add 6 - end - - # Negative z-direction - if cell_z > 1 - tree_to_tree[5, tree] = linear_indices[cell_x, cell_y, cell_z - 1, direction] - 1 - tree_to_face[5, tree] = 5 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[5, tree] = tree - 1 - tree_to_face[5, tree] = 4 - end - - # Positive z-direction - if cell_z < n_cells_z - tree_to_tree[6, tree] = linear_indices[cell_x, cell_y, cell_z + 1, direction] - 1 - tree_to_face[6, tree] = 4 - else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) - tree_to_tree[6, tree] = tree - 1 - tree_to_face[6, tree] = 5 - end + n_cells_x = n_cells_y = trees_per_face_dimension + n_cells_z = layers + + linear_indices = LinearIndices((trees_per_face_dimension, trees_per_face_dimension, + layers, 6)) + + # Vertices represent the coordinates of the forest. This is used by `p4est` + # to write VTK files. + # Trixi.jl doesn't use the coordinates from `p4est`, so the vertices can be empty. + n_vertices = 0 + n_trees = 6 * n_cells_x * n_cells_y * n_cells_z + # No edge connectivity is needed + n_edges = 0 + # No corner connectivity is needed + n_corners = 0 + vertices = C_NULL + tree_to_vertex = C_NULL + + tree_to_tree = Array{p4est_topidx_t, 2}(undef, 6, n_trees) + tree_to_face = Array{Int8, 2}(undef, 6, n_trees) + + # Illustration of the local coordinates of each face. ξ and η are the first + # local coordinates of each face. The third local coordinate ζ is always + # pointing outwards, which yields a right-handed coordinate system for each face. 
+ # ┌────────────────────────────────────────────────────┐ + # ╱│ ╱│ + # ╱ │ ξ <───┐ ╱ │ + # ╱ │ ╱ ╱ │ + # ╱ │ 4 (+y) V ╱ │ + # ╱ │ η ╱ │ + # ╱ │ ╱ │ + # ╱ │ ╱ │ + # ╱ │ ╱ │ + # ╱ │ ╱ │ + # ╱ │ 5 (-z) η ╱ │ + # ╱ │ ↑ ╱ │ + # ╱ │ │ ╱ │ + # ╱ │ ξ <───┘ ╱ │ + # ┌────────────────────────────────────────────────────┐ 2 (+x) │ + # │ │ │ │ + # │ │ │ ξ │ + # │ │ │ ↑ │ + # │ 1 (-x) │ │ │ │ + # │ │ │ │ │ + # │ ╱│ │ │ ╱ │ + # │ V │ │ │ V │ + # │ η ↓ │ │ η │ + # │ ξ └──────────────────────────────────────│─────────────┘ + # │ ╱ η 6 (+z) │ ╱ + # │ ╱ ↑ │ ╱ + # │ ╱ │ │ ╱ + # │ ╱ └───> ξ │ ╱ + # │ ╱ │ ╱ + # │ ╱ │ ╱ Global coordinates: + # │ ╱ │ ╱ y + # │ ╱ ┌───> ξ │ ╱ ↑ + # │ ╱ ╱ │ ╱ │ + # │ ╱ V 3 (-y) │ ╱ │ + # │ ╱ η │ ╱ └─────> x + # │ ╱ │ ╱ ╱ + # │╱ │╱ V + # └────────────────────────────────────────────────────┘ z + for direction in 1:6 + for cell_z in 1:n_cells_z, cell_y in 1:n_cells_y, cell_x in 1:n_cells_x + tree = linear_indices[cell_x, cell_y, cell_z, direction] + + # Subtract 1 because `p4est` uses zero-based indexing + # Negative x-direction + if cell_x > 1 # Connect to tree at the same face + tree_to_tree[1, tree] = linear_indices[cell_x - 1, cell_y, cell_z, + direction] - 1 + tree_to_face[1, tree] = 1 + elseif direction == 1 # This is the -x face + target = 4 + tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 + tree_to_face[1, tree] = 1 + elseif direction == 2 # This is the +x face + target = 3 + tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 + tree_to_face[1, tree] = 1 + elseif direction == 3 # This is the -y face + target = 1 + tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 + tree_to_face[1, tree] = 1 + elseif direction == 4 # This is the +y face + target = 2 + tree_to_tree[1, tree] = linear_indices[end, cell_y, cell_z, target] - 1 + tree_to_face[1, tree] = 1 + elseif direction == 5 # This is the -z face + target = 2 + tree_to_tree[1, tree] = linear_indices[cell_y, 1, cell_z, target] - 1 + tree_to_face[1, tree] = 2 + else # direction == 6, this is the +z face + target = 1 + tree_to_tree[1, tree] = linear_indices[end - cell_y + 1, end, cell_z, + target] - 1 + tree_to_face[1, tree] = 9 # first face dimensions are oppositely oriented, add 6 + end + + # Positive x-direction + if cell_x < n_cells_x # Connect to tree at the same face + tree_to_tree[2, tree] = linear_indices[cell_x + 1, cell_y, cell_z, + direction] - 1 + tree_to_face[2, tree] = 0 + elseif direction == 1 # This is the -x face + target = 3 + tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 + tree_to_face[2, tree] = 0 + elseif direction == 2 # This is the +x face + target = 4 + tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 + tree_to_face[2, tree] = 0 + elseif direction == 3 # This is the -y face + target = 2 + tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 + tree_to_face[2, tree] = 0 + elseif direction == 4 # This is the +y face + target = 1 + tree_to_tree[2, tree] = linear_indices[1, cell_y, cell_z, target] - 1 + tree_to_face[2, tree] = 0 + elseif direction == 5 # This is the -z face + target = 1 + tree_to_tree[2, tree] = linear_indices[end - cell_y + 1, 1, cell_z, + target] - 1 + tree_to_face[2, tree] = 8 # first face dimensions are oppositely oriented, add 6 + else # direction == 6, this is the +z face + target = 2 + tree_to_tree[2, tree] = linear_indices[cell_y, end, cell_z, target] - 1 + tree_to_face[2, tree] = 3 + end + + # Negative y-direction + if cell_y > 1 # Connect to tree at the same 
face + tree_to_tree[3, tree] = linear_indices[cell_x, cell_y - 1, cell_z, + direction] - 1 + tree_to_face[3, tree] = 3 + elseif direction == 1 + target = 5 + tree_to_tree[3, tree] = linear_indices[end, end - cell_x + 1, cell_z, + target] - 1 + tree_to_face[3, tree] = 7 # first face dimensions are oppositely oriented, add 6 + elseif direction == 2 + target = 5 + tree_to_tree[3, tree] = linear_indices[1, cell_x, cell_z, target] - 1 + tree_to_face[3, tree] = 0 + elseif direction == 3 + target = 5 + tree_to_tree[3, tree] = linear_indices[end - cell_x + 1, 1, cell_z, + target] - 1 + tree_to_face[3, tree] = 8 # first face dimensions are oppositely oriented, add 6 + elseif direction == 4 + target = 5 + tree_to_tree[3, tree] = linear_indices[cell_x, end, cell_z, target] - 1 + tree_to_face[3, tree] = 3 + elseif direction == 5 + target = 3 + tree_to_tree[3, tree] = linear_indices[end - cell_x + 1, 1, cell_z, + target] - 1 + tree_to_face[3, tree] = 8 # first face dimensions are oppositely oriented, add 6 + else # direction == 6 + target = 3 + tree_to_tree[3, tree] = linear_indices[cell_x, end, cell_z, target] - 1 + tree_to_face[3, tree] = 3 + end + + # Positive y-direction + if cell_y < n_cells_y # Connect to tree at the same face + tree_to_tree[4, tree] = linear_indices[cell_x, cell_y + 1, cell_z, + direction] - 1 + tree_to_face[4, tree] = 2 + elseif direction == 1 + target = 6 + tree_to_tree[4, tree] = linear_indices[1, end - cell_x + 1, cell_z, + target] - 1 + tree_to_face[4, tree] = 6 # first face dimensions are oppositely oriented, add 6 + elseif direction == 2 + target = 6 + tree_to_tree[4, tree] = linear_indices[end, cell_x, cell_z, target] - 1 + tree_to_face[4, tree] = 1 + elseif direction == 3 + target = 6 + tree_to_tree[4, tree] = linear_indices[cell_x, 1, cell_z, target] - 1 + tree_to_face[4, tree] = 2 + elseif direction == 4 + target = 6 + tree_to_tree[4, tree] = linear_indices[end - cell_x + 1, end, cell_z, + target] - 1 + tree_to_face[4, tree] = 9 # first face dimensions are oppositely oriented, add 6 + elseif direction == 5 + target = 4 + tree_to_tree[4, tree] = linear_indices[cell_x, 1, cell_z, target] - 1 + tree_to_face[4, tree] = 2 + else # direction == 6 + target = 4 + tree_to_tree[4, tree] = linear_indices[end - cell_x + 1, end, cell_z, + target] - 1 + tree_to_face[4, tree] = 9 # first face dimensions are oppositely oriented, add 6 + end + + # Negative z-direction + if cell_z > 1 + tree_to_tree[5, tree] = linear_indices[cell_x, cell_y, cell_z - 1, + direction] - 1 + tree_to_face[5, tree] = 5 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[5, tree] = tree - 1 + tree_to_face[5, tree] = 4 + end + + # Positive z-direction + if cell_z < n_cells_z + tree_to_tree[6, tree] = linear_indices[cell_x, cell_y, cell_z + 1, + direction] - 1 + tree_to_face[6, tree] = 4 + else # Non-periodic boundary, tree and face point to themselves (zero-based indexing) + tree_to_tree[6, tree] = tree - 1 + tree_to_face[6, tree] = 5 + end + end end - end - - tree_to_edge = C_NULL - # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." - # We don't need edge connectivity, so this is a trivial case. - ett_offset = zeros(p4est_topidx_t, 1) - edge_to_tree = C_NULL - edge_to_edge = C_NULL - - tree_to_corner = C_NULL - # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." - # We don't need corner connectivity, so this is a trivial case. 
- ctt_offset = zeros(p4est_topidx_t, 1) - - corner_to_tree = C_NULL - corner_to_corner = C_NULL - - connectivity = p8est_connectivity_new_copy(n_vertices, n_trees, n_corners, n_edges, - vertices, tree_to_vertex, - tree_to_tree, tree_to_face, - tree_to_edge, ett_offset, - edge_to_tree, edge_to_edge, - tree_to_corner, ctt_offset, - corner_to_tree, corner_to_corner) - - @assert p8est_connectivity_is_valid(connectivity) == 1 - - return connectivity -end + tree_to_edge = C_NULL + # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." + # We don't need edge connectivity, so this is a trivial case. + ett_offset = zeros(p4est_topidx_t, 1) + edge_to_tree = C_NULL + edge_to_edge = C_NULL + + tree_to_corner = C_NULL + # `p4est` docs: "in trivial cases it is just a pointer to a p4est_topix value of 0." + # We don't need corner connectivity, so this is a trivial case. + ctt_offset = zeros(p4est_topidx_t, 1) + + corner_to_tree = C_NULL + corner_to_corner = C_NULL + + connectivity = p8est_connectivity_new_copy(n_vertices, n_trees, n_corners, n_edges, + vertices, tree_to_vertex, + tree_to_tree, tree_to_face, + tree_to_edge, ett_offset, + edge_to_tree, edge_to_edge, + tree_to_corner, ctt_offset, + corner_to_tree, corner_to_corner) + + @assert p8est_connectivity_is_valid(connectivity) == 1 + + return connectivity +end # Calculate physical coordinates of each node of a structured mesh. # This function assumes a structured mesh with trees in row order. # 2D version function calc_tree_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, nodes, mapping, trees_per_dimension) - linear_indices = LinearIndices(trees_per_dimension) - - # Get cell length in reference mesh - dx = 2 / trees_per_dimension[1] - dy = 2 / trees_per_dimension[2] - - for cell_y in 1:trees_per_dimension[2], cell_x in 1:trees_per_dimension[1] - tree_id = linear_indices[cell_x, cell_y] + linear_indices = LinearIndices(trees_per_dimension) - # Calculate node coordinates of reference mesh - cell_x_offset = -1 + (cell_x-1) * dx + dx/2 - cell_y_offset = -1 + (cell_y-1) * dy + dy/2 + # Get cell length in reference mesh + dx = 2 / trees_per_dimension[1] + dy = 2 / trees_per_dimension[2] - for j in eachindex(nodes), i in eachindex(nodes) - # node_coordinates are the mapped reference node coordinates - node_coordinates[:, i, j, tree_id] .= mapping(cell_x_offset + dx/2 * nodes[i], - cell_y_offset + dy/2 * nodes[j]) + for cell_y in 1:trees_per_dimension[2], cell_x in 1:trees_per_dimension[1] + tree_id = linear_indices[cell_x, cell_y] + + # Calculate node coordinates of reference mesh + cell_x_offset = -1 + (cell_x - 1) * dx + dx / 2 + cell_y_offset = -1 + (cell_y - 1) * dy + dy / 2 + + for j in eachindex(nodes), i in eachindex(nodes) + # node_coordinates are the mapped reference node coordinates + node_coordinates[:, i, j, tree_id] .= mapping(cell_x_offset + + dx / 2 * nodes[i], + cell_y_offset + + dy / 2 * nodes[j]) + end end - end end # 3D version function calc_tree_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, nodes, mapping, trees_per_dimension) - linear_indices = LinearIndices(trees_per_dimension) - - # Get cell length in reference mesh - dx = 2 / trees_per_dimension[1] - dy = 2 / trees_per_dimension[2] - dz = 2 / trees_per_dimension[3] - - for cell_z in 1:trees_per_dimension[3], - cell_y in 1:trees_per_dimension[2], - cell_x in 1:trees_per_dimension[1] - - tree_id = linear_indices[cell_x, cell_y, cell_z] - - # Calculate node coordinates of reference mesh - cell_x_offset = -1 + (cell_x-1) * dx + 
dx/2 - cell_y_offset = -1 + (cell_y-1) * dy + dy/2 - cell_z_offset = -1 + (cell_z-1) * dz + dz/2 - - for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) - # node_coordinates are the mapped reference node coordinates - node_coordinates[:, i, j, k, tree_id] .= mapping(cell_x_offset + dx/2 * nodes[i], - cell_y_offset + dy/2 * nodes[j], - cell_z_offset + dz/2 * nodes[k]) + linear_indices = LinearIndices(trees_per_dimension) + + # Get cell length in reference mesh + dx = 2 / trees_per_dimension[1] + dy = 2 / trees_per_dimension[2] + dz = 2 / trees_per_dimension[3] + + for cell_z in 1:trees_per_dimension[3], + cell_y in 1:trees_per_dimension[2], + cell_x in 1:trees_per_dimension[1] + + tree_id = linear_indices[cell_x, cell_y, cell_z] + + # Calculate node coordinates of reference mesh + cell_x_offset = -1 + (cell_x - 1) * dx + dx / 2 + cell_y_offset = -1 + (cell_y - 1) * dy + dy / 2 + cell_z_offset = -1 + (cell_z - 1) * dz + dz / 2 + + for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) + # node_coordinates are the mapped reference node coordinates + node_coordinates[:, i, j, k, tree_id] .= mapping(cell_x_offset + + dx / 2 * nodes[i], + cell_y_offset + + dy / 2 * nodes[j], + cell_z_offset + + dz / 2 * nodes[k]) + end end - end end - # Calculate physical coordinates of each node of an unstructured mesh. # Extract corners of each tree from the connectivity, # interpolate to requested interpolation nodes, @@ -1031,389 +1065,408 @@ end # 2D version function calc_tree_node_coordinates!(node_coordinates::AbstractArray{RealT, 4}, nodes, mapping, - vertices, tree_to_vertex) where RealT - nodes_in = [-1.0, 1.0] - matrix = polynomial_interpolation_matrix(nodes_in, nodes) - data_in = Array{RealT, 3}(undef, 2, 2, 2) - tmp1 = zeros(RealT, 2, length(nodes), length(nodes_in)) - - for tree in 1:size(tree_to_vertex, 2) - # Tree vertices are stored in Z-order, ignore z-coordinate in 2D, zero-based indexing - @views data_in[:, 1, 1] .= vertices[1:2, tree_to_vertex[1, tree] + 1] - @views data_in[:, 2, 1] .= vertices[1:2, tree_to_vertex[2, tree] + 1] - @views data_in[:, 1, 2] .= vertices[1:2, tree_to_vertex[3, tree] + 1] - @views data_in[:, 2, 2] .= vertices[1:2, tree_to_vertex[4, tree] + 1] - - # Interpolate corner coordinates to specified nodes - multiply_dimensionwise!( - view(node_coordinates, :, :, :, tree), - matrix, matrix, - data_in, - tmp1 - ) - end - - map_node_coordinates!(node_coordinates, mapping) + vertices, tree_to_vertex) where {RealT} + nodes_in = [-1.0, 1.0] + matrix = polynomial_interpolation_matrix(nodes_in, nodes) + data_in = Array{RealT, 3}(undef, 2, 2, 2) + tmp1 = zeros(RealT, 2, length(nodes), length(nodes_in)) + + for tree in 1:size(tree_to_vertex, 2) + # Tree vertices are stored in Z-order, ignore z-coordinate in 2D, zero-based indexing + @views data_in[:, 1, 1] .= vertices[1:2, tree_to_vertex[1, tree] + 1] + @views data_in[:, 2, 1] .= vertices[1:2, tree_to_vertex[2, tree] + 1] + @views data_in[:, 1, 2] .= vertices[1:2, tree_to_vertex[3, tree] + 1] + @views data_in[:, 2, 2] .= vertices[1:2, tree_to_vertex[4, tree] + 1] + + # Interpolate corner coordinates to specified nodes + multiply_dimensionwise!(view(node_coordinates, :, :, :, tree), + matrix, matrix, + data_in, + tmp1) + end + + map_node_coordinates!(node_coordinates, mapping) end function map_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, mapping) - for tree in axes(node_coordinates, 4), - j in axes(node_coordinates, 3), - i in axes(node_coordinates, 2) + for tree in 
axes(node_coordinates, 4), + j in axes(node_coordinates, 3), + i in axes(node_coordinates, 2) - node_coordinates[:, i, j, tree] .= mapping(node_coordinates[1, i, j, tree], - node_coordinates[2, i, j, tree]) - end + node_coordinates[:, i, j, tree] .= mapping(node_coordinates[1, i, j, tree], + node_coordinates[2, i, j, tree]) + end - return node_coordinates + return node_coordinates end -function map_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, mapping::Nothing) - return node_coordinates +function map_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, + mapping::Nothing) + return node_coordinates end # 3D version function calc_tree_node_coordinates!(node_coordinates::AbstractArray{RealT, 5}, nodes, mapping, - vertices, tree_to_vertex) where RealT - nodes_in = [-1.0, 1.0] - matrix = polynomial_interpolation_matrix(nodes_in, nodes) - data_in = Array{RealT, 4}(undef, 3, 2, 2, 2) - - for tree in 1:size(tree_to_vertex, 2) - # Tree vertices are stored in Z-order, zero-based indexing - @views data_in[:, 1, 1, 1] .= vertices[:, tree_to_vertex[1, tree] + 1] - @views data_in[:, 2, 1, 1] .= vertices[:, tree_to_vertex[2, tree] + 1] - @views data_in[:, 1, 2, 1] .= vertices[:, tree_to_vertex[3, tree] + 1] - @views data_in[:, 2, 2, 1] .= vertices[:, tree_to_vertex[4, tree] + 1] - @views data_in[:, 1, 1, 2] .= vertices[:, tree_to_vertex[5, tree] + 1] - @views data_in[:, 2, 1, 2] .= vertices[:, tree_to_vertex[6, tree] + 1] - @views data_in[:, 1, 2, 2] .= vertices[:, tree_to_vertex[7, tree] + 1] - @views data_in[:, 2, 2, 2] .= vertices[:, tree_to_vertex[8, tree] + 1] - - # Interpolate corner coordinates to specified nodes - multiply_dimensionwise!( - view(node_coordinates, :, :, :, :, tree), - matrix, matrix, matrix, - data_in - ) - end - - map_node_coordinates!(node_coordinates, mapping) + vertices, tree_to_vertex) where {RealT} + nodes_in = [-1.0, 1.0] + matrix = polynomial_interpolation_matrix(nodes_in, nodes) + data_in = Array{RealT, 4}(undef, 3, 2, 2, 2) + + for tree in 1:size(tree_to_vertex, 2) + # Tree vertices are stored in Z-order, zero-based indexing + @views data_in[:, 1, 1, 1] .= vertices[:, tree_to_vertex[1, tree] + 1] + @views data_in[:, 2, 1, 1] .= vertices[:, tree_to_vertex[2, tree] + 1] + @views data_in[:, 1, 2, 1] .= vertices[:, tree_to_vertex[3, tree] + 1] + @views data_in[:, 2, 2, 1] .= vertices[:, tree_to_vertex[4, tree] + 1] + @views data_in[:, 1, 1, 2] .= vertices[:, tree_to_vertex[5, tree] + 1] + @views data_in[:, 2, 1, 2] .= vertices[:, tree_to_vertex[6, tree] + 1] + @views data_in[:, 1, 2, 2] .= vertices[:, tree_to_vertex[7, tree] + 1] + @views data_in[:, 2, 2, 2] .= vertices[:, tree_to_vertex[8, tree] + 1] + + # Interpolate corner coordinates to specified nodes + multiply_dimensionwise!(view(node_coordinates, :, :, :, :, tree), + matrix, matrix, matrix, + data_in) + end + + map_node_coordinates!(node_coordinates, mapping) end function map_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, mapping) - for tree in axes(node_coordinates, 5), - k in axes(node_coordinates, 4), - j in axes(node_coordinates, 3), - i in axes(node_coordinates, 2) - - node_coordinates[:, i, j, k, tree] .= mapping(node_coordinates[1, i, j, k, tree], - node_coordinates[2, i, j, k, tree], - node_coordinates[3, i, j, k, tree]) - end + for tree in axes(node_coordinates, 5), + k in axes(node_coordinates, 4), + j in axes(node_coordinates, 3), + i in axes(node_coordinates, 2) + + node_coordinates[:, i, j, k, tree] .= mapping(node_coordinates[1, i, j, k, + tree], + 
node_coordinates[2, i, j, k, + tree], + node_coordinates[3, i, j, k, + tree]) + end - return node_coordinates + return node_coordinates end -function map_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, mapping::Nothing) - return node_coordinates +function map_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, + mapping::Nothing) + return node_coordinates end - # Calculate physical coordinates of each node of a cubed sphere mesh. function calc_tree_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, - nodes, trees_per_face_dimension, layers, inner_radius, thickness) - n_cells_x = n_cells_y = trees_per_face_dimension - n_cells_z = layers - - linear_indices = LinearIndices((n_cells_x, n_cells_y, n_cells_z, 6)) - - # Get cell length in reference mesh - dx = 2 / n_cells_x - dy = 2 / n_cells_y - dz = 2 / n_cells_z - - for direction in 1:6 - for cell_z in 1:n_cells_z, cell_y in 1:n_cells_y, cell_x in 1:n_cells_x - tree = linear_indices[cell_x, cell_y, cell_z, direction] - - x_offset = -1 + (cell_x - 1) * dx + dx/2 - y_offset = -1 + (cell_y - 1) * dy + dy/2 - z_offset = -1 + (cell_z - 1) * dz + dz/2 - - for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) - # node_coordinates are the mapped reference node coordinates - node_coordinates[:, i, j, k, tree] .= cubed_sphere_mapping( - x_offset + dx/2 * nodes[i], - y_offset + dy/2 * nodes[j], - z_offset + dz/2 * nodes[k], - inner_radius, thickness, direction) - end + nodes, trees_per_face_dimension, layers, + inner_radius, thickness) + n_cells_x = n_cells_y = trees_per_face_dimension + n_cells_z = layers + + linear_indices = LinearIndices((n_cells_x, n_cells_y, n_cells_z, 6)) + + # Get cell length in reference mesh + dx = 2 / n_cells_x + dy = 2 / n_cells_y + dz = 2 / n_cells_z + + for direction in 1:6 + for cell_z in 1:n_cells_z, cell_y in 1:n_cells_y, cell_x in 1:n_cells_x + tree = linear_indices[cell_x, cell_y, cell_z, direction] + + x_offset = -1 + (cell_x - 1) * dx + dx / 2 + y_offset = -1 + (cell_y - 1) * dy + dy / 2 + z_offset = -1 + (cell_z - 1) * dz + dz / 2 + + for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) + # node_coordinates are the mapped reference node coordinates + node_coordinates[:, i, j, k, tree] .= cubed_sphere_mapping(x_offset + + dx / 2 * + nodes[i], + y_offset + + dy / 2 * + nodes[j], + z_offset + + dz / 2 * + nodes[k], + inner_radius, + thickness, + direction) + end + end end - end end # Map the computational coordinates xi, eta, zeta to the specified side of a cubed sphere # with the specified inner radius and thickness. 
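Because the mapping that follows divides by the distance `r` from the origin to the projected point on the unit cube, every mapped node lands exactly on the sphere of radius `R` selected by `zeta`; `zeta = -1` gives the inner shell and `zeta = +1` the outer one. A self-contained sketch of this property (the helper name `cubed_sphere_point` and the sample values are illustrative only, not part of the function below):

    using LinearAlgebra: norm
    using StaticArrays: SVector

    # Illustrative standalone copy of the mapping below, for checking only
    function cubed_sphere_point(xi, eta, zeta, inner_radius, thickness, direction)
        x = tan(xi * pi / 4)   # equiangular projection
        y = tan(eta * pi / 4)
        cube_coordinates = (SVector(-1, -x, y), SVector(1, x, y),
                            SVector(x, -1, y), SVector(-x, 1, y),
                            SVector(-x, y, -1), SVector(x, y, 1))
        r = sqrt(1 + x^2 + y^2)                          # radius on cube surface
        R = inner_radius + thickness * 0.5 * (zeta + 1)  # radius of the sphere
        return R / r * cube_coordinates[direction]
    end

    # zeta = -1 lands on the inner shell, zeta = +1 on the outer shell
    @assert norm(cubed_sphere_point(0.3, -0.7, -1.0, 6.0, 1.0, 2)) ≈ 6.0
    @assert norm(cubed_sphere_point(0.3, -0.7, 1.0, 6.0, 1.0, 2)) ≈ 7.0

The same check holds for all six `direction` values, since each cube-face coordinate vector has norm `r`.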
function cubed_sphere_mapping(xi, eta, zeta, inner_radius, thickness, direction) - alpha = xi * pi/4 - beta = eta * pi/4 + alpha = xi * pi / 4 + beta = eta * pi / 4 - # Equiangular projection - x = tan(alpha) - y = tan(beta) + # Equiangular projection + x = tan(alpha) + y = tan(beta) - # Coordinates on unit cube per direction, see illustration above in the function connectivity_cubed_sphere - cube_coordinates = (SVector(-1, -x, y), - SVector( 1, x, y), - SVector( x, -1, y), - SVector(-x, 1, y), - SVector(-x, y, -1), - SVector( x, y, 1)) + # Coordinates on unit cube per direction, see illustration above in the function connectivity_cubed_sphere + cube_coordinates = (SVector(-1, -x, y), + SVector(1, x, y), + SVector(x, -1, y), + SVector(-x, 1, y), + SVector(-x, y, -1), + SVector(x, y, 1)) - # Radius on cube surface - r = sqrt(1 + x^2 + y^2) + # Radius on cube surface + r = sqrt(1 + x^2 + y^2) - # Radius of the sphere - R = inner_radius + thickness * (0.5 * (zeta + 1)) + # Radius of the sphere + R = inner_radius + thickness * (0.5 * (zeta + 1)) - # Projection onto the sphere - return R / r * cube_coordinates[direction] + # Projection onto the sphere + return R / r * cube_coordinates[direction] end - # Calculate physical coordinates of each element of an unstructured mesh read # in from a HOHQMesh file. This calculation is done with the transfinite interpolation # routines found in `mappings_geometry_curved_2d.jl` or `mappings_geometry_straight_2d.jl` function calc_tree_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, file_lines::Vector{String}, nodes, vertices, RealT) - # Get the number of trees and the number of interpolation nodes - n_trees = last(size(node_coordinates)) - nnodes = length(nodes) - - # Setup the starting file index to read in element indices and the additional - # curved boundary information provided by HOHQMesh. - file_idx = findfirst(contains("** mesh polynomial degree"), file_lines) + 1 - - # Create a work set of Gamma curves to create the node coordinates - CurvedSurfaceT = CurvedSurface{RealT} - surface_curves = Array{CurvedSurfaceT}(undef, 4) - - # Create other work arrays to perform the mesh construction - element_node_ids = Array{Int}(undef, 4) - curved_check = Vector{Int}(undef, 4) - quad_vertices = Array{RealT}(undef, (4, 2)) - quad_vertices_flipped = Array{RealT}(undef, (4, 2)) - curve_values = Array{RealT}(undef, (nnodes, 2)) - - # Create the barycentric weights used for the surface interpolations - bary_weights_ = barycentric_weights(nodes) - bary_weights = SVector{nnodes}(bary_weights_) - - # Loop through all the trees, i.e., the elements generated by HOHQMesh and create the node coordinates. 
- # When we extract information from the `current_line` we start at index 2 in order to - # avoid the Abaqus comment character "** " - for tree in 1:n_trees - # Pull the vertex node IDs - current_line = split(file_lines[file_idx]) - element_node_ids[1] = parse(Int, current_line[2]) - element_node_ids[2] = parse(Int, current_line[3]) - element_node_ids[3] = parse(Int, current_line[4]) - element_node_ids[4] = parse(Int, current_line[5]) - - # Pull the (x,y) values of the four vertices of the current tree out of the global vertices array - for i in 1:4 - quad_vertices[i, :] .= vertices[1:2, element_node_ids[i]] - end - # Pull the information to check if boundary is curved in order to read in additional data - file_idx += 1 - current_line = split(file_lines[file_idx]) - curved_check[1] = parse(Int, current_line[2]) - curved_check[2] = parse(Int, current_line[3]) - curved_check[3] = parse(Int, current_line[4]) - curved_check[4] = parse(Int, current_line[5]) - if sum(curved_check) == 0 - # Create the node coordinates on this particular element - calc_node_coordinates!(node_coordinates, tree, nodes, quad_vertices) - else - # Quadrilateral element has at least one curved side - # Flip node ordering to make sure the element is right-handed for the interpolations - m1 = 1 - m2 = 2 - @views quad_vertices_flipped[1, :] .= quad_vertices[4, :] - @views quad_vertices_flipped[2, :] .= quad_vertices[2, :] - @views quad_vertices_flipped[3, :] .= quad_vertices[3, :] - @views quad_vertices_flipped[4, :] .= quad_vertices[1, :] - for i in 1:4 - if curved_check[i] == 0 - # When curved_check[i] is 0 then the "curve" from vertex `i` to vertex `i+1` is a straight line. - # Evaluate a linear interpolant between the two points at each of the nodes. - for k in 1:nnodes - curve_values[k, 1] = linear_interpolate(nodes[k], quad_vertices_flipped[m1, 1], quad_vertices_flipped[m2, 1]) - curve_values[k, 2] = linear_interpolate(nodes[k], quad_vertices_flipped[m1, 2], quad_vertices_flipped[m2, 2]) - end - else - # When curved_check[i] is 1 this curved boundary information is supplied by the mesh - # generator. So we just read it into a work array - for k in 1:nnodes - file_idx += 1 - current_line = split(file_lines[file_idx]) - curve_values[k, 1] = parse(RealT,current_line[2]) - curve_values[k, 2] = parse(RealT,current_line[3]) - end + # Get the number of trees and the number of interpolation nodes + n_trees = last(size(node_coordinates)) + nnodes = length(nodes) + + # Setup the starting file index to read in element indices and the additional + # curved boundary information provided by HOHQMesh. + file_idx = findfirst(contains("** mesh polynomial degree"), file_lines) + 1 + + # Create a work set of Gamma curves to create the node coordinates + CurvedSurfaceT = CurvedSurface{RealT} + surface_curves = Array{CurvedSurfaceT}(undef, 4) + + # Create other work arrays to perform the mesh construction + element_node_ids = Array{Int}(undef, 4) + curved_check = Vector{Int}(undef, 4) + quad_vertices = Array{RealT}(undef, (4, 2)) + quad_vertices_flipped = Array{RealT}(undef, (4, 2)) + curve_values = Array{RealT}(undef, (nnodes, 2)) + + # Create the barycentric weights used for the surface interpolations + bary_weights_ = barycentric_weights(nodes) + bary_weights = SVector{nnodes}(bary_weights_) + + # Loop through all the trees, i.e., the elements generated by HOHQMesh and create the node coordinates. 
+ # When we extract information from the `current_line` we start at index 2 in order to + # avoid the Abaqus comment character "** " + for tree in 1:n_trees + # Pull the vertex node IDs + current_line = split(file_lines[file_idx]) + element_node_ids[1] = parse(Int, current_line[2]) + element_node_ids[2] = parse(Int, current_line[3]) + element_node_ids[3] = parse(Int, current_line[4]) + element_node_ids[4] = parse(Int, current_line[5]) + + # Pull the (x,y) values of the four vertices of the current tree out of the global vertices array + for i in 1:4 + quad_vertices[i, :] .= vertices[1:2, element_node_ids[i]] end - # Construct the curve interpolant for the current side - surface_curves[i] = CurvedSurfaceT(nodes, bary_weights, copy(curve_values)) - # Indexing update that contains a "flip" to ensure correct element orientation. - # If we need to construct the straight line "curves" when curved_check[i] == 0 - m1 += 1 - if i == 3 - m2 = 1 + # Pull the information to check if boundary is curved in order to read in additional data + file_idx += 1 + current_line = split(file_lines[file_idx]) + curved_check[1] = parse(Int, current_line[2]) + curved_check[2] = parse(Int, current_line[3]) + curved_check[3] = parse(Int, current_line[4]) + curved_check[4] = parse(Int, current_line[5]) + if sum(curved_check) == 0 + # Create the node coordinates on this particular element + calc_node_coordinates!(node_coordinates, tree, nodes, quad_vertices) else - m2 += 1 + # Quadrilateral element has at least one curved side + # Flip node ordering to make sure the element is right-handed for the interpolations + m1 = 1 + m2 = 2 + @views quad_vertices_flipped[1, :] .= quad_vertices[4, :] + @views quad_vertices_flipped[2, :] .= quad_vertices[2, :] + @views quad_vertices_flipped[3, :] .= quad_vertices[3, :] + @views quad_vertices_flipped[4, :] .= quad_vertices[1, :] + for i in 1:4 + if curved_check[i] == 0 + # When curved_check[i] is 0 then the "curve" from vertex `i` to vertex `i+1` is a straight line. + # Evaluate a linear interpolant between the two points at each of the nodes. + for k in 1:nnodes + curve_values[k, 1] = linear_interpolate(nodes[k], + quad_vertices_flipped[m1, + 1], + quad_vertices_flipped[m2, + 1]) + curve_values[k, 2] = linear_interpolate(nodes[k], + quad_vertices_flipped[m1, + 2], + quad_vertices_flipped[m2, + 2]) + end + else + # When curved_check[i] is 1 this curved boundary information is supplied by the mesh + # generator. So we just read it into a work array + for k in 1:nnodes + file_idx += 1 + current_line = split(file_lines[file_idx]) + curve_values[k, 1] = parse(RealT, current_line[2]) + curve_values[k, 2] = parse(RealT, current_line[3]) + end + end + # Construct the curve interpolant for the current side + surface_curves[i] = CurvedSurfaceT(nodes, bary_weights, + copy(curve_values)) + # Indexing update that contains a "flip" to ensure correct element orientation. 
+ # If we need to construct the straight line "curves" when curved_check[i] == 0 + m1 += 1 + if i == 3 + m2 = 1 + else + m2 += 1 + end + end + # Create the node coordinates on this particular element + calc_node_coordinates!(node_coordinates, tree, nodes, surface_curves) end - end - # Create the node coordinates on this particular element - calc_node_coordinates!(node_coordinates, tree, nodes, surface_curves) + # Move file index to the next tree + file_idx += 1 end - # Move file index to the next tree - file_idx += 1 - end - return file_idx + return file_idx end - # Calculate physical coordinates of each element of an unstructured mesh read # in from a HOHQMesh file. This calculation is done with the transfinite interpolation # routines found in `transfinite_mappings_3d.jl` function calc_tree_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, file_lines::Vector{String}, nodes, vertices, RealT) - # Get the number of trees and the number of interpolation nodes - n_trees = last(size(node_coordinates)) - nnodes = length(nodes) - - # Setup the starting file index to read in element indices and the additional - # curved boundary information provided by HOHQMesh. - file_idx = findfirst(contains("** mesh polynomial degree"), file_lines) + 1 - - # Create a work set of Gamma curves to create the node coordinates - CurvedFaceT = CurvedFace{RealT} - face_curves = Array{CurvedFaceT}(undef, 6) - - # Create other work arrays to perform the mesh construction - element_node_ids = Array{Int}(undef, 8) - curved_check = Vector{Int}(undef, 6) - hex_vertices = Array{RealT}(undef, (3, 8)) - face_vertices = Array{RealT}(undef, (3, 4)) - curve_values = Array{RealT}(undef, (3, nnodes, nnodes)) - - # Create the barycentric weights used for the surface interpolations - bary_weights_ = barycentric_weights(nodes) - bary_weights = SVector{nnodes}(bary_weights_) - - # Loop through all the trees, i.e., the elements generated by HOHQMesh and create the node coordinates. - # When we extract information from the `current_line` we start at index 2 in order to - # avoid the Abaqus comment character "** " - for tree in 1:n_trees - # pull the vertex node IDs - current_line = split(file_lines[file_idx]) - element_node_ids[1] = parse(Int, current_line[2]) - element_node_ids[2] = parse(Int, current_line[3]) - element_node_ids[3] = parse(Int, current_line[4]) - element_node_ids[4] = parse(Int, current_line[5]) - element_node_ids[5] = parse(Int, current_line[6]) - element_node_ids[6] = parse(Int, current_line[7]) - element_node_ids[7] = parse(Int, current_line[8]) - element_node_ids[8] = parse(Int, current_line[9]) - - # Pull the (x, y, z) values of the eight vertices of the current tree out of the global vertices array - for i in 1:8 - hex_vertices[:, i] .= vertices[:, element_node_ids[i]] - end - # Pull the information to check if boundary is curved in order to read in additional data - file_idx += 1 - current_line = split(file_lines[file_idx]) - curved_check[1] = parse(Int, current_line[2]) - curved_check[2] = parse(Int, current_line[3]) - curved_check[3] = parse(Int, current_line[4]) - curved_check[4] = parse(Int, current_line[5]) - curved_check[5] = parse(Int, current_line[6]) - curved_check[6] = parse(Int, current_line[7]) - if sum(curved_check) == 0 - # Create the node coordinates on this element - calc_node_coordinates!(node_coordinates, tree, nodes, hex_vertices) - else - # Hexahedral element has at least one curved side - for face in 1:6 - if curved_check[face] == 0 - # Face is a flat plane. 
Evaluate a bilinear interpolant between the four vertices of the face at each of the nodes. - get_vertices_for_bilinear_interpolant!(face_vertices, face, hex_vertices) - for q in 1:nnodes, p in 1:nnodes - @views bilinear_interpolation!(curve_values[:, p, q], face_vertices, nodes[p], nodes[q]) - end - else # curved_check[face] == 1 - # Curved face boundary information is supplied by the mesh file. Just read it into a work array - for q in 1:nnodes, p in 1:nnodes - file_idx += 1 - current_line = split(file_lines[file_idx]) - curve_values[1, p, q] = parse(RealT,current_line[2]) - curve_values[2, p, q] = parse(RealT,current_line[3]) - curve_values[3, p, q] = parse(RealT,current_line[4]) - end + # Get the number of trees and the number of interpolation nodes + n_trees = last(size(node_coordinates)) + nnodes = length(nodes) + + # Setup the starting file index to read in element indices and the additional + # curved boundary information provided by HOHQMesh. + file_idx = findfirst(contains("** mesh polynomial degree"), file_lines) + 1 + + # Create a work set of Gamma curves to create the node coordinates + CurvedFaceT = CurvedFace{RealT} + face_curves = Array{CurvedFaceT}(undef, 6) + + # Create other work arrays to perform the mesh construction + element_node_ids = Array{Int}(undef, 8) + curved_check = Vector{Int}(undef, 6) + hex_vertices = Array{RealT}(undef, (3, 8)) + face_vertices = Array{RealT}(undef, (3, 4)) + curve_values = Array{RealT}(undef, (3, nnodes, nnodes)) + + # Create the barycentric weights used for the surface interpolations + bary_weights_ = barycentric_weights(nodes) + bary_weights = SVector{nnodes}(bary_weights_) + + # Loop through all the trees, i.e., the elements generated by HOHQMesh and create the node coordinates. + # When we extract information from the `current_line` we start at index 2 in order to + # avoid the Abaqus comment character "** " + for tree in 1:n_trees + # pull the vertex node IDs + current_line = split(file_lines[file_idx]) + element_node_ids[1] = parse(Int, current_line[2]) + element_node_ids[2] = parse(Int, current_line[3]) + element_node_ids[3] = parse(Int, current_line[4]) + element_node_ids[4] = parse(Int, current_line[5]) + element_node_ids[5] = parse(Int, current_line[6]) + element_node_ids[6] = parse(Int, current_line[7]) + element_node_ids[7] = parse(Int, current_line[8]) + element_node_ids[8] = parse(Int, current_line[9]) + + # Pull the (x, y, z) values of the eight vertices of the current tree out of the global vertices array + for i in 1:8 + hex_vertices[:, i] .= vertices[:, element_node_ids[i]] + end + # Pull the information to check if boundary is curved in order to read in additional data + file_idx += 1 + current_line = split(file_lines[file_idx]) + curved_check[1] = parse(Int, current_line[2]) + curved_check[2] = parse(Int, current_line[3]) + curved_check[3] = parse(Int, current_line[4]) + curved_check[4] = parse(Int, current_line[5]) + curved_check[5] = parse(Int, current_line[6]) + curved_check[6] = parse(Int, current_line[7]) + if sum(curved_check) == 0 + # Create the node coordinates on this element + calc_node_coordinates!(node_coordinates, tree, nodes, hex_vertices) + else + # Hexahedral element has at least one curved side + for face in 1:6 + if curved_check[face] == 0 + # Face is a flat plane. + # Evaluate a bilinear interpolant between the four vertices + # of the face at each of the nodes. 
+ get_vertices_for_bilinear_interpolant!(face_vertices, face, + hex_vertices) + for q in 1:nnodes, p in 1:nnodes + @views bilinear_interpolation!(curve_values[:, p, q], + face_vertices, nodes[p], + nodes[q]) + end + else # curved_check[face] == 1 + # Curved face boundary information is supplied by + # the mesh file. Just read it into a work array + for q in 1:nnodes, p in 1:nnodes + file_idx += 1 + current_line = split(file_lines[file_idx]) + curve_values[1, p, q] = parse(RealT, current_line[2]) + curve_values[2, p, q] = parse(RealT, current_line[3]) + curve_values[3, p, q] = parse(RealT, current_line[4]) + end + end + # Construct the curve interpolant for the current side + face_curves[face] = CurvedFaceT(nodes, bary_weights, copy(curve_values)) + end + # Create the node coordinates on this particular element + calc_node_coordinates!(node_coordinates, tree, nodes, face_curves) end - # Construct the curve interpolant for the current side - face_curves[face] = CurvedFaceT(nodes, bary_weights, copy(curve_values)) - end - # Create the node coordinates on this particular element - calc_node_coordinates!(node_coordinates, tree, nodes, face_curves) + # Move file index to the next tree + file_idx += 1 end - # Move file index to the next tree - file_idx += 1 - end - return file_idx + return file_idx end - # Given the eight `hex_vertices` for a hexahedral element extract # the four `face_vertices` for a particular `face_index`. function get_vertices_for_bilinear_interpolant!(face_vertices, face_index, hex_vertices) - if face_index == 1 - @views face_vertices[:, 1] .= hex_vertices[:, 1] - @views face_vertices[:, 2] .= hex_vertices[:, 2] - @views face_vertices[:, 3] .= hex_vertices[:, 6] - @views face_vertices[:, 4] .= hex_vertices[:, 5] - elseif face_index == 2 - @views face_vertices[:, 1] .= hex_vertices[:, 4] - @views face_vertices[:, 2] .= hex_vertices[:, 3] - @views face_vertices[:, 3] .= hex_vertices[:, 7] - @views face_vertices[:, 4] .= hex_vertices[:, 8] - elseif face_index == 3 - @views face_vertices[:, 1] .= hex_vertices[:, 1] - @views face_vertices[:, 2] .= hex_vertices[:, 2] - @views face_vertices[:, 3] .= hex_vertices[:, 3] - @views face_vertices[:, 4] .= hex_vertices[:, 4] - elseif face_index == 4 - @views face_vertices[:, 1] .= hex_vertices[:, 2] - @views face_vertices[:, 2] .= hex_vertices[:, 3] - @views face_vertices[:, 3] .= hex_vertices[:, 6] - @views face_vertices[:, 4] .= hex_vertices[:, 7] - elseif face_index == 5 - @views face_vertices[:, 1] .= hex_vertices[:, 5] - @views face_vertices[:, 2] .= hex_vertices[:, 6] - @views face_vertices[:, 3] .= hex_vertices[:, 7] - @views face_vertices[:, 4] .= hex_vertices[:, 8] - else # face_index == 6 - @views face_vertices[:, 1] .= hex_vertices[:, 1] - @views face_vertices[:, 2] .= hex_vertices[:, 4] - @views face_vertices[:, 3] .= hex_vertices[:, 8] - @views face_vertices[:, 4] .= hex_vertices[:, 5] - end + if face_index == 1 + @views face_vertices[:, 1] .= hex_vertices[:, 1] + @views face_vertices[:, 2] .= hex_vertices[:, 2] + @views face_vertices[:, 3] .= hex_vertices[:, 6] + @views face_vertices[:, 4] .= hex_vertices[:, 5] + elseif face_index == 2 + @views face_vertices[:, 1] .= hex_vertices[:, 4] + @views face_vertices[:, 2] .= hex_vertices[:, 3] + @views face_vertices[:, 3] .= hex_vertices[:, 7] + @views face_vertices[:, 4] .= hex_vertices[:, 8] + elseif face_index == 3 + @views face_vertices[:, 1] .= hex_vertices[:, 1] + @views face_vertices[:, 2] .= hex_vertices[:, 2] + @views face_vertices[:, 3] .= hex_vertices[:, 3] + @views 
face_vertices[:, 4] .= hex_vertices[:, 4] + elseif face_index == 4 + @views face_vertices[:, 1] .= hex_vertices[:, 2] + @views face_vertices[:, 2] .= hex_vertices[:, 3] + @views face_vertices[:, 3] .= hex_vertices[:, 6] + @views face_vertices[:, 4] .= hex_vertices[:, 7] + elseif face_index == 5 + @views face_vertices[:, 1] .= hex_vertices[:, 5] + @views face_vertices[:, 2] .= hex_vertices[:, 6] + @views face_vertices[:, 3] .= hex_vertices[:, 7] + @views face_vertices[:, 4] .= hex_vertices[:, 8] + else # face_index == 6 + @views face_vertices[:, 1] .= hex_vertices[:, 1] + @views face_vertices[:, 2] .= hex_vertices[:, 4] + @views face_vertices[:, 3] .= hex_vertices[:, 8] + @views face_vertices[:, 4] .= hex_vertices[:, 5] + end end - # Evaluate a bilinear interpolant at a point (u,v) given the four vertices where the face is right-handed # 4 3 # o----------------o @@ -1427,293 +1480,330 @@ end # 1 2 # and return the 3D coordinate point (x, y, z) function bilinear_interpolation!(coordinate, face_vertices, u, v) - for j in 1:3 - coordinate[j] = 0.25 * ( face_vertices[j,1] * (1 - u) * (1 - v) - + face_vertices[j,2] * (1 + u) * (1 - v) - + face_vertices[j,3] * (1 + u) * (1 + v) - + face_vertices[j,4] * (1 - u) * (1 + v) ) - end + for j in 1:3 + coordinate[j] = 0.25 * (face_vertices[j, 1] * (1 - u) * (1 - v) + + face_vertices[j, 2] * (1 + u) * (1 - v) + + face_vertices[j, 3] * (1 + u) * (1 + v) + + face_vertices[j, 4] * (1 - u) * (1 + v)) + end end - -function balance!(mesh::P4estMesh{2}, init_fn=C_NULL) - p4est_balance(mesh.p4est, P4EST_CONNECT_FACE, init_fn) - # Due to a bug in `p4est`, the forest needs to be rebalanced twice sometimes - # See https://github.com/cburstedde/p4est/issues/112 - p4est_balance(mesh.p4est, P4EST_CONNECT_FACE, init_fn) +function balance!(mesh::P4estMesh{2}, init_fn = C_NULL) + p4est_balance(mesh.p4est, P4EST_CONNECT_FACE, init_fn) + # Due to a bug in `p4est`, the forest needs to be rebalanced twice sometimes + # See https://github.com/cburstedde/p4est/issues/112 + p4est_balance(mesh.p4est, P4EST_CONNECT_FACE, init_fn) end -function balance!(mesh::P4estMesh{3}, init_fn=C_NULL) - p8est_balance(mesh.p4est, P8EST_CONNECT_FACE, init_fn) +function balance!(mesh::P4estMesh{3}, init_fn = C_NULL) + p8est_balance(mesh.p4est, P8EST_CONNECT_FACE, init_fn) end -function partition!(mesh::P4estMesh{2}; weight_fn=C_NULL) - p4est_partition(mesh.p4est, Int(mesh.p4est_partition_allow_for_coarsening), weight_fn) +function partition!(mesh::P4estMesh{2}; weight_fn = C_NULL) + p4est_partition(mesh.p4est, Int(mesh.p4est_partition_allow_for_coarsening), + weight_fn) end -function partition!(mesh::P4estMesh{3}; weight_fn=C_NULL) - p8est_partition(mesh.p4est, Int(mesh.p4est_partition_allow_for_coarsening), weight_fn) +function partition!(mesh::P4estMesh{3}; weight_fn = C_NULL) + p8est_partition(mesh.p4est, Int(mesh.p4est_partition_allow_for_coarsening), + weight_fn) end - function update_ghost_layer!(mesh::P4estMesh) - ghost_destroy_p4est(mesh.ghost) - mesh.ghost = ghost_new_p4est(mesh.p4est) + ghost_destroy_p4est(mesh.ghost) + mesh.ghost = ghost_new_p4est(mesh.p4est) end - function init_fn(p4est, which_tree, quadrant) - # Unpack quadrant's user data ([global quad ID, controller_value]) - ptr = Ptr{Int}(unsafe_load(quadrant.p.user_data)) + # Unpack quadrant's user data ([global quad ID, controller_value]) + ptr = Ptr{Int}(unsafe_load(quadrant.p.user_data)) - # Initialize quad ID as -1 and controller_value as 0 (don't refine or coarsen) - unsafe_store!(ptr, -1, 1) - unsafe_store!(ptr, 0, 2) + # 
Initialize quad ID as -1 and controller_value as 0 (don't refine or coarsen) + unsafe_store!(ptr, -1, 1) + unsafe_store!(ptr, 0, 2) - return nothing + return nothing end # 2D -cfunction(::typeof(init_fn), ::Val{2}) = @cfunction(init_fn, Cvoid, (Ptr{p4est_t}, Ptr{p4est_topidx_t}, Ptr{p4est_quadrant_t})) +function cfunction(::typeof(init_fn), ::Val{2}) + @cfunction(init_fn, Cvoid, + (Ptr{p4est_t}, Ptr{p4est_topidx_t}, Ptr{p4est_quadrant_t})) +end # 3D -cfunction(::typeof(init_fn), ::Val{3}) = @cfunction(init_fn, Cvoid, (Ptr{p8est_t}, Ptr{p4est_topidx_t}, Ptr{p8est_quadrant_t})) +function cfunction(::typeof(init_fn), ::Val{3}) + @cfunction(init_fn, Cvoid, + (Ptr{p8est_t}, Ptr{p4est_topidx_t}, Ptr{p8est_quadrant_t})) +end function refine_fn(p4est, which_tree, quadrant) - # Controller value has been copied to the quadrant's user data storage before. - # Unpack quadrant's user data ([global quad ID, controller_value]). - ptr = Ptr{Int}(unsafe_load(quadrant.p.user_data)) - controller_value = unsafe_load(ptr, 2) - - if controller_value > 0 - # return true (refine) - return Cint(1) - else - # return false (don't refine) - return Cint(0) - end + # Controller value has been copied to the quadrant's user data storage before. + # Unpack quadrant's user data ([global quad ID, controller_value]). + ptr = Ptr{Int}(unsafe_load(quadrant.p.user_data)) + controller_value = unsafe_load(ptr, 2) + + if controller_value > 0 + # return true (refine) + return Cint(1) + else + # return false (don't refine) + return Cint(0) + end end # 2D -cfunction(::typeof(refine_fn), ::Val{2}) = @cfunction(refine_fn, Cint, (Ptr{p4est_t}, Ptr{p4est_topidx_t}, Ptr{p4est_quadrant_t})) +function cfunction(::typeof(refine_fn), ::Val{2}) + @cfunction(refine_fn, Cint, + (Ptr{p4est_t}, Ptr{p4est_topidx_t}, Ptr{p4est_quadrant_t})) +end # 3D -cfunction(::typeof(refine_fn), ::Val{3}) = @cfunction(refine_fn, Cint, (Ptr{p8est_t}, Ptr{p4est_topidx_t}, Ptr{p8est_quadrant_t})) +function cfunction(::typeof(refine_fn), ::Val{3}) + @cfunction(refine_fn, Cint, + (Ptr{p8est_t}, Ptr{p4est_topidx_t}, Ptr{p8est_quadrant_t})) +end # Refine marked cells and rebalance forest. # Return a list of all cells that have been refined during refinement or rebalancing. function refine!(mesh::P4estMesh) - # Copy original element IDs to quad user data storage - original_n_cells = ncells(mesh) - save_original_ids(mesh) + # Copy original element IDs to quad user data storage + original_n_cells = ncells(mesh) + save_original_ids(mesh) - init_fn_c = cfunction(init_fn, Val(ndims(mesh))) - refine_fn_c = cfunction(refine_fn, Val(ndims(mesh))) + init_fn_c = cfunction(init_fn, Val(ndims(mesh))) + refine_fn_c = cfunction(refine_fn, Val(ndims(mesh))) - # Refine marked cells - @trixi_timeit timer() "refine" refine_p4est!(mesh.p4est, false, refine_fn_c, init_fn_c) + # Refine marked cells + @trixi_timeit timer() "refine" refine_p4est!(mesh.p4est, false, refine_fn_c, + init_fn_c) - @trixi_timeit timer() "rebalance" balance!(mesh, init_fn_c) + @trixi_timeit timer() "rebalance" balance!(mesh, init_fn_c) - return collect_changed_cells(mesh, original_n_cells) + return collect_changed_cells(mesh, original_n_cells) end - function coarsen_fn(p4est, which_tree, quadrants_ptr) - quadrants = unsafe_wrap_quadrants(quadrants_ptr, p4est) - - # Controller value has been copied to the quadrant's user data storage before. - # Load controller value from quadrant's user data ([global quad ID, controller_value]). 
- controller_value(i) = unsafe_load(Ptr{Int}(unsafe_load(quadrants[i].p.user_data)), 2) - - # `p4est` calls this function for each 2^ndims quads that could be coarsened to a single one. - # Only coarsen if all these 2^ndims quads have been marked for coarsening. - if all(i -> controller_value(i) < 0, eachindex(quadrants)) - # return true (coarsen) - return Cint(1) - else - # return false (don't coarsen) - return Cint(0) - end + quadrants = unsafe_wrap_quadrants(quadrants_ptr, p4est) + + # Controller value has been copied to the quadrant's user data storage before. + # Load controller value from quadrant's user data ([global quad ID, controller_value]). + function controller_value(i) + unsafe_load(Ptr{Int}(unsafe_load(quadrants[i].p.user_data)), 2) + end + + # `p4est` calls this function for each 2^ndims quads that could be coarsened to a single one. + # Only coarsen if all these 2^ndims quads have been marked for coarsening. + if all(i -> controller_value(i) < 0, eachindex(quadrants)) + # return true (coarsen) + return Cint(1) + else + # return false (don't coarsen) + return Cint(0) + end end # 2D -unsafe_wrap_quadrants(quadrants_ptr, ::Ptr{p4est_t}) = unsafe_wrap(Array, quadrants_ptr, 4) +function unsafe_wrap_quadrants(quadrants_ptr, ::Ptr{p4est_t}) + unsafe_wrap(Array, quadrants_ptr, 4) +end # 3D -unsafe_wrap_quadrants(quadrants_ptr, ::Ptr{p8est_t}) = unsafe_wrap(Array, quadrants_ptr, 8) +function unsafe_wrap_quadrants(quadrants_ptr, ::Ptr{p8est_t}) + unsafe_wrap(Array, quadrants_ptr, 8) +end # 2D -cfunction(::typeof(coarsen_fn), ::Val{2}) = @cfunction(coarsen_fn, Cint, (Ptr{p4est_t}, Ptr{p4est_topidx_t}, Ptr{Ptr{p4est_quadrant_t}})) +function cfunction(::typeof(coarsen_fn), ::Val{2}) + @cfunction(coarsen_fn, Cint, + (Ptr{p4est_t}, Ptr{p4est_topidx_t}, Ptr{Ptr{p4est_quadrant_t}})) +end # 3D -cfunction(::typeof(coarsen_fn), ::Val{3}) = @cfunction(coarsen_fn, Cint, (Ptr{p8est_t}, Ptr{p4est_topidx_t}, Ptr{Ptr{p8est_quadrant_t}})) +function cfunction(::typeof(coarsen_fn), ::Val{3}) + @cfunction(coarsen_fn, Cint, + (Ptr{p8est_t}, Ptr{p4est_topidx_t}, Ptr{Ptr{p8est_quadrant_t}})) +end # Coarsen marked cells if the forest will stay balanced. # Return a list of all cells that have been coarsened. function coarsen!(mesh::P4estMesh) - # Copy original element IDs to quad user data storage - original_n_cells = ncells(mesh) - save_original_ids(mesh) - - # Coarsen marked cells - coarsen_fn_c = cfunction(coarsen_fn, Val(ndims(mesh))) - init_fn_c = cfunction(init_fn, Val(ndims(mesh))) - - @trixi_timeit timer() "coarsen!" coarsen_p4est!(mesh.p4est, false, coarsen_fn_c, init_fn_c) - - # IDs of newly created cells (one-based) - new_cells = collect_new_cells(mesh) - # Old IDs of cells that have been coarsened (one-based) - coarsened_cells_vec = collect_changed_cells(mesh, original_n_cells) - # 2^ndims changed cells should have been coarsened to one new cell. - # This matrix will store the IDs of all cells that have been coarsened to cell new_cells[i] - # in the i-th column. - coarsened_cells = reshape(coarsened_cells_vec, 2^ndims(mesh), length(new_cells)) - - # Save new original IDs to find out what changed after balancing - intermediate_n_cells = ncells(mesh) - save_original_ids(mesh) - - @trixi_timeit timer() "rebalance" balance!(mesh, init_fn_c) - - refined_cells = collect_changed_cells(mesh, intermediate_n_cells) - - # Some cells may have been coarsened even though they unbalanced the forest. - # These cells have now been refined again by p4est_balance. 
- # refined_cells contains the intermediate IDs (ID of coarse cell - # between coarsening and balancing) of these cells. - # Find original ID of each cell that has been coarsened and then refined again. - for refined_cell in refined_cells - # i-th cell of the ones that have been created by coarsening has been refined again - i = findfirst(==(refined_cell), new_cells) - - # Remove IDs of the 2^ndims cells that have been coarsened to this cell - coarsened_cells[:, i] .= -1 - end - - # Return all IDs of cells that have been coarsened but not refined again by balancing - return coarsened_cells_vec[coarsened_cells_vec .>= 0] -end + # Copy original element IDs to quad user data storage + original_n_cells = ncells(mesh) + save_original_ids(mesh) + + # Coarsen marked cells + coarsen_fn_c = cfunction(coarsen_fn, Val(ndims(mesh))) + init_fn_c = cfunction(init_fn, Val(ndims(mesh))) + + @trixi_timeit timer() "coarsen!" coarsen_p4est!(mesh.p4est, false, coarsen_fn_c, + init_fn_c) + + # IDs of newly created cells (one-based) + new_cells = collect_new_cells(mesh) + # Old IDs of cells that have been coarsened (one-based) + coarsened_cells_vec = collect_changed_cells(mesh, original_n_cells) + # 2^ndims changed cells should have been coarsened to one new cell. + # This matrix will store the IDs of all cells that have been coarsened to cell new_cells[i] + # in the i-th column. + coarsened_cells = reshape(coarsened_cells_vec, 2^ndims(mesh), length(new_cells)) + + # Save new original IDs to find out what changed after balancing + intermediate_n_cells = ncells(mesh) + save_original_ids(mesh) + + @trixi_timeit timer() "rebalance" balance!(mesh, init_fn_c) + + refined_cells = collect_changed_cells(mesh, intermediate_n_cells) + + # Some cells may have been coarsened even though they unbalanced the forest. + # These cells have now been refined again by p4est_balance. + # refined_cells contains the intermediate IDs (ID of coarse cell + # between coarsening and balancing) of these cells. + # Find original ID of each cell that has been coarsened and then refined again. 
+ for refined_cell in refined_cells + # i-th cell of the ones that have been created by coarsening has been refined again + i = findfirst(==(refined_cell), new_cells) + + # Remove IDs of the 2^ndims cells that have been coarsened to this cell + coarsened_cells[:, i] .= -1 + end + # Return all IDs of cells that have been coarsened but not refined again by balancing + return coarsened_cells_vec[coarsened_cells_vec .>= 0] +end # Copy global quad ID to quad's user data storage, will be called below function save_original_id_iter_volume(info, user_data) - info_obj = unsafe_load(info) + info_obj = unsafe_load(info) - # Load tree from global trees array, one-based indexing - tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) - # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset - # Global quad ID - quad_id = offset + info_obj.quadid + # Load tree from global trees array, one-based indexing + tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) + # Quadrant numbering offset of this quadrant + offset = tree.quadrants_offset + # Global quad ID + quad_id = offset + info_obj.quadid - # Unpack quadrant's user data ([global quad ID, controller_value]) - ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) - # Save global quad ID - unsafe_store!(ptr, quad_id, 1) + # Unpack quadrant's user data ([global quad ID, controller_value]) + ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) + # Save global quad ID + unsafe_store!(ptr, quad_id, 1) - return nothing + return nothing end # 2D -cfunction(::typeof(save_original_id_iter_volume), ::Val{2}) = @cfunction(save_original_id_iter_volume, Cvoid, (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(save_original_id_iter_volume), ::Val{2}) + @cfunction(save_original_id_iter_volume, Cvoid, + (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(save_original_id_iter_volume), ::Val{3}) = @cfunction(save_original_id_iter_volume, Cvoid, (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(save_original_id_iter_volume), ::Val{3}) + @cfunction(save_original_id_iter_volume, Cvoid, + (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +end # Copy old element IDs to each quad's user data storage function save_original_ids(mesh::P4estMesh) - iter_volume_c = cfunction(save_original_id_iter_volume, Val(ndims(mesh))) + iter_volume_c = cfunction(save_original_id_iter_volume, Val(ndims(mesh))) - iterate_p4est(mesh.p4est, C_NULL; iter_volume_c=iter_volume_c) + iterate_p4est(mesh.p4est, C_NULL; iter_volume_c = iter_volume_c) end - # Extract information about which cells have been changed function collect_changed_iter_volume(info, user_data) - info_obj = unsafe_load(info) - - # The original element ID has been saved to user_data before. - # Load original quad ID from quad's user data ([global quad ID, controller_value]). - quad_data_ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) - original_id = unsafe_load(quad_data_ptr, 1) - - # original_id of cells that have been newly created is -1 - if original_id >= 0 - # Unpack user_data = original_cells - user_data_ptr = Ptr{Int}(user_data) - - # If quad has an original_id, it existed before refinement/coarsening, - # and therefore wasn't changed. - # Mark original_id as "not changed during refinement/coarsening" in original_cells - unsafe_store!(user_data_ptr, 0, original_id + 1) - end + info_obj = unsafe_load(info) + + # The original element ID has been saved to user_data before. 
+ # Load original quad ID from quad's user data ([global quad ID, controller_value]). + quad_data_ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) + original_id = unsafe_load(quad_data_ptr, 1) + + # original_id of cells that have been newly created is -1 + if original_id >= 0 + # Unpack user_data = original_cells + user_data_ptr = Ptr{Int}(user_data) + + # If quad has an original_id, it existed before refinement/coarsening, + # and therefore wasn't changed. + # Mark original_id as "not changed during refinement/coarsening" in original_cells + unsafe_store!(user_data_ptr, 0, original_id + 1) + end - return nothing + return nothing end # 2D -cfunction(::typeof(collect_changed_iter_volume), ::Val{2}) = @cfunction(collect_changed_iter_volume, Cvoid, (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(collect_changed_iter_volume), ::Val{2}) + @cfunction(collect_changed_iter_volume, Cvoid, + (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(collect_changed_iter_volume), ::Val{3}) = @cfunction(collect_changed_iter_volume, Cvoid, (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(collect_changed_iter_volume), ::Val{3}) + @cfunction(collect_changed_iter_volume, Cvoid, + (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid})) +end function collect_changed_cells(mesh::P4estMesh, original_n_cells) - original_cells = collect(1:original_n_cells) + original_cells = collect(1:original_n_cells) - # Iterate over all quads and set original cells that haven't been changed to zero - iter_volume_c = cfunction(collect_changed_iter_volume, Val(ndims(mesh))) + # Iterate over all quads and set original cells that haven't been changed to zero + iter_volume_c = cfunction(collect_changed_iter_volume, Val(ndims(mesh))) - iterate_p4est(mesh.p4est, original_cells; iter_volume_c=iter_volume_c) + iterate_p4est(mesh.p4est, original_cells; iter_volume_c = iter_volume_c) - # Changed cells are all that haven't been set to zero above - changed_original_cells = original_cells[original_cells .> 0] + # Changed cells are all that haven't been set to zero above + changed_original_cells = original_cells[original_cells .> 0] - return changed_original_cells + return changed_original_cells end - # Extract newly created cells function collect_new_iter_volume(info, user_data) - info_obj = unsafe_load(info) - - # The original element ID has been saved to user_data before. - # Unpack quadrant's user data ([global quad ID, controller_value]). - quad_data_ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) - original_id = unsafe_load(quad_data_ptr, 1) - - # original_id of cells that have been newly created is -1 - if original_id < 0 - # Load tree from global trees array, one-based indexing - tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) - # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset - # Global quad ID - quad_id = offset + info_obj.quadid - - # Unpack user_data = original_cells - user_data_ptr = Ptr{Int}(user_data) - - # Mark cell as "newly created during refinement/coarsening/balancing" - unsafe_store!(user_data_ptr, 1, quad_id + 1) - end + info_obj = unsafe_load(info) + + # The original element ID has been saved to user_data before. + # Unpack quadrant's user data ([global quad ID, controller_value]). 
+    quad_data_ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data))
+    original_id = unsafe_load(quad_data_ptr, 1)
+
+    # original_id of cells that have been newly created is -1
+    if original_id < 0
+        # Load tree from global trees array, one-based indexing
+        tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1)
+        # Quadrant numbering offset of this quadrant
+        offset = tree.quadrants_offset
+        # Global quad ID
+        quad_id = offset + info_obj.quadid
+
+        # Unpack user_data = original_cells
+        user_data_ptr = Ptr{Int}(user_data)
+
+        # Mark cell as "newly created during refinement/coarsening/balancing"
+        unsafe_store!(user_data_ptr, 1, quad_id + 1)
+    end

-  return nothing
+    return nothing
 end

 # 2D
-cfunction(::typeof(collect_new_iter_volume), ::Val{2}) = @cfunction(collect_new_iter_volume, Cvoid, (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid}))
+function cfunction(::typeof(collect_new_iter_volume), ::Val{2})
+    @cfunction(collect_new_iter_volume, Cvoid,
+               (Ptr{p4est_iter_volume_info_t}, Ptr{Cvoid}))
+end
 # 3D
-cfunction(::typeof(collect_new_iter_volume), ::Val{3}) = @cfunction(collect_new_iter_volume, Cvoid, (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid}))
+function cfunction(::typeof(collect_new_iter_volume), ::Val{3})
+    @cfunction(collect_new_iter_volume, Cvoid,
+               (Ptr{p8est_iter_volume_info_t}, Ptr{Cvoid}))
+end

 function collect_new_cells(mesh::P4estMesh)
-  cell_is_new = zeros(Int, ncells(mesh))
+    cell_is_new = zeros(Int, ncells(mesh))

-  # Iterate over all quads and set original cells that have been changed to one
-  iter_volume_c = cfunction(collect_new_iter_volume, Val(ndims(mesh)))
+    # Iterate over all quads and mark cells that have been newly created with one
+    iter_volume_c = cfunction(collect_new_iter_volume, Val(ndims(mesh)))

-  iterate_p4est(mesh.p4est, cell_is_new; iter_volume_c=iter_volume_c)
+    iterate_p4est(mesh.p4est, cell_is_new; iter_volume_c = iter_volume_c)

-  # Changed cells are all that haven't been set to zero above
-  new_cells = findall(==(1), cell_is_new)
+    # New cells are all that have been marked with one above
+    new_cells = findall(==(1), cell_is_new)

-  return new_cells
+    return new_cells
 end
-
-
 end # @muladd
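All of the AMR callbacks above communicate through the same two-slot layout of each quadrant's user data: slot 1 carries the global quad ID (`-1` for quadrants created during the current adaptation) and slot 2 carries the controller value written before refinement or coarsening. A minimal sketch of that convention, mirroring the `unsafe_load` pattern used inline by `refine_fn` and `collect_new_iter_volume`; the helper name `unpack_quad_user_data` is illustrative only, and `quadrant` is assumed to be a loaded `p4est_quadrant_t`:

    # Illustrative helper only: decodes the [global quad ID, controller value]
    # pair stored in a quadrant's user data, as the callbacks above do inline.
    function unpack_quad_user_data(quadrant)
        ptr = Ptr{Int}(unsafe_load(quadrant.p.user_data))
        quad_id = unsafe_load(ptr, 1)            # -1 marks a newly created quadrant
        controller_value = unsafe_load(ptr, 2)   # > 0 refine, < 0 coarsen, 0 keep
        return quad_id, controller_value
    end
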
diff --git a/src/meshes/parallel_tree.jl b/src/meshes/parallel_tree.jl
index 22f9e1e6975..83d99c4d110 100644
--- a/src/meshes/parallel_tree.jl
+++ b/src/meshes/parallel_tree.jl
@@ -3,7 +3,7 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#! format: noindent

 # Composite type that represents a NDIMS-dimensional tree (parallel version).
 #
@@ -26,211 +26,211 @@
 # way. Also, depth-first ordering *might* not be guaranteed during
 # refinement/coarsening operations.
 mutable struct ParallelTree{NDIMS} <: AbstractTree{NDIMS}
-  parent_ids::Vector{Int}
-  child_ids::Matrix{Int}
-  neighbor_ids::Matrix{Int}
-  levels::Vector{Int}
-  coordinates::Matrix{Float64}
-  original_cell_ids::Vector{Int}
-  mpi_ranks::Vector{Int}
-
-  capacity::Int
-  length::Int
-  dummy::Int
-
-  center_level_0::SVector{NDIMS, Float64}
-  length_level_0::Float64
-  periodicity::NTuple{NDIMS, Bool}
-
-  function ParallelTree{NDIMS}(capacity::Integer) where NDIMS
-    # Verify that NDIMS is an integer
-    @assert NDIMS isa Integer
-
-    # Create instance
-    t = new()
-
-    # Initialize fields with defaults
-    # Note: length as capacity + 1 is to use `capacity + 1` as temporary storage for swap operations
-    t.parent_ids = fill(typemin(Int), capacity + 1)
-    t.child_ids = fill(typemin(Int), 2^NDIMS, capacity + 1)
-    t.neighbor_ids = fill(typemin(Int), 2*NDIMS, capacity + 1)
-    t.levels = fill(typemin(Int), capacity + 1)
-    t.coordinates = fill(NaN, NDIMS, capacity + 1)
-    t.original_cell_ids = fill(typemin(Int), capacity + 1)
-    t.mpi_ranks = fill(typemin(Int), capacity + 1)
-
-    t.capacity = capacity
-    t.length = 0
-    t.dummy = capacity + 1
-
-    t.center_level_0 = SVector(ntuple(_ -> NaN, NDIMS))
-    t.length_level_0 = NaN
-
-    return t
-  end
+    parent_ids::Vector{Int}
+    child_ids::Matrix{Int}
+    neighbor_ids::Matrix{Int}
+    levels::Vector{Int}
+    coordinates::Matrix{Float64}
+    original_cell_ids::Vector{Int}
+    mpi_ranks::Vector{Int}
+
+    capacity::Int
+    length::Int
+    dummy::Int
+
+    center_level_0::SVector{NDIMS, Float64}
+    length_level_0::Float64
+    periodicity::NTuple{NDIMS, Bool}
+
+    function ParallelTree{NDIMS}(capacity::Integer) where {NDIMS}
+        # Verify that NDIMS is an integer
+        @assert NDIMS isa Integer
+
+        # Create instance
+        t = new()
+
+        # Initialize fields with defaults
+        # Note: arrays are allocated with `capacity + 1` entries so that the last
+        # slot can serve as temporary storage for swap operations
+        t.parent_ids = fill(typemin(Int), capacity + 1)
+        t.child_ids = fill(typemin(Int), 2^NDIMS, capacity + 1)
+        t.neighbor_ids = fill(typemin(Int), 2 * NDIMS, capacity + 1)
+        t.levels = fill(typemin(Int), capacity + 1)
+        t.coordinates = fill(NaN, NDIMS, capacity + 1)
+        t.original_cell_ids = fill(typemin(Int), capacity + 1)
+        t.mpi_ranks = fill(typemin(Int), capacity + 1)
+
+        t.capacity = capacity
+        t.length = 0
+        t.dummy = capacity + 1
+
+        t.center_level_0 = SVector(ntuple(_ -> NaN, NDIMS))
+        t.length_level_0 = NaN
+
+        return t
+    end
 end
-
 # Constructor for passing the dimension as an argument
-ParallelTree(::Val{NDIMS}, args...) where NDIMS = ParallelTree{NDIMS}(args...)
+ParallelTree(::Val{NDIMS}, args...) where {NDIMS} = ParallelTree{NDIMS}(args...)
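Since the inner constructor allocates every per-cell array up front, a freshly constructed tree is empty but fully sized. A brief sketch of the resulting invariants (capacity 10 is an arbitrary example, and the snippet assumes it runs where `ParallelTree` is defined):

    t = ParallelTree{2}(10)                  # 2D tree with room for 10 cells
    @assert t.length == 0                    # no cells yet; init! creates the root
    @assert t.capacity == 10
    @assert t.dummy == 11                    # slot capacity + 1 is swap space
    @assert size(t.child_ids) == (4, 11)     # 2^NDIMS children per cell
    @assert size(t.neighbor_ids) == (4, 11)  # 2 * NDIMS directions
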
# Create and initialize tree
 function ParallelTree{NDIMS}(capacity::Int, center::AbstractArray{Float64},
-                             length::Real, periodicity=true) where NDIMS
-  # Create instance
-  t = ParallelTree{NDIMS}(capacity)
+                             length::Real, periodicity = true) where {NDIMS}
+    # Create instance
+    t = ParallelTree{NDIMS}(capacity)

-  # Initialize root cell
-  init!(t, center, length, periodicity)
+    # Initialize root cell
+    init!(t, center, length, periodicity)

-  return t
+    return t
 end

 # Constructor accepting a single number as center (as opposed to an array) for 1D
-ParallelTree{1}(cap::Int, center::Real, len::Real, periodicity=true) = ParallelTree{1}(cap, [convert(Float64, center)], len, periodicity)
-
+function ParallelTree{1}(cap::Int, center::Real, len::Real, periodicity = true)
+    ParallelTree{1}(cap, [convert(Float64, center)], len, periodicity)
+end

 # Clear tree by deleting data structures, store center and length, and create root cell
-function init!(t::ParallelTree, center::AbstractArray{Float64}, length::Real, periodicity=true)
-  clear!(t)
-
-  # Set domain information
-  t.center_level_0 = center
-  t.length_level_0 = length
-
-  # Create root cell
-  t.length += 1
-  t.parent_ids[1] = 0
-  t.child_ids[:, 1] .= 0
-  t.levels[1] = 0
-  set_cell_coordinates!(t, t.center_level_0, 1)
-  t.original_cell_ids[1] = 0
-  t.mpi_ranks[1] = typemin(Int)
-
-  # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor
-  if all(periodicity)
-    # Also catches case where periodicity = true
-    t.neighbor_ids[:, 1] .= 1
-    t.periodicity = ntuple(x->true, ndims(t))
-  elseif !any(periodicity)
-    # Also catches case where periodicity = false
-    t.neighbor_ids[:, 1] .= 0
-    t.periodicity = ntuple(x->false, ndims(t))
-  else
-    # Default case if periodicity is an iterable
-    for dimension in 1:ndims(t)
-      if periodicity[dimension]
-        t.neighbor_ids[2 * dimension - 1, 1] = 1
-        t.neighbor_ids[2 * dimension - 0, 1] = 1
-      else
-        t.neighbor_ids[2 * dimension - 1, 1] = 0
-        t.neighbor_ids[2 * dimension - 0, 1] = 0
-      end
-    end
-
-    t.periodicity = Tuple(periodicity)
-  end
+function init!(t::ParallelTree, center::AbstractArray{Float64}, length::Real,
+               periodicity = true)
+    clear!(t)
+
+    # Set domain information
+    t.center_level_0 = center
+    t.length_level_0 = length
+
+    # Create root cell
+    t.length += 1
+    t.parent_ids[1] = 0
+    t.child_ids[:, 1] .= 0
+    t.levels[1] = 0
+    set_cell_coordinates!(t, t.center_level_0, 1)
+    t.original_cell_ids[1] = 0
+    t.mpi_ranks[1] = typemin(Int)
+
+    # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor
+    if all(periodicity)
+        # Also catches case where periodicity = true
+        t.neighbor_ids[:, 1] .= 1
+        t.periodicity = ntuple(x -> true, ndims(t))
+    elseif !any(periodicity)
+        # Also catches case where periodicity = false
+        t.neighbor_ids[:, 1] .= 0
+        t.periodicity = ntuple(x -> false, ndims(t))
+    else
+        # Default case if periodicity is an iterable
+        for dimension in 1:ndims(t)
+            if periodicity[dimension]
+                t.neighbor_ids[2 * dimension - 1, 1] = 1
+                t.neighbor_ids[2 * dimension - 0, 1] = 1
+            else
+                t.neighbor_ids[2 * dimension - 1, 1] = 0
+                t.neighbor_ids[2 * dimension - 0, 1] = 0
+            end
+        end
+
+        t.periodicity = Tuple(periodicity)
+    end
 end

 # Convenience output for debugging
 function Base.show(io::IO, ::MIME"text/plain", t::ParallelTree)
-  @nospecialize t # reduce precompilation time
-
-  l = t.length
-  println(io, '*'^20)
-  println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])")
-  println(io, "transpose(t.child_ids[:, 1:l]) = $(transpose(t.child_ids[:, 1:l]))")
-  println(io, "transpose(t.neighbor_ids[:, 1:l]) = $(transpose(t.neighbor_ids[:, 1:l]))")
-  println(io, "t.levels[1:l] = $(t.levels[1:l])")
-  println(io, "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))")
-  println(io, "t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])")
-  println(io, "t.mpi_ranks[1:l] = $(t.mpi_ranks[1:l])")
-  println(io, "t.capacity = $(t.capacity)")
-  println(io, "t.length = $(t.length)")
-  println(io, "t.dummy = $(t.dummy)")
-  println(io, "t.center_level_0 = $(t.center_level_0)")
-  println(io, "t.length_level_0 = $(t.length_level_0)")
-  println(io, '*'^20)
+    @nospecialize t # reduce precompilation time
+
+    l = t.length
+    println(io, '*'^20)
+    println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])")
+    println(io, "transpose(t.child_ids[:, 1:l]) = $(transpose(t.child_ids[:, 1:l]))")
+    println(io,
+            "transpose(t.neighbor_ids[:, 1:l]) = $(transpose(t.neighbor_ids[:, 1:l]))")
+    println(io, "t.levels[1:l] = $(t.levels[1:l])")
+    println(io,
+            "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))")
+    println(io, "t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])")
+    println(io, "t.mpi_ranks[1:l] = $(t.mpi_ranks[1:l])")
+    println(io, "t.capacity = $(t.capacity)")
+    println(io, "t.length = $(t.length)")
+    println(io, "t.dummy = $(t.dummy)")
+    println(io, "t.center_level_0 = $(t.center_level_0)")
+    println(io, "t.length_level_0 = $(t.length_level_0)")
+    println(io, '*'^20)
 end

 # Check if cell is own cell, i.e., belongs to this MPI rank
 is_own_cell(t::ParallelTree, cell_id) = t.mpi_ranks[cell_id] == mpi_rank()

 # Return an array with the ids of all leaf cells for a given rank
-leaf_cells_by_rank(t::ParallelTree, rank) = filter_leaf_cells(t) do cell_id
-  t.mpi_ranks[cell_id] == rank
-end
+leaf_cells_by_rank(t::ParallelTree, rank) =
+    filter_leaf_cells(t) do cell_id
+        t.mpi_ranks[cell_id] == rank
+    end

 # Return an array with the ids of all local leaf cells
 local_leaf_cells(t::ParallelTree) = leaf_cells_by_rank(t, mpi_rank())

 # Set information for child cell `child_id` based on parent cell `cell_id` (except neighbors)
 function init_child!(t::ParallelTree, cell_id, child, child_id)
-  t.parent_ids[child_id] = cell_id
-  t.child_ids[child, cell_id] = child_id
-  t.child_ids[:, child_id] .= 0
-  t.levels[child_id] = t.levels[cell_id] + 1
-  set_cell_coordinates!(t,
-    child_coordinates(t, cell_coordinates(t, cell_id), length_at_cell(t, cell_id), child), child_id)
-  t.original_cell_ids[child_id] = 0
-  t.mpi_ranks[child_id] = t.mpi_ranks[cell_id]
-
-  return nothing
+    t.parent_ids[child_id] = cell_id
+    t.child_ids[child, cell_id] = child_id
+    t.child_ids[:, child_id] .= 0
+    t.levels[child_id] = t.levels[cell_id] + 1
+    set_cell_coordinates!(t,
+                          child_coordinates(t, cell_coordinates(t, cell_id),
+                                            length_at_cell(t, cell_id), child),
+                          child_id)
+    t.original_cell_ids[child_id] = 0
+    t.mpi_ranks[child_id] = t.mpi_ranks[cell_id]
+
+    return nothing
 end
function invalidate!(t::ParallelTree, first::Int, last::Int) - @assert first > 0 - @assert last <= t.capacity + 1 - - # Integer values are set to smallest negative value, floating point values to NaN - t.parent_ids[first:last] .= typemin(Int) - t.child_ids[:, first:last] .= typemin(Int) - t.neighbor_ids[:, first:last] .= typemin(Int) - t.levels[first:last] .= typemin(Int) - t.coordinates[:, first:last] .= NaN - t.original_cell_ids[first:last] .= typemin(Int) - t.mpi_ranks[first:last] .= typemin(Int) - - return nothing + @assert first > 0 + @assert last <= t.capacity + 1 + + # Integer values are set to smallest negative value, floating point values to NaN + t.parent_ids[first:last] .= typemin(Int) + t.child_ids[:, first:last] .= typemin(Int) + t.neighbor_ids[:, first:last] .= typemin(Int) + t.levels[first:last] .= typemin(Int) + t.coordinates[:, first:last] .= NaN + t.original_cell_ids[first:last] .= typemin(Int) + t.mpi_ranks[first:last] .= typemin(Int) + + return nothing end - # Raw copy operation for ranges of cells. # # This method is used by the higher-level copy operations for AbstractContainer -function raw_copy!(target::ParallelTree, source::ParallelTree, first::Int, last::Int, destination::Int) - copy_data!(target.parent_ids, source.parent_ids, first, last, destination) - copy_data!(target.child_ids, source.child_ids, first, last, destination, - n_children_per_cell(target)) - copy_data!(target.neighbor_ids, source.neighbor_ids, first, last, - destination, n_directions(target)) - copy_data!(target.levels, source.levels, first, last, destination) - copy_data!(target.coordinates, source.coordinates, first, last, destination, ndims(target)) - copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, destination) - copy_data!(target.mpi_ranks, source.mpi_ranks, first, last, destination) +function raw_copy!(target::ParallelTree, source::ParallelTree, first::Int, last::Int, + destination::Int) + copy_data!(target.parent_ids, source.parent_ids, first, last, destination) + copy_data!(target.child_ids, source.child_ids, first, last, destination, + n_children_per_cell(target)) + copy_data!(target.neighbor_ids, source.neighbor_ids, first, last, + destination, n_directions(target)) + copy_data!(target.levels, source.levels, first, last, destination) + copy_data!(target.coordinates, source.coordinates, first, last, destination, + ndims(target)) + copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, + destination) + copy_data!(target.mpi_ranks, source.mpi_ranks, first, last, destination) end - # Reset data structures by recreating all internal storage containers and invalidating all elements -function reset_data_structures!(t::ParallelTree{NDIMS}) where NDIMS - t.parent_ids = Vector{Int}(undef, t.capacity + 1) - t.child_ids = Matrix{Int}(undef, 2^NDIMS, t.capacity + 1) - t.neighbor_ids = Matrix{Int}(undef, 2*NDIMS, t.capacity + 1) - t.levels = Vector{Int}(undef, t.capacity + 1) - t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) - t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) - t.mpi_ranks = Vector{Int}(undef, t.capacity + 1) - - invalidate!(t, 1, capacity(t) + 1) +function reset_data_structures!(t::ParallelTree{NDIMS}) where {NDIMS} + t.parent_ids = Vector{Int}(undef, t.capacity + 1) + t.child_ids = Matrix{Int}(undef, 2^NDIMS, t.capacity + 1) + t.neighbor_ids = Matrix{Int}(undef, 2 * NDIMS, t.capacity + 1) + t.levels = Vector{Int}(undef, t.capacity + 1) + t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) + 
t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) + t.mpi_ranks = Vector{Int}(undef, t.capacity + 1) + + invalidate!(t, 1, capacity(t) + 1) end - - end # @muladd diff --git a/src/meshes/parallel_tree_mesh.jl b/src/meshes/parallel_tree_mesh.jl index 0bad9befedf..050e419680c 100644 --- a/src/meshes/parallel_tree_mesh.jl +++ b/src/meshes/parallel_tree_mesh.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ partition!(mesh::ParallelTreeMesh, allow_coarsening=true) @@ -13,91 +13,91 @@ based on leaf cell count and tree structure. If `allow_coarsening` is `true`, the algorithm will keep leaf cells together on one rank when needed for local coarsening (i.e. when all children of a cell are leaves). """ -function partition!(mesh::ParallelTreeMesh; allow_coarsening=true) - # Determine number of leaf cells per rank - leaves = leaf_cells(mesh.tree) - @assert length(leaves) > mpi_nranks() "Too many ranks to properly partition the mesh!" - n_leaves_per_rank = OffsetArray(fill(div(length(leaves), mpi_nranks()), mpi_nranks()), - 0:(mpi_nranks() - 1)) - for d in 0:(rem(length(leaves), mpi_nranks()) - 1) - n_leaves_per_rank[d] += 1 - end - @assert sum(n_leaves_per_rank) == length(leaves) - - # Assign MPI ranks to all cells such that all ancestors of each cell - if not yet assigned to a - # rank - belong to the same rank - mesh.first_cell_by_rank = similar(n_leaves_per_rank) - mesh.n_cells_by_rank = similar(n_leaves_per_rank) - - leaf_count = 0 - mesh.first_cell_by_rank[0] = 1 - # Iterate over all ranks - for d in 0:(mpi_nranks() - 1) - leaf_count += n_leaves_per_rank[d] - last_id = leaves[leaf_count] - parent_id = mesh.tree.parent_ids[last_id] - - # Check if all children of the last parent are leaves - if allow_coarsening && - all(id -> is_leaf(mesh.tree, id), @view mesh.tree.child_ids[:, parent_id]) && - d < length(n_leaves_per_rank) - 1 - - # To keep children of parent together if they are all leaves, - # all children are added to this rank - additional_cells = (last_id+1):mesh.tree.child_ids[end, parent_id] - if length(additional_cells) > 0 - last_id = additional_cells[end] - - additional_leaves = count(id -> is_leaf(mesh.tree, id), additional_cells) - leaf_count += additional_leaves - # Add leaves to this rank, remove from next rank - n_leaves_per_rank[d] += additional_leaves - n_leaves_per_rank[d+1] -= additional_leaves - end +function partition!(mesh::ParallelTreeMesh; allow_coarsening = true) + # Determine number of leaf cells per rank + leaves = leaf_cells(mesh.tree) + @assert length(leaves)>mpi_nranks() "Too many ranks to properly partition the mesh!" + n_leaves_per_rank = OffsetArray(fill(div(length(leaves), mpi_nranks()), + mpi_nranks()), + 0:(mpi_nranks() - 1)) + for d in 0:(rem(length(leaves), mpi_nranks()) - 1) + n_leaves_per_rank[d] += 1 end - - @assert all(n -> n > 0, n_leaves_per_rank) "Too many ranks to properly partition the mesh!" 
- - mesh.n_cells_by_rank[d] = last_id - mesh.first_cell_by_rank[d] + 1 - mesh.tree.mpi_ranks[mesh.first_cell_by_rank[d]:last_id] .= d - - # Set first cell of next rank - if d < length(n_leaves_per_rank) - 1 - mesh.first_cell_by_rank[d+1] = mesh.first_cell_by_rank[d] + mesh.n_cells_by_rank[d] + @assert sum(n_leaves_per_rank) == length(leaves) + + # Assign MPI ranks to all cells such that all ancestors of each cell - if not yet assigned to a + # rank - belong to the same rank + mesh.first_cell_by_rank = similar(n_leaves_per_rank) + mesh.n_cells_by_rank = similar(n_leaves_per_rank) + + leaf_count = 0 + mesh.first_cell_by_rank[0] = 1 + # Iterate over all ranks + for d in 0:(mpi_nranks() - 1) + leaf_count += n_leaves_per_rank[d] + last_id = leaves[leaf_count] + parent_id = mesh.tree.parent_ids[last_id] + + # Check if all children of the last parent are leaves + if allow_coarsening && + all(id -> is_leaf(mesh.tree, id), @view mesh.tree.child_ids[:, parent_id]) && + d < length(n_leaves_per_rank) - 1 + + # To keep children of parent together if they are all leaves, + # all children are added to this rank + additional_cells = (last_id + 1):mesh.tree.child_ids[end, parent_id] + if length(additional_cells) > 0 + last_id = additional_cells[end] + + additional_leaves = count(id -> is_leaf(mesh.tree, id), + additional_cells) + leaf_count += additional_leaves + # Add leaves to this rank, remove from next rank + n_leaves_per_rank[d] += additional_leaves + n_leaves_per_rank[d + 1] -= additional_leaves + end + end + + @assert all(n -> n > 0, n_leaves_per_rank) "Too many ranks to properly partition the mesh!" + + mesh.n_cells_by_rank[d] = last_id - mesh.first_cell_by_rank[d] + 1 + mesh.tree.mpi_ranks[mesh.first_cell_by_rank[d]:last_id] .= d + + # Set first cell of next rank + if d < length(n_leaves_per_rank) - 1 + mesh.first_cell_by_rank[d + 1] = mesh.first_cell_by_rank[d] + + mesh.n_cells_by_rank[d] + end end - end - @assert all(x->x >= 0, mesh.tree.mpi_ranks[1:length(mesh.tree)]) - @assert sum(mesh.n_cells_by_rank) == length(mesh.tree) + @assert all(x -> x >= 0, mesh.tree.mpi_ranks[1:length(mesh.tree)]) + @assert sum(mesh.n_cells_by_rank) == length(mesh.tree) - return nothing + return nothing end - function get_restart_mesh_filename(restart_filename, mpi_parallel::True) - # Get directory name - dirname, _ = splitdir(restart_filename) - - if mpi_isroot() - # Read mesh filename from restart file - mesh_file = "" - h5open(restart_filename, "r") do file - mesh_file = read(attributes(file)["mesh_file"]) + # Get directory name + dirname, _ = splitdir(restart_filename) + + if mpi_isroot() + # Read mesh filename from restart file + mesh_file = "" + h5open(restart_filename, "r") do file + mesh_file = read(attributes(file)["mesh_file"]) + end + + buffer = Vector{UInt8}(mesh_file) + MPI.Bcast!(Ref(length(buffer)), mpi_root(), mpi_comm()) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) + else # non-root ranks + count = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm()) + buffer = Vector{UInt8}(undef, count[]) + MPI.Bcast!(buffer, mpi_root(), mpi_comm()) + mesh_file = String(buffer) end - buffer = Vector{UInt8}(mesh_file) - MPI.Bcast!(Ref(length(buffer)), mpi_root(), mpi_comm()) - MPI.Bcast!(buffer, mpi_root(), mpi_comm()) - else # non-root ranks - count = MPI.Bcast!(Ref(0), mpi_root(), mpi_comm()) - buffer = Vector{UInt8}(undef, count[]) - MPI.Bcast!(buffer, mpi_root(), mpi_comm()) - mesh_file = String(buffer) - end - - # Construct and return filename - return joinpath(dirname, mesh_file) + # Construct and return filename + 
return joinpath(dirname, mesh_file) end - - end # @muladd diff --git a/src/meshes/serial_tree.jl b/src/meshes/serial_tree.jl index a6d9eff37fc..143ac19f6ee 100644 --- a/src/meshes/serial_tree.jl +++ b/src/meshes/serial_tree.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Composite type that represents a NDIMS-dimensional tree (serial version). # @@ -26,190 +26,191 @@ # way. Also, depth-first ordering *might* not by guaranteed during # refinement/coarsening operations. mutable struct SerialTree{NDIMS} <: AbstractTree{NDIMS} - parent_ids::Vector{Int} - child_ids::Matrix{Int} - neighbor_ids::Matrix{Int} - levels::Vector{Int} - coordinates::Matrix{Float64} - original_cell_ids::Vector{Int} - - capacity::Int - length::Int - dummy::Int - - center_level_0::SVector{NDIMS, Float64} - length_level_0::Float64 - periodicity::NTuple{NDIMS, Bool} - - function SerialTree{NDIMS}(capacity::Integer) where NDIMS - # Verify that NDIMS is an integer - @assert NDIMS isa Integer - - # Create instance - t = new() - - # Initialize fields with defaults - # Note: length as capacity + 1 is to use `capacity + 1` as temporary storage for swap operations - t.parent_ids = fill(typemin(Int), capacity + 1) - t.child_ids = fill(typemin(Int), 2^NDIMS, capacity + 1) - t.neighbor_ids = fill(typemin(Int), 2*NDIMS, capacity + 1) - t.levels = fill(typemin(Int), capacity + 1) - t.coordinates = fill(NaN, NDIMS, capacity + 1) - t.original_cell_ids = fill(typemin(Int), capacity + 1) - - t.capacity = capacity - t.length = 0 - t.dummy = capacity + 1 - - t.center_level_0 = SVector(ntuple(_ -> NaN, NDIMS)) - t.length_level_0 = NaN - - return t - end + parent_ids::Vector{Int} + child_ids::Matrix{Int} + neighbor_ids::Matrix{Int} + levels::Vector{Int} + coordinates::Matrix{Float64} + original_cell_ids::Vector{Int} + + capacity::Int + length::Int + dummy::Int + + center_level_0::SVector{NDIMS, Float64} + length_level_0::Float64 + periodicity::NTuple{NDIMS, Bool} + + function SerialTree{NDIMS}(capacity::Integer) where {NDIMS} + # Verify that NDIMS is an integer + @assert NDIMS isa Integer + + # Create instance + t = new() + + # Initialize fields with defaults + # Note: length as capacity + 1 is to use `capacity + 1` as temporary storage for swap operations + t.parent_ids = fill(typemin(Int), capacity + 1) + t.child_ids = fill(typemin(Int), 2^NDIMS, capacity + 1) + t.neighbor_ids = fill(typemin(Int), 2 * NDIMS, capacity + 1) + t.levels = fill(typemin(Int), capacity + 1) + t.coordinates = fill(NaN, NDIMS, capacity + 1) + t.original_cell_ids = fill(typemin(Int), capacity + 1) + + t.capacity = capacity + t.length = 0 + t.dummy = capacity + 1 + + t.center_level_0 = SVector(ntuple(_ -> NaN, NDIMS)) + t.length_level_0 = NaN + + return t + end end - # Constructor for passing the dimension as an argument -SerialTree(::Val{NDIMS}, args...) where NDIMS = SerialTree{NDIMS}(args...) +SerialTree(::Val{NDIMS}, args...) where {NDIMS} = SerialTree{NDIMS}(args...) 
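Stepping back to `partition!` above: the leaf distribution is a plain round-robin split, `div(n_leaves, n_ranks)` leaves per rank plus one extra for the first `rem(n_leaves, n_ranks)` ranks; the `allow_coarsening` branch then only shifts a few leaves between neighboring ranks to keep sibling leaves together. A standalone illustration of the counting step (not part of the patch):

```julia
using OffsetArrays

n_leaves, n_ranks = 11, 4
n_leaves_per_rank = OffsetArray(fill(div(n_leaves, n_ranks), n_ranks),
                                0:(n_ranks - 1))
for d in 0:(rem(n_leaves, n_ranks) - 1)
    n_leaves_per_rank[d] += 1
end

@assert sum(n_leaves_per_rank) == n_leaves
# ranks 0..3 receive 3, 3, 3, 2 leaves, respectively
```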
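Likewise, `get_restart_mesh_filename` above uses a two-step broadcast because non-root ranks cannot allocate the receive buffer before they know its size: the root first broadcasts the byte count, then the payload. A minimal sketch of the same handshake with MPI.jl, assuming a hypothetical file name and the root at rank 0:

```julia
using MPI

MPI.Init()
comm = MPI.COMM_WORLD

if MPI.Comm_rank(comm) == 0
    buffer = Vector{UInt8}("mesh_file.h5")    # assumed example payload
    MPI.Bcast!(Ref(length(buffer)), 0, comm)  # step 1: announce the byte count
    MPI.Bcast!(buffer, 0, comm)               # step 2: send the bytes
else
    count = MPI.Bcast!(Ref(0), 0, comm)       # step 1: receive the byte count
    buffer = Vector{UInt8}(undef, count[])    # allocate exactly that much
    MPI.Bcast!(buffer, 0, comm)               # step 2: receive the bytes
    mesh_file = String(buffer)
end
```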
# Create and initialize tree function SerialTree{NDIMS}(capacity::Int, center::AbstractArray{Float64}, - length::Real, periodicity=true) where NDIMS - # Create instance - t = SerialTree{NDIMS}(capacity) + length::Real, periodicity = true) where {NDIMS} + # Create instance + t = SerialTree{NDIMS}(capacity) - # Initialize root cell - init!(t, center, length, periodicity) + # Initialize root cell + init!(t, center, length, periodicity) - return t + return t end # Constructor accepting a single number as center (as opposed to an array) for 1D -SerialTree{1}(cap::Int, center::Real, len::Real, periodicity=true) = SerialTree{1}(cap, [convert(Float64, center)], len, periodicity) - +function SerialTree{1}(cap::Int, center::Real, len::Real, periodicity = true) + SerialTree{1}(cap, [convert(Float64, center)], len, periodicity) +end # Clear tree with deleting data structures, store center and length, and create root cell -function init!(t::SerialTree, center::AbstractArray{Float64}, length::Real, periodicity=true) - clear!(t) - - # Set domain information - t.center_level_0 = center - t.length_level_0 = length - - # Create root cell - t.length += 1 - t.parent_ids[1] = 0 - t.child_ids[:, 1] .= 0 - t.levels[1] = 0 - set_cell_coordinates!(t, t.center_level_0, 1) - t.original_cell_ids[1] = 0 - - # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor - if all(periodicity) - # Also catches case where periodicity = true - t.neighbor_ids[:, 1] .= 1 - t.periodicity = ntuple(x->true, ndims(t)) - elseif !any(periodicity) - # Also catches case where periodicity = false - t.neighbor_ids[:, 1] .= 0 - t.periodicity = ntuple(x->false, ndims(t)) - else - # Default case if periodicity is an iterable - for dimension in 1:ndims(t) - if periodicity[dimension] - t.neighbor_ids[2 * dimension - 1, 1] = 1 - t.neighbor_ids[2 * dimension - 0, 1] = 1 - else - t.neighbor_ids[2 * dimension - 1, 1] = 0 - t.neighbor_ids[2 * dimension - 0, 1] = 0 - end +function init!(t::SerialTree, center::AbstractArray{Float64}, length::Real, + periodicity = true) + clear!(t) + + # Set domain information + t.center_level_0 = center + t.length_level_0 = length + + # Create root cell + t.length += 1 + t.parent_ids[1] = 0 + t.child_ids[:, 1] .= 0 + t.levels[1] = 0 + set_cell_coordinates!(t, t.center_level_0, 1) + t.original_cell_ids[1] = 0 + + # Set neighbor ids: for each periodic direction, the level-0 cell is its own neighbor + if all(periodicity) + # Also catches case where periodicity = true + t.neighbor_ids[:, 1] .= 1 + t.periodicity = ntuple(x -> true, ndims(t)) + elseif !any(periodicity) + # Also catches case where periodicity = false + t.neighbor_ids[:, 1] .= 0 + t.periodicity = ntuple(x -> false, ndims(t)) + else + # Default case if periodicity is an iterable + for dimension in 1:ndims(t) + if periodicity[dimension] + t.neighbor_ids[2 * dimension - 1, 1] = 1 + t.neighbor_ids[2 * dimension - 0, 1] = 1 + else + t.neighbor_ids[2 * dimension - 1, 1] = 0 + t.neighbor_ids[2 * dimension - 0, 1] = 0 + end + end + + t.periodicity = Tuple(periodicity) end - - t.periodicity = Tuple(periodicity) - end end - # Convenience output for debugging function Base.show(io::IO, ::MIME"text/plain", t::SerialTree) - @nospecialize t # reduce precompilation time - - l = t.length - println(io, '*'^20) - println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])") - println(io, "transpose(t.child_ids[:, 1:l]) = $(transpose(t.child_ids[:, 1:l]))") - println(io, "transpose(t.neighbor_ids[:, 1:l]) = $(transpose(t.neighbor_ids[:, 1:l]))") - 
println(io, "t.levels[1:l] = $(t.levels[1:l])") - println(io, "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))") - println(io, "t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])") - println(io, "t.capacity = $(t.capacity)") - println(io, "t.length = $(t.length)") - println(io, "t.dummy = $(t.dummy)") - println(io, "t.center_level_0 = $(t.center_level_0)") - println(io, "t.length_level_0 = $(t.length_level_0)") - println(io, '*'^20) + @nospecialize t # reduce precompilation time + + l = t.length + println(io, '*'^20) + println(io, "t.parent_ids[1:l] = $(t.parent_ids[1:l])") + println(io, "transpose(t.child_ids[:, 1:l]) = $(transpose(t.child_ids[:, 1:l]))") + println(io, + "transpose(t.neighbor_ids[:, 1:l]) = $(transpose(t.neighbor_ids[:, 1:l]))") + println(io, "t.levels[1:l] = $(t.levels[1:l])") + println(io, + "transpose(t.coordinates[:, 1:l]) = $(transpose(t.coordinates[:, 1:l]))") + println(io, "t.original_cell_ids[1:l] = $(t.original_cell_ids[1:l])") + println(io, "t.capacity = $(t.capacity)") + println(io, "t.length = $(t.length)") + println(io, "t.dummy = $(t.dummy)") + println(io, "t.center_level_0 = $(t.center_level_0)") + println(io, "t.length_level_0 = $(t.length_level_0)") + println(io, '*'^20) end - # Set information for child cell `child_id` based on parent cell `cell_id` (except neighbors) function init_child!(t::SerialTree, cell_id, child, child_id) - t.parent_ids[child_id] = cell_id - t.child_ids[child, cell_id] = child_id - t.child_ids[:, child_id] .= 0 - t.levels[child_id] = t.levels[cell_id] + 1 - set_cell_coordinates!(t, - child_coordinates(t, cell_coordinates(t, cell_id), length_at_cell(t, cell_id), child), child_id) - t.original_cell_ids[child_id] = 0 - - return nothing + t.parent_ids[child_id] = cell_id + t.child_ids[child, cell_id] = child_id + t.child_ids[:, child_id] .= 0 + t.levels[child_id] = t.levels[cell_id] + 1 + set_cell_coordinates!(t, + child_coordinates(t, cell_coordinates(t, cell_id), + length_at_cell(t, cell_id), child), + child_id) + t.original_cell_ids[child_id] = 0 + + return nothing end - # Reset range of cells to values that are prone to cause errors as soon as they are used. # # Rationale: If an invalid cell is accidentally used, we want to know it as soon as possible. function invalidate!(t::SerialTree, first::Int, last::Int) - @assert first > 0 - @assert last <= t.capacity + 1 - - # Integer values are set to smallest negative value, floating point values to NaN - t.parent_ids[first:last] .= typemin(Int) - t.child_ids[:, first:last] .= typemin(Int) - t.neighbor_ids[:, first:last] .= typemin(Int) - t.levels[first:last] .= typemin(Int) - t.coordinates[:, first:last] .= NaN - t.original_cell_ids[first:last] .= typemin(Int) - - return nothing + @assert first > 0 + @assert last <= t.capacity + 1 + + # Integer values are set to smallest negative value, floating point values to NaN + t.parent_ids[first:last] .= typemin(Int) + t.child_ids[:, first:last] .= typemin(Int) + t.neighbor_ids[:, first:last] .= typemin(Int) + t.levels[first:last] .= typemin(Int) + t.coordinates[:, first:last] .= NaN + t.original_cell_ids[first:last] .= typemin(Int) + + return nothing end - # Raw copy operation for ranges of cells. 
# # This method is used by the higher-level copy operations for AbstractContainer -function raw_copy!(target::SerialTree, source::SerialTree, first::Int, last::Int, destination::Int) - copy_data!(target.parent_ids, source.parent_ids, first, last, destination) - copy_data!(target.child_ids, source.child_ids, first, last, destination, - n_children_per_cell(target)) - copy_data!(target.neighbor_ids, source.neighbor_ids, first, last, - destination, n_directions(target)) - copy_data!(target.levels, source.levels, first, last, destination) - copy_data!(target.coordinates, source.coordinates, first, last, destination, ndims(target)) - copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, destination) +function raw_copy!(target::SerialTree, source::SerialTree, first::Int, last::Int, + destination::Int) + copy_data!(target.parent_ids, source.parent_ids, first, last, destination) + copy_data!(target.child_ids, source.child_ids, first, last, destination, + n_children_per_cell(target)) + copy_data!(target.neighbor_ids, source.neighbor_ids, first, last, + destination, n_directions(target)) + copy_data!(target.levels, source.levels, first, last, destination) + copy_data!(target.coordinates, source.coordinates, first, last, destination, + ndims(target)) + copy_data!(target.original_cell_ids, source.original_cell_ids, first, last, + destination) end - # Reset data structures by recreating all internal storage containers and invalidating all elements -function reset_data_structures!(t::SerialTree{NDIMS}) where NDIMS - t.parent_ids = Vector{Int}(undef, t.capacity + 1) - t.child_ids = Matrix{Int}(undef, 2^NDIMS, t.capacity + 1) - t.neighbor_ids = Matrix{Int}(undef, 2*NDIMS, t.capacity + 1) - t.levels = Vector{Int}(undef, t.capacity + 1) - t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) - t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) - - invalidate!(t, 1, capacity(t) + 1) +function reset_data_structures!(t::SerialTree{NDIMS}) where {NDIMS} + t.parent_ids = Vector{Int}(undef, t.capacity + 1) + t.child_ids = Matrix{Int}(undef, 2^NDIMS, t.capacity + 1) + t.neighbor_ids = Matrix{Int}(undef, 2 * NDIMS, t.capacity + 1) + t.levels = Vector{Int}(undef, t.capacity + 1) + t.coordinates = Matrix{Float64}(undef, NDIMS, t.capacity + 1) + t.original_cell_ids = Vector{Int}(undef, t.capacity + 1) + + invalidate!(t, 1, capacity(t) + 1) end - - end # @muladd diff --git a/src/meshes/structured_mesh.jl b/src/meshes/structured_mesh.jl index 32c4b6cc459..5872681933a 100644 --- a/src/meshes/structured_mesh.jl +++ b/src/meshes/structured_mesh.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ StructuredMesh{NDIMS} <: AbstractMesh{NDIMS} @@ -13,16 +13,15 @@ A structured curved mesh. Different numbers of cells per dimension are possible and arbitrary functions can be used as domain faces. 
""" -mutable struct StructuredMesh{NDIMS, RealT<:Real} <: AbstractMesh{NDIMS} - cells_per_dimension::NTuple{NDIMS, Int} - mapping::Any # Not relevant for performance - mapping_as_string::String - periodicity::NTuple{NDIMS, Bool} - current_filename::String - unsaved_changes::Bool +mutable struct StructuredMesh{NDIMS, RealT <: Real} <: AbstractMesh{NDIMS} + cells_per_dimension::NTuple{NDIMS, Int} + mapping::Any # Not relevant for performance + mapping_as_string::String + periodicity::NTuple{NDIMS, Bool} + current_filename::String + unsaved_changes::Bool end - """ StructuredMesh(cells_per_dimension, mapping; RealT=Float64, unsaved_changes=true, mapping_as_string=mapping2string(mapping, length(cells_per_dimension))) @@ -44,25 +43,28 @@ Create a StructuredMesh of the given size and shape that uses `RealT` as coordin The code string must define the mapping function with the name `mapping`. This will be changed in the future, see https://github.com/trixi-framework/Trixi.jl/issues/541. """ -function StructuredMesh(cells_per_dimension, mapping; RealT=Float64, periodicity=true, unsaved_changes=true, - mapping_as_string=mapping2string(mapping, length(cells_per_dimension))) - NDIMS = length(cells_per_dimension) - - # Convert periodicity to a Tuple of a Bool for every dimension - if all(periodicity) - # Also catches case where periodicity = true - periodicity = ntuple(_->true, NDIMS) - elseif !any(periodicity) - # Also catches case where periodicity = false - periodicity = ntuple(_->false, NDIMS) - else - # Default case if periodicity is an iterable - periodicity = Tuple(periodicity) - end - - return StructuredMesh{NDIMS, RealT}(Tuple(cells_per_dimension), mapping, mapping_as_string, periodicity, "", unsaved_changes) -end +function StructuredMesh(cells_per_dimension, mapping; RealT = Float64, + periodicity = true, unsaved_changes = true, + mapping_as_string = mapping2string(mapping, + length(cells_per_dimension))) + NDIMS = length(cells_per_dimension) + + # Convert periodicity to a Tuple of a Bool for every dimension + if all(periodicity) + # Also catches case where periodicity = true + periodicity = ntuple(_ -> true, NDIMS) + elseif !any(periodicity) + # Also catches case where periodicity = false + periodicity = ntuple(_ -> false, NDIMS) + else + # Default case if periodicity is an iterable + periodicity = Tuple(periodicity) + end + return StructuredMesh{NDIMS, RealT}(Tuple(cells_per_dimension), mapping, + mapping_as_string, periodicity, "", + unsaved_changes) +end """ StructuredMesh(cells_per_dimension, faces; RealT=Float64, unsaved_changes=true, faces_as_string=faces2string(faces)) @@ -83,28 +85,30 @@ Create a StructuredMesh of the given size and shape that uses `RealT` as coordin - `periodicity`: either a `Bool` deciding if all of the boundaries are periodic or an `NTuple{NDIMS, Bool}` deciding for each dimension if the boundaries in this dimension are periodic. 
""" -function StructuredMesh(cells_per_dimension, faces::Tuple; RealT=Float64, periodicity=true) - NDIMS = length(cells_per_dimension) +function StructuredMesh(cells_per_dimension, faces::Tuple; RealT = Float64, + periodicity = true) + NDIMS = length(cells_per_dimension) - validate_faces(faces) + validate_faces(faces) - # Use the transfinite mapping with the correct number of arguments - mapping = transfinite_mapping(faces) + # Use the transfinite mapping with the correct number of arguments + mapping = transfinite_mapping(faces) - # Collect definitions of face functions in one string (separated by semicolons) - face2substring(face) = code_string(face, ntuple(_ -> Float64, NDIMS-1)) - join_semicolon(strings) = join(strings, "; ") + # Collect definitions of face functions in one string (separated by semicolons) + face2substring(face) = code_string(face, ntuple(_ -> Float64, NDIMS - 1)) + join_semicolon(strings) = join(strings, "; ") - faces_definition = faces .|> face2substring .|> string |> join_semicolon + faces_definition = faces .|> face2substring .|> string |> join_semicolon - # Include faces definition in `mapping_as_string` to allow for evaluation - # without knowing the face functions - mapping_as_string = "$faces_definition; faces = $(string(faces)); mapping = transfinite_mapping(faces)" + # Include faces definition in `mapping_as_string` to allow for evaluation + # without knowing the face functions + mapping_as_string = "$faces_definition; faces = $(string(faces)); mapping = transfinite_mapping(faces)" - return StructuredMesh(cells_per_dimension, mapping; RealT=RealT, periodicity=periodicity, mapping_as_string=mapping_as_string) + return StructuredMesh(cells_per_dimension, mapping; RealT = RealT, + periodicity = periodicity, + mapping_as_string = mapping_as_string) end - """ StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max; periodicity=true) @@ -117,20 +121,24 @@ Create a StructuredMesh that represents a uncurved structured mesh with a rectan - `periodicity`: either a `Bool` deciding if all of the boundaries are periodic or an `NTuple{NDIMS, Bool}` deciding for each dimension if the boundaries in this dimension are periodic. 
""" -function StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max; periodicity=true) - NDIMS = length(cells_per_dimension) - RealT = promote_type(eltype(coordinates_min), eltype(coordinates_max)) - - mapping = coordinates2mapping(coordinates_min, coordinates_max) - mapping_as_string = "coordinates_min = $coordinates_min; " * - "coordinates_max = $coordinates_max; " * - "mapping = coordinates2mapping(coordinates_min, coordinates_max)" - return StructuredMesh(cells_per_dimension, mapping; RealT=RealT, periodicity=periodicity, mapping_as_string=mapping_as_string) +function StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max; + periodicity = true) + NDIMS = length(cells_per_dimension) + RealT = promote_type(eltype(coordinates_min), eltype(coordinates_max)) + + mapping = coordinates2mapping(coordinates_min, coordinates_max) + mapping_as_string = "coordinates_min = $coordinates_min; " * + "coordinates_max = $coordinates_max; " * + "mapping = coordinates2mapping(coordinates_min, coordinates_max)" + return StructuredMesh(cells_per_dimension, mapping; RealT = RealT, + periodicity = periodicity, + mapping_as_string = mapping_as_string) end - # Extract a string of the code that defines the mapping function -mapping2string(mapping, ndims) = string(code_string(mapping, ntuple(_ -> Float64, ndims))) +function mapping2string(mapping, ndims) + string(code_string(mapping, ntuple(_ -> Float64, ndims))) +end # An internal function wrapping `CodeTracking.code_string` with additional # error checking to avoid some problems when calling this function in @@ -138,171 +146,175 @@ mapping2string(mapping, ndims) = string(code_string(mapping, ntuple(_ -> Float64 # - https://github.com/trixi-framework/Trixi.jl/issues/931 # - https://github.com/trixi-framework/Trixi.jl/pull/1084 function code_string(f, t) - try - return CodeTracking.code_string(f, t) - catch e - return "" - end + try + return CodeTracking.code_string(f, t) + catch e + return "" + end end # Interpolate linearly between left and right value where s should be between -1 and 1 -linear_interpolate(s, left_value, right_value) = 0.5 * ((1 - s) * left_value + (1 + s) * right_value) - +function linear_interpolate(s, left_value, right_value) + 0.5 * ((1 - s) * left_value + (1 + s) * right_value) +end # Convert min and max coordinates of a rectangle to the corresponding transformation mapping function coordinates2mapping(coordinates_min::NTuple{1}, coordinates_max::NTuple{1}) - mapping(xi) = linear_interpolate(xi, coordinates_min[1], coordinates_max[1]) + mapping(xi) = linear_interpolate(xi, coordinates_min[1], coordinates_max[1]) end function coordinates2mapping(coordinates_min::NTuple{2}, coordinates_max::NTuple{2}) - mapping(xi, eta) = SVector(linear_interpolate(xi, coordinates_min[1], coordinates_max[1]), - linear_interpolate(eta, coordinates_min[2], coordinates_max[2])) + function mapping(xi, eta) + SVector(linear_interpolate(xi, coordinates_min[1], coordinates_max[1]), + linear_interpolate(eta, coordinates_min[2], coordinates_max[2])) + end end function coordinates2mapping(coordinates_min::NTuple{3}, coordinates_max::NTuple{3}) - mapping(xi, eta, zeta) = SVector(linear_interpolate(xi, coordinates_min[1], coordinates_max[1]), - linear_interpolate(eta, coordinates_min[2], coordinates_max[2]), - linear_interpolate(zeta, coordinates_min[3], coordinates_max[3])) + function mapping(xi, eta, zeta) + SVector(linear_interpolate(xi, coordinates_min[1], coordinates_max[1]), + linear_interpolate(eta, coordinates_min[2], 
coordinates_max[2]), + linear_interpolate(zeta, coordinates_min[3], coordinates_max[3])) + end end - # In 1D # Linear mapping from the reference element to the domain described by the faces function linear_mapping(x, faces) - return linear_interpolate(x, faces[1](), faces[2]()) + return linear_interpolate(x, faces[1](), faces[2]()) end - # In 2D # Bilinear mapping from the reference element to the domain described by the faces function bilinear_mapping(x, y, faces) - x1 = faces[1](-1) # Bottom left - x2 = faces[2](-1) # Bottom right - x3 = faces[1](1) # Top left - x4 = faces[2](1) # Top right - - return 0.25 * (x1 * (1 - x) * (1 - y) + - x2 * (1 + x) * (1 - y) + - x3 * (1 - x) * (1 + y) + - x4 * (1 + x) * (1 + y)) + x1 = faces[1](-1) # Bottom left + x2 = faces[2](-1) # Bottom right + x3 = faces[1](1) # Top left + x4 = faces[2](1) # Top right + + return 0.25 * (x1 * (1 - x) * (1 - y) + + x2 * (1 + x) * (1 - y) + + x3 * (1 - x) * (1 + y) + + x4 * (1 + x) * (1 + y)) end - # In 3D # Trilinear mapping from the reference element to the domain described by the faces function trilinear_mapping(x, y, z, faces) - x1 = faces[1](-1, -1) # mapped from (-1,-1,-1) - x2 = faces[2](-1, -1) # mapped from ( 1,-1,-1) - x3 = faces[1]( 1, -1) # mapped from (-1, 1,-1) - x4 = faces[2]( 1, -1) # mapped from ( 1, 1,-1) - x5 = faces[1](-1, 1) # mapped from (-1,-1, 1) - x6 = faces[2](-1, 1) # mapped from ( 1,-1, 1) - x7 = faces[1]( 1, 1) # mapped from (-1, 1, 1) - x8 = faces[2]( 1, 1) # mapped from ( 1, 1, 1) - - return 0.125 * (x1 * (1 - x) * (1 - y) * (1 - z) + - x2 * (1 + x) * (1 - y) * (1 - z) + - x3 * (1 - x) * (1 + y) * (1 - z) + - x4 * (1 + x) * (1 + y) * (1 - z) + - x5 * (1 - x) * (1 - y) * (1 + z) + - x6 * (1 + x) * (1 - y) * (1 + z) + - x7 * (1 - x) * (1 + y) * (1 + z) + - x8 * (1 + x) * (1 + y) * (1 + z) ) + x1 = faces[1](-1, -1) # mapped from (-1,-1,-1) + x2 = faces[2](-1, -1) # mapped from ( 1,-1,-1) + x3 = faces[1](1, -1) # mapped from (-1, 1,-1) + x4 = faces[2](1, -1) # mapped from ( 1, 1,-1) + x5 = faces[1](-1, 1) # mapped from (-1,-1, 1) + x6 = faces[2](-1, 1) # mapped from ( 1,-1, 1) + x7 = faces[1](1, 1) # mapped from (-1, 1, 1) + x8 = faces[2](1, 1) # mapped from ( 1, 1, 1) + + return 0.125 * (x1 * (1 - x) * (1 - y) * (1 - z) + + x2 * (1 + x) * (1 - y) * (1 - z) + + x3 * (1 - x) * (1 + y) * (1 - z) + + x4 * (1 + x) * (1 + y) * (1 - z) + + x5 * (1 - x) * (1 - y) * (1 + z) + + x6 * (1 + x) * (1 - y) * (1 + z) + + x7 * (1 - x) * (1 + y) * (1 + z) + + x8 * (1 + x) * (1 + y) * (1 + z)) end - # Use linear mapping in 1D transfinite_mapping(faces::NTuple{2, Any}) = x -> linear_mapping(x, faces) # In 2D # Transfinite mapping from the reference element to the domain described by the faces function transfinite_mapping(faces::NTuple{4, Any}) - mapping(x, y) = (linear_interpolate(x, faces[1](y), faces[2](y)) + - linear_interpolate(y, faces[3](x), faces[4](x)) - - bilinear_mapping(x, y, faces)) + function mapping(x, y) + (linear_interpolate(x, faces[1](y), faces[2](y)) + + linear_interpolate(y, faces[3](x), faces[4](x)) - + bilinear_mapping(x, y, faces)) + end end - # In 3D # Correction term for the Transfinite mapping function correction_term_3d(x, y, z, faces) - # Correction for x-terms - c_x = linear_interpolate(x, linear_interpolate(y, faces[3](-1, z), faces[4](-1, z)) + - linear_interpolate(z, faces[5](-1, y), faces[6](-1, y)), - linear_interpolate(y, faces[3]( 1, z), faces[4]( 1, z)) + - linear_interpolate(z, faces[5]( 1, y), faces[6]( 1, y)) ) - - # Correction for y-terms - c_y = linear_interpolate(y, 
linear_interpolate(x, faces[1](-1, z), faces[2](-1, z)) +
- linear_interpolate(z, faces[5]( x, -1), faces[6]( x, -1)),
- linear_interpolate(x, faces[1]( 1, z), faces[2]( 1, z)) +
- linear_interpolate(z, faces[5]( x, 1), faces[6]( x, 1)) )
-
- # Correction for z-terms
- c_z = linear_interpolate(z, linear_interpolate(x, faces[1](y, -1), faces[2](y, -1)) +
- linear_interpolate(y, faces[3](x, -1), faces[4](x, -1)),
- linear_interpolate(x, faces[1](y, 1), faces[2](y, 1)) +
- linear_interpolate(y, faces[3](x, 1), faces[4](x, 1)) )
-
- return 0.5 * (c_x + c_y + c_z)
+ # Correction for x-terms
+ c_x = linear_interpolate(x,
+ linear_interpolate(y, faces[3](-1, z), faces[4](-1, z)) +
+ linear_interpolate(z, faces[5](-1, y), faces[6](-1, y)),
+ linear_interpolate(y, faces[3](1, z), faces[4](1, z)) +
+ linear_interpolate(z, faces[5](1, y), faces[6](1, y)))
+
+ # Correction for y-terms
+ c_y = linear_interpolate(y,
+ linear_interpolate(x, faces[1](-1, z), faces[2](-1, z)) +
+ linear_interpolate(z, faces[5](x, -1), faces[6](x, -1)),
+ linear_interpolate(x, faces[1](1, z), faces[2](1, z)) +
+ linear_interpolate(z, faces[5](x, 1), faces[6](x, 1)))
+
+ # Correction for z-terms
+ c_z = linear_interpolate(z,
+ linear_interpolate(x, faces[1](y, -1), faces[2](y, -1)) +
+ linear_interpolate(y, faces[3](x, -1), faces[4](x, -1)),
+ linear_interpolate(x, faces[1](y, 1), faces[2](y, 1)) +
+ linear_interpolate(y, faces[3](x, 1), faces[4](x, 1)))
+
+ return 0.5 * (c_x + c_y + c_z)
end
-
# In 3D
# Transfinite mapping from the reference element to the domain described by the faces
function transfinite_mapping(faces::NTuple{6, Any})
- mapping(x, y, z) = (linear_interpolate(x, faces[1](y, z), faces[2](y, z)) +
- linear_interpolate(y, faces[3](x, z), faces[4](x, z)) +
- linear_interpolate(z, faces[5](x, y), faces[6](x, y)) -
- correction_term_3d(x, y, z, faces) +
- trilinear_mapping(x, y, z, faces))
+ function mapping(x, y, z)
+ (linear_interpolate(x, faces[1](y, z), faces[2](y, z)) +
+ linear_interpolate(y, faces[3](x, z), faces[4](x, z)) +
+ linear_interpolate(z, faces[5](x, y), faces[6](x, y)) -
+ correction_term_3d(x, y, z, faces) +
+ trilinear_mapping(x, y, z, faces))
+ end
end
-
function validate_faces(faces::NTuple{2, Any})
end
function validate_faces(faces::NTuple{4, Any})
- @assert faces[1](-1) ≈ faces[3](-1) "faces[1](-1) needs to match faces[3](-1) (bottom left corner)"
- @assert faces[2](-1) ≈ faces[3]( 1) "faces[2](-1) needs to match faces[3](1) (bottom right corner)"
- @assert faces[1]( 1) ≈ faces[4](-1) "faces[1](1) needs to match faces[4](-1) (top left corner)"
- @assert faces[2]( 1) ≈ faces[4]( 1) "faces[2](1) needs to match faces[4](1) (top right corner)"
+ @assert faces[1](-1)≈faces[3](-1) "faces[1](-1) needs to match faces[3](-1) (bottom left corner)"
+ @assert faces[2](-1)≈faces[3](1) "faces[2](-1) needs to match faces[3](1) (bottom right corner)"
+ @assert faces[1](1)≈faces[4](-1) "faces[1](1) needs to match faces[4](-1) (top left corner)"
+ @assert faces[2](1)≈faces[4](1) "faces[2](1) needs to match faces[4](1) (top right corner)"
end
function validate_faces(faces::NTuple{6, Any})
- @assert (faces[1](-1, -1) ≈
- faces[3](-1, -1) ≈
- faces[5](-1, -1)) "faces[1](-1, -1), faces[3](-1, -1) and faces[5](-1, -1) need to match at (-1, -1, -1) corner"
+ @assert (faces[1](-1, -1)≈
+ faces[3](-1, -1)≈
+ faces[5](-1, -1)) "faces[1](-1, -1), faces[3](-1, -1) and faces[5](-1, -1) need to match at (-1, -1, -1) corner"
- @assert (faces[2](-1, -1) ≈
- faces[3]( 1, -1) ≈
- faces[5]( 1, -1)) "faces[2](-1, -1),
faces[3](1, -1) and faces[5](1, -1) need to match at (1, -1, -1) corner" + @assert (faces[2](-1, -1)≈ + faces[3](1, -1)≈ + faces[5](1, -1)) "faces[2](-1, -1), faces[3](1, -1) and faces[5](1, -1) need to match at (1, -1, -1) corner" - @assert (faces[1]( 1, -1) ≈ - faces[4](-1, -1) ≈ - faces[5](-1, 1)) "faces[1](1, -1), faces[4](-1, -1) and faces[5](-1, 1) need to match at (-1, 1, -1) corner" + @assert (faces[1](1, -1)≈ + faces[4](-1, -1)≈ + faces[5](-1, 1)) "faces[1](1, -1), faces[4](-1, -1) and faces[5](-1, 1) need to match at (-1, 1, -1) corner" - @assert (faces[2]( 1, -1) ≈ - faces[4]( 1, -1) ≈ - faces[5]( 1, 1)) "faces[2](1, -1), faces[4](1, -1) and faces[5](1, 1) need to match at (1, 1, -1) corner" + @assert (faces[2](1, -1)≈ + faces[4](1, -1)≈ + faces[5](1, 1)) "faces[2](1, -1), faces[4](1, -1) and faces[5](1, 1) need to match at (1, 1, -1) corner" - @assert (faces[1](-1, 1) ≈ - faces[3](-1, 1) ≈ - faces[6](-1, -1)) "faces[1](-1, 1), faces[3](-1, 1) and faces[6](-1, -1) need to match at (-1, -1, 1) corner" + @assert (faces[1](-1, 1)≈ + faces[3](-1, 1)≈ + faces[6](-1, -1)) "faces[1](-1, 1), faces[3](-1, 1) and faces[6](-1, -1) need to match at (-1, -1, 1) corner" - @assert (faces[2](-1, 1) ≈ - faces[3]( 1, 1) ≈ - faces[6]( 1, -1)) "faces[2](-1, 1), faces[3](1, 1) and faces[6](1, -1) need to match at (1, -1, 1) corner" + @assert (faces[2](-1, 1)≈ + faces[3](1, 1)≈ + faces[6](1, -1)) "faces[2](-1, 1), faces[3](1, 1) and faces[6](1, -1) need to match at (1, -1, 1) corner" - @assert (faces[1]( 1, 1) ≈ - faces[4](-1, 1) ≈ - faces[6](-1, 1)) "faces[1](1, 1), faces[4](-1, 1) and faces[6](-1, 1) need to match at (-1, 1, 1) corner" + @assert (faces[1](1, 1)≈ + faces[4](-1, 1)≈ + faces[6](-1, 1)) "faces[1](1, 1), faces[4](-1, 1) and faces[6](-1, 1) need to match at (-1, 1, 1) corner" - @assert (faces[2]( 1, 1) ≈ - faces[4]( 1, 1) ≈ - faces[6]( 1, 1)) "faces[2](1, 1), faces[4](1, 1) and faces[6](1, 1) need to match at (1, 1, 1) corner" + @assert (faces[2](1, 1)≈ + faces[4](1, 1)≈ + faces[6](1, 1)) "faces[2](1, 1), faces[4](1, 1) and faces[6](1, 1) need to match at (1, 1, 1) corner" end - # Check if mesh is periodic isperiodic(mesh::StructuredMesh) = all(mesh.periodicity) isperiodic(mesh::StructuredMesh, dimension) = mesh.periodicity[dimension] @@ -314,28 +326,26 @@ Base.size(mesh::StructuredMesh, i) = mesh.cells_per_dimension[i] Base.axes(mesh::StructuredMesh) = map(Base.OneTo, mesh.cells_per_dimension) Base.axes(mesh::StructuredMesh, i) = Base.OneTo(mesh.cells_per_dimension[i]) - function Base.show(io::IO, mesh::StructuredMesh) - print(io, "StructuredMesh{", ndims(mesh), ", ", real(mesh), "}") + print(io, "StructuredMesh{", ndims(mesh), ", ", real(mesh), "}") end - function Base.show(io::IO, ::MIME"text/plain", mesh::StructuredMesh) - if get(io, :compact, false) - show(io, mesh) - else - summary_header(io, "StructuredMesh{" * string(ndims(mesh)) * ", " * string(real(mesh)) * "}") - summary_line(io, "size", size(mesh)) - - summary_line(io, "mapping", "") - # Print code lines of mapping_as_string - mapping_lines = split(mesh.mapping_as_string, ";") - for i in eachindex(mapping_lines) - summary_line(increment_indent(io), "line $i", strip(mapping_lines[i])) + if get(io, :compact, false) + show(io, mesh) + else + summary_header(io, + "StructuredMesh{" * string(ndims(mesh)) * ", " * + string(real(mesh)) * "}") + summary_line(io, "size", size(mesh)) + + summary_line(io, "mapping", "") + # Print code lines of mapping_as_string + mapping_lines = split(mesh.mapping_as_string, ";") + for i in 
eachindex(mapping_lines)
+ summary_line(increment_indent(io), "line $i", strip(mapping_lines[i]))
+ end
+ summary_footer(io)
end
- summary_footer(io)
- end
end
-
-
end # @muladd
diff --git a/src/meshes/surface_interpolant.jl b/src/meshes/surface_interpolant.jl
index ec38ee3f905..22d14e38c5c 100644
--- a/src/meshes/surface_interpolant.jl
+++ b/src/meshes/surface_interpolant.jl
@@ -3,125 +3,126 @@
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
-
+#! format: noindent

# CurvedSurface{RealT<:Real}
#
# Contains the data needed to represent a curve with data points (x,y) as a Lagrange polynomial
# interpolant written in barycentric form at a given set of nodes.
-struct CurvedSurface{RealT<:Real}
- nodes ::Vector{RealT}
- barycentric_weights ::Vector{RealT}
- coordinates ::Array{RealT, 2} #[nnodes, ndims]
+struct CurvedSurface{RealT <: Real}
+ nodes :: Vector{RealT}
+ barycentric_weights :: Vector{RealT}
+ coordinates :: Array{RealT, 2} #[nnodes, ndims]
end
-
# evaluate the Gamma curve interpolant at a particular point s and return the (x,y) coordinate
function evaluate_at(s, boundary_curve::CurvedSurface)
-
- @unpack nodes, barycentric_weights, coordinates = boundary_curve
-
- x_coordinate_at_s_on_boundary_curve = lagrange_interpolation(s, nodes, view(coordinates, :, 1),
- barycentric_weights)
- y_coordinate_at_s_on_boundary_curve = lagrange_interpolation(s, nodes, view(coordinates, :, 2),
- barycentric_weights)
-
- return x_coordinate_at_s_on_boundary_curve, y_coordinate_at_s_on_boundary_curve
+ @unpack nodes, barycentric_weights, coordinates = boundary_curve
+
+ x_coordinate_at_s_on_boundary_curve = lagrange_interpolation(s, nodes,
+ view(coordinates, :,
+ 1),
+ barycentric_weights)
+ y_coordinate_at_s_on_boundary_curve = lagrange_interpolation(s, nodes,
+ view(coordinates, :,
+ 2),
+ barycentric_weights)
+
+ return x_coordinate_at_s_on_boundary_curve, y_coordinate_at_s_on_boundary_curve
end
-
# evaluate the derivative of a Gamma curve interpolant at a particular point s
# and return the (x,y) coordinate
function derivative_at(s, boundary_curve::CurvedSurface)
-
- @unpack nodes, barycentric_weights, coordinates = boundary_curve
-
- x_coordinate_at_s_on_boundary_curve_prime = lagrange_interpolation_derivative(s, nodes,
- view(coordinates, :, 1),
- barycentric_weights)
- y_coordinate_at_s_on_boundary_curve_prime = lagrange_interpolation_derivative(s, nodes,
- view(coordinates, :, 2),
- barycentric_weights)
- return x_coordinate_at_s_on_boundary_curve_prime, y_coordinate_at_s_on_boundary_curve_prime
+ @unpack nodes, barycentric_weights, coordinates = boundary_curve
+
+ x_coordinate_at_s_on_boundary_curve_prime = lagrange_interpolation_derivative(s,
+ nodes,
+ view(coordinates,
+ :,
+ 1),
+ barycentric_weights)
+ y_coordinate_at_s_on_boundary_curve_prime = lagrange_interpolation_derivative(s,
+ nodes,
+ view(coordinates,
+ :,
+ 2),
+ barycentric_weights)
+ return x_coordinate_at_s_on_boundary_curve_prime,
+ y_coordinate_at_s_on_boundary_curve_prime
end
-
# Chebyshev-Gauss-Lobatto nodes and weights for use with curved boundaries
function chebyshev_gauss_lobatto_nodes_weights(n_nodes::Integer)
- # Initialize output
- nodes = zeros(n_nodes)
- weights = zeros(n_nodes)
+ # Initialize output
+ nodes = zeros(n_nodes)
+ weights = zeros(n_nodes)

- # Get polynomial degree for convenience
- N = n_nodes - 1
+ # Get polynomial degree for convenience
+ N = n_nodes - 1

- for j in 1:n_nodes
- nodes[j] = -cospi( (j-1) / N )
- weights[j] = pi / N
- end - weights[1] = 0.5 * weights[1] - weights[end] = 0.5 * weights[end] + for j in 1:n_nodes + nodes[j] = -cospi((j - 1) / N) + weights[j] = pi / N + end + weights[1] = 0.5 * weights[1] + weights[end] = 0.5 * weights[end] - return nodes, weights + return nodes, weights end - # Calculate Lagrange interpolating polynomial of a function f(x) at a given point x for a given # node distribution. function lagrange_interpolation(x, nodes, fvals, wbary) -# Barycentric two formulation of Lagrange interpolant - numerator = zero(eltype(fvals)) - denominator = zero(eltype(fvals)) + # Barycentric two formulation of Lagrange interpolant + numerator = zero(eltype(fvals)) + denominator = zero(eltype(fvals)) - for j in eachindex(nodes) - if isapprox(x, nodes[j], rtol=eps(x)) - return fvals[j] + for j in eachindex(nodes) + if isapprox(x, nodes[j], rtol = eps(x)) + return fvals[j] + end + t = wbary[j] / (x - nodes[j]) + numerator += t * fvals[j] + denominator += t end - t = wbary[j] / ( x - nodes[j] ) - numerator += t * fvals[j] - denominator += t - end - return numerator/denominator + return numerator / denominator end - # Calculate derivative of a Lagrange interpolating polynomial of a function f(x) at a given # point x for a given node distribution. function lagrange_interpolation_derivative(x, nodes, fvals, wbary) + at_node = false + numerator = zero(eltype(fvals)) + i = 0 - at_node = false - numerator = zero(eltype(fvals)) - i = 0 - - for j in eachindex(nodes) - if isapprox(x, nodes[j]) - at_node = true - p = fvals[j] - denominator = -wbary[j] - i = j - end - end - - if at_node for j in eachindex(nodes) - if j != i - numerator += wbary[j] * ( p - fvals[j] ) / ( x - nodes[j] ) - end + if isapprox(x, nodes[j]) + at_node = true + p = fvals[j] + denominator = -wbary[j] + i = j + end end - else - denominator = zero(eltype(fvals)) - p = lagrange_interpolation(x, nodes, fvals, wbary) - for j in eachindex(nodes) - t = wbary[j] / (x - nodes[j]) - numerator += t * ( p - fvals[j] ) / ( x - nodes[j] ) - denominator += t + + if at_node + for j in eachindex(nodes) + if j != i + numerator += wbary[j] * (p - fvals[j]) / (x - nodes[j]) + end + end + else + denominator = zero(eltype(fvals)) + p = lagrange_interpolation(x, nodes, fvals, wbary) + for j in eachindex(nodes) + t = wbary[j] / (x - nodes[j]) + numerator += t * (p - fvals[j]) / (x - nodes[j]) + denominator += t + end end - end - return numerator/denominator # p_prime + return numerator / denominator # p_prime end - - end # @muladd diff --git a/src/meshes/transfinite_mappings_3d.jl b/src/meshes/transfinite_mappings_3d.jl index 36ca3f95551..59a02f33e1a 100644 --- a/src/meshes/transfinite_mappings_3d.jl +++ b/src/meshes/transfinite_mappings_3d.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # Illustration of the corner (circled), edge (braces), and face index numbering convention # used in these functions. 
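The `lagrange_interpolation` routine above evaluates the barycentric form of the Lagrange interpolant: terms `w_j / (x - x_j)` are accumulated in a numerator and a denominator, with an early return at (near-)node points to avoid division by zero. A self-contained check on the N = 2 Chebyshev-Gauss-Lobatto nodes, with hand-computed barycentric weights (illustration only, not part of the patch):

```julia
nodes = [-1.0, 0.0, 1.0]   # CGL nodes for N = 2, i.e. -cospi((j - 1) / N)
wbary = [0.5, -1.0, 0.5]   # barycentric weights w_j = 1 / prod_{k != j} (x_j - x_k)
fvals = nodes .^ 2         # sample f(x) = x^2 at the nodes

# same barycentric evaluation as `lagrange_interpolation` above
function interp(x)
    numerator = denominator = 0.0
    for j in eachindex(nodes)
        isapprox(x, nodes[j], rtol = eps(x)) && return fvals[j]
        t = wbary[j] / (x - nodes[j])
        numerator += t * fvals[j]
        denominator += t
    end
    return numerator / denominator
end

@assert interp(0.5) ≈ 0.25  # exact, since x^2 has degree <= N
```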
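Before the 3D hunks below, it may help to recall the 2D version from `structured_mesh.jl` above: the transfinite map adds the interpolations between opposite faces and subtracts the bilinear map, which would otherwise be counted twice. A toy check with straight faces of the rectangle [0, 2] x [0, 1] (assumed geometry, not from the patch):

```julia
linear_interpolate(s, a, b) = 0.5 * ((1 - s) * a + (1 + s) * b)

faces = (y -> [0.0, 0.5 * (y + 1)],  # left boundary,   x = 0
         y -> [2.0, 0.5 * (y + 1)],  # right boundary,  x = 2
         x -> [x + 1.0, 0.0],        # bottom boundary, y = 0
         x -> [x + 1.0, 1.0])        # top boundary,    y = 1

bilinear(x, y) = 0.25 * (faces[1](-1) * (1 - x) * (1 - y) +
                         faces[2](-1) * (1 + x) * (1 - y) +
                         faces[1](1) * (1 - x) * (1 + y) +
                         faces[2](1) * (1 + x) * (1 + y))

mapping(x, y) = (linear_interpolate(x, faces[1](y), faces[2](y)) +
                 linear_interpolate(y, faces[3](x), faces[4](x)) -
                 bilinear(x, y))

@assert mapping(-1.0, -1.0) ≈ [0.0, 0.0]  # bottom-left corner reproduced exactly
@assert mapping(1.0, 1.0) ≈ [2.0, 1.0]    # top-right corner reproduced exactly
@assert mapping(0.0, 0.0) ≈ [1.0, 0.5]    # center of the rectangle
```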
@@ -46,38 +47,38 @@ # │╱ │╱ └─────> x # ①───────────────────────{1}─────────────────────────② - # Transfinite mapping formula from a point (xi, eta, zeta) in reference space [-1,1]^3 to a # physical coordinate (x, y, z) for a hexahedral element with straight sides function straight_side_hex_map(xi, eta, zeta, corner_points) - - coordinate = zeros(eltype(xi), 3) - for j in 1:3 - coordinate[j] += (0.125 * ( corner_points[j, 1] * (1 - xi) * (1 - eta) * (1 - zeta) - + corner_points[j, 2] * (1 + xi) * (1 - eta) * (1 - zeta) - + corner_points[j, 3] * (1 + xi) * (1 + eta) * (1 - zeta) - + corner_points[j, 4] * (1 - xi) * (1 + eta) * (1 - zeta) - + corner_points[j, 5] * (1 - xi) * (1 - eta) * (1 + zeta) - + corner_points[j, 6] * (1 + xi) * (1 - eta) * (1 + zeta) - + corner_points[j, 7] * (1 + xi) * (1 + eta) * (1 + zeta) - + corner_points[j, 8] * (1 - xi) * (1 + eta) * (1 + zeta) ) ) - end - - return coordinate + coordinate = zeros(eltype(xi), 3) + for j in 1:3 + coordinate[j] += (0.125 * + (corner_points[j, 1] * (1 - xi) * (1 - eta) * (1 - zeta) + + corner_points[j, 2] * (1 + xi) * (1 - eta) * (1 - zeta) + + corner_points[j, 3] * (1 + xi) * (1 + eta) * (1 - zeta) + + corner_points[j, 4] * (1 - xi) * (1 + eta) * (1 - zeta) + + corner_points[j, 5] * (1 - xi) * (1 - eta) * (1 + zeta) + + corner_points[j, 6] * (1 + xi) * (1 - eta) * (1 + zeta) + + corner_points[j, 7] * (1 + xi) * (1 + eta) * (1 + zeta) + + corner_points[j, 8] * (1 - xi) * (1 + eta) * (1 + zeta))) + end + + return coordinate end - # Construct the (x, y, z) node coordinates in the volume of a straight sided hexahedral element -function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, element, nodes, corners) - - for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) - node_coordinates[:, i, j, k, element] .= straight_side_hex_map(nodes[i], nodes[j], nodes[k], corners) - end - - return node_coordinates +function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, element, + nodes, corners) + for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) + node_coordinates[:, i, j, k, element] .= straight_side_hex_map(nodes[i], + nodes[j], + nodes[k], + corners) + end + + return node_coordinates end - # Transfinite mapping formula from a point (xi, eta, zeta) in reference space [-1,1]^3 to a point # (x,y,z) in physical coordinate space for a hexahedral element with general curved sides # See Section 4.3 @@ -86,87 +87,88 @@ end # transmission of waves from moving material interfaces # [PhD thesis, Florida State University](https://diginole.lib.fsu.edu/islandora/object/fsu%3A185342) function transfinite_hex_map(xi, eta, zeta, face_curves::AbstractVector{<:CurvedFace}) - - coordinate = zeros(eltype(xi), 3) - face_values = zeros(eltype(xi), (3, 6)) - edge_values = zeros(eltype(xi), (3, 12)) - corners = zeros(eltype(xi), (3, 8)) - - # Compute values along the face edges - edge_values[:, 1] .= evaluate_at(SVector(xi, -1), face_curves[1]) - edge_values[:, 2] .= evaluate_at(SVector( 1, zeta), face_curves[1]) - edge_values[:, 3] .= evaluate_at(SVector(xi, 1), face_curves[1]) - edge_values[:, 4] .= evaluate_at(SVector(-1, zeta), face_curves[1]) - - edge_values[:, 5] .= evaluate_at(SVector(xi, -1), face_curves[2]) - edge_values[:, 6] .= evaluate_at(SVector( 1, zeta), face_curves[2]) - edge_values[:, 7] .= evaluate_at(SVector(xi, 1), face_curves[2]) - edge_values[:, 8] .= evaluate_at(SVector(-1, zeta), face_curves[2]) - - edge_values[:, 9] .= evaluate_at(SVector(eta, -1), face_curves[6]) - 
edge_values[:, 10] .= evaluate_at(SVector(eta, -1), face_curves[4]) - edge_values[:, 11] .= evaluate_at(SVector(eta, 1), face_curves[4]) - edge_values[:, 12] .= evaluate_at(SVector(eta, 1), face_curves[6]) - - # Compute values on the face - face_values[:, 1] .= evaluate_at(SVector( xi, zeta), face_curves[1]) - face_values[:, 2] .= evaluate_at(SVector( xi, zeta), face_curves[2]) - face_values[:, 3] .= evaluate_at(SVector( xi, eta), face_curves[3]) - face_values[:, 4] .= evaluate_at(SVector(eta, zeta), face_curves[4]) - face_values[:, 5] .= evaluate_at(SVector( xi, eta), face_curves[5]) - face_values[:, 6] .= evaluate_at(SVector(eta, zeta), face_curves[6]) - - # Pull the eight corner values and compute the straight sided hex mapping - corners[:,1] .= face_curves[1].coordinates[:, 1, 1] - corners[:,2] .= face_curves[1].coordinates[:, end, 1] - corners[:,3] .= face_curves[2].coordinates[:, end, 1] - corners[:,4] .= face_curves[2].coordinates[:, 1, 1] - corners[:,5] .= face_curves[1].coordinates[:, 1, end] - corners[:,6] .= face_curves[1].coordinates[:, end, end] - corners[:,7] .= face_curves[2].coordinates[:, end, end] - corners[:,8] .= face_curves[2].coordinates[:, 1, end] - - coordinate_straight = straight_side_hex_map(xi, eta, zeta, corners) - - # Compute the transfinite mapping - for j in 1:3 - # Linear interpolation between opposite faces - coordinate[j] = ( 0.5 * ( face_values[j, 6] * (1 - xi ) + face_values[j, 4] * (1 + xi ) - + face_values[j, 1] * (1 - eta ) + face_values[j, 2] * (1 + eta ) - + face_values[j, 3] * (1 - zeta) + face_values[j, 5] * (1 + zeta) ) ) - - # Edge corrections to ensure faces match - coordinate[j] -= ( 0.25 * ( edge_values[j, 1 ] * (1 - eta) * (1 - zeta) - + edge_values[j, 2 ] * (1 + xi ) * (1 - eta ) - + edge_values[j, 3 ] * (1 - eta) * (1 + zeta) - + edge_values[j, 4 ] * (1 - xi ) * (1 - eta ) - + edge_values[j, 5 ] * (1 + eta) * (1 - zeta) - + edge_values[j, 6 ] * (1 + xi ) * (1 + eta ) - + edge_values[j, 7 ] * (1 + eta) * (1 + zeta) - + edge_values[j, 8 ] * (1 - xi ) * (1 + eta ) - + edge_values[j, 9 ] * (1 - xi ) * (1 - zeta) - + edge_values[j, 10] * (1 + xi ) * (1 - zeta) - + edge_values[j, 11] * (1 + xi ) * (1 + zeta) - + edge_values[j, 12] * (1 - xi ) * (1 + zeta) ) ) - - # Subtracted interior twice, so add back the straight-sided hexahedral mapping - coordinate[j] += coordinate_straight[j] - end - - return coordinate + coordinate = zeros(eltype(xi), 3) + face_values = zeros(eltype(xi), (3, 6)) + edge_values = zeros(eltype(xi), (3, 12)) + corners = zeros(eltype(xi), (3, 8)) + + # Compute values along the face edges + edge_values[:, 1] .= evaluate_at(SVector(xi, -1), face_curves[1]) + edge_values[:, 2] .= evaluate_at(SVector(1, zeta), face_curves[1]) + edge_values[:, 3] .= evaluate_at(SVector(xi, 1), face_curves[1]) + edge_values[:, 4] .= evaluate_at(SVector(-1, zeta), face_curves[1]) + + edge_values[:, 5] .= evaluate_at(SVector(xi, -1), face_curves[2]) + edge_values[:, 6] .= evaluate_at(SVector(1, zeta), face_curves[2]) + edge_values[:, 7] .= evaluate_at(SVector(xi, 1), face_curves[2]) + edge_values[:, 8] .= evaluate_at(SVector(-1, zeta), face_curves[2]) + + edge_values[:, 9] .= evaluate_at(SVector(eta, -1), face_curves[6]) + edge_values[:, 10] .= evaluate_at(SVector(eta, -1), face_curves[4]) + edge_values[:, 11] .= evaluate_at(SVector(eta, 1), face_curves[4]) + edge_values[:, 12] .= evaluate_at(SVector(eta, 1), face_curves[6]) + + # Compute values on the face + face_values[:, 1] .= evaluate_at(SVector(xi, zeta), face_curves[1]) + face_values[:, 2] .= 
evaluate_at(SVector(xi, zeta), face_curves[2]) + face_values[:, 3] .= evaluate_at(SVector(xi, eta), face_curves[3]) + face_values[:, 4] .= evaluate_at(SVector(eta, zeta), face_curves[4]) + face_values[:, 5] .= evaluate_at(SVector(xi, eta), face_curves[5]) + face_values[:, 6] .= evaluate_at(SVector(eta, zeta), face_curves[6]) + + # Pull the eight corner values and compute the straight sided hex mapping + corners[:, 1] .= face_curves[1].coordinates[:, 1, 1] + corners[:, 2] .= face_curves[1].coordinates[:, end, 1] + corners[:, 3] .= face_curves[2].coordinates[:, end, 1] + corners[:, 4] .= face_curves[2].coordinates[:, 1, 1] + corners[:, 5] .= face_curves[1].coordinates[:, 1, end] + corners[:, 6] .= face_curves[1].coordinates[:, end, end] + corners[:, 7] .= face_curves[2].coordinates[:, end, end] + corners[:, 8] .= face_curves[2].coordinates[:, 1, end] + + coordinate_straight = straight_side_hex_map(xi, eta, zeta, corners) + + # Compute the transfinite mapping + for j in 1:3 + # Linear interpolation between opposite faces + coordinate[j] = (0.5 * + (face_values[j, 6] * (1 - xi) + face_values[j, 4] * (1 + xi) + + face_values[j, 1] * (1 - eta) + + face_values[j, 2] * (1 + eta) + + face_values[j, 3] * (1 - zeta) + + face_values[j, 5] * (1 + zeta))) + + # Edge corrections to ensure faces match + coordinate[j] -= (0.25 * (edge_values[j, 1] * (1 - eta) * (1 - zeta) + + edge_values[j, 2] * (1 + xi) * (1 - eta) + + edge_values[j, 3] * (1 - eta) * (1 + zeta) + + edge_values[j, 4] * (1 - xi) * (1 - eta) + + edge_values[j, 5] * (1 + eta) * (1 - zeta) + + edge_values[j, 6] * (1 + xi) * (1 + eta) + + edge_values[j, 7] * (1 + eta) * (1 + zeta) + + edge_values[j, 8] * (1 - xi) * (1 + eta) + + edge_values[j, 9] * (1 - xi) * (1 - zeta) + + edge_values[j, 10] * (1 + xi) * (1 - zeta) + + edge_values[j, 11] * (1 + xi) * (1 + zeta) + + edge_values[j, 12] * (1 - xi) * (1 + zeta))) + + # Subtracted interior twice, so add back the straight-sided hexahedral mapping + coordinate[j] += coordinate_straight[j] + end + + return coordinate end - # Construct the (x, y, z) node coordinates in the volume of a curved sided hexahedral element -function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, element, nodes, +function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 5}, element, + nodes, face_curves::AbstractVector{<:CurvedFace}) + for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) + node_coordinates[:, i, j, k, element] .= transfinite_hex_map(nodes[i], nodes[j], + nodes[k], + face_curves) + end - for k in eachindex(nodes), j in eachindex(nodes), i in eachindex(nodes) - node_coordinates[:, i, j, k, element] .= transfinite_hex_map(nodes[i], nodes[j], nodes[k], face_curves) - end - - return node_coordinates + return node_coordinates end - - end # @muladd diff --git a/src/meshes/tree_mesh.jl b/src/meshes/tree_mesh.jl index 9872d1a590a..34794ded852 100644 --- a/src/meshes/tree_mesh.jl +++ b/src/meshes/tree_mesh.jl @@ -3,13 +3,12 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent include("abstract_tree.jl") include("serial_tree.jl") include("parallel_tree.jl") - get_name(mesh::AbstractMesh) = mesh |> typeof |> nameof |> string # Composite type to hold the actual tree in addition to other mesh-related data @@ -25,49 +24,55 @@ get_name(mesh::AbstractMesh) = mesh |> typeof |> nameof |> string A Cartesian mesh based on trees of hypercubes to support adaptive mesh refinement. 
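
A minimal construction sketch (illustrative parameter values; it relies on the
keyword constructor defined further below):

    mesh = TreeMesh((-1.0, -1.0), (1.0, 1.0),
                    initial_refinement_level = 4,
                    n_cells_max = 10^4)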
""" -mutable struct TreeMesh{NDIMS, TreeType<:AbstractTree{NDIMS}} <: AbstractMesh{NDIMS} - tree::TreeType - current_filename::String - unsaved_changes::Bool - first_cell_by_rank::OffsetVector{Int, Vector{Int}} - n_cells_by_rank::OffsetVector{Int, Vector{Int}} - - function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer) where {NDIMS, TreeType<:AbstractTree{NDIMS}} - # Create mesh - m = new() - m.tree = TreeType(n_cells_max) - m.current_filename = "" - m.unsaved_changes = true - m.first_cell_by_rank = OffsetVector(Int[], 0) - m.n_cells_by_rank = OffsetVector(Int[], 0) - - return m - end - - # TODO: Taal refactor, order of important arguments, use of n_cells_max? - # TODO: Taal refactor, allow other RealT for the mesh, not just Float64 - # TODO: Taal refactor, use NTuple instead of domain_center::AbstractArray{Float64} - function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer, domain_center::AbstractArray{Float64}, - domain_length, periodicity=true) where {NDIMS, TreeType<:AbstractTree{NDIMS}} - @assert NDIMS isa Integer && NDIMS > 0 +mutable struct TreeMesh{NDIMS, TreeType <: AbstractTree{NDIMS}} <: AbstractMesh{NDIMS} + tree::TreeType + current_filename::String + unsaved_changes::Bool + first_cell_by_rank::OffsetVector{Int, Vector{Int}} + n_cells_by_rank::OffsetVector{Int, Vector{Int}} + + function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer) where {NDIMS, + TreeType <: + AbstractTree{NDIMS}} + # Create mesh + m = new() + m.tree = TreeType(n_cells_max) + m.current_filename = "" + m.unsaved_changes = true + m.first_cell_by_rank = OffsetVector(Int[], 0) + m.n_cells_by_rank = OffsetVector(Int[], 0) + + return m + end - # Create mesh - m = new() - m.tree = TreeType(n_cells_max, domain_center, domain_length, periodicity) - m.current_filename = "" - m.unsaved_changes = true - m.first_cell_by_rank = OffsetVector(Int[], 0) - m.n_cells_by_rank = OffsetVector(Int[], 0) - - return m - end + # TODO: Taal refactor, order of important arguments, use of n_cells_max? + # TODO: Taal refactor, allow other RealT for the mesh, not just Float64 + # TODO: Taal refactor, use NTuple instead of domain_center::AbstractArray{Float64} + function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer, + domain_center::AbstractArray{Float64}, + domain_length, + periodicity = true) where {NDIMS, + TreeType <: + AbstractTree{NDIMS}} + @assert NDIMS isa Integer && NDIMS > 0 + + # Create mesh + m = new() + m.tree = TreeType(n_cells_max, domain_center, domain_length, periodicity) + m.current_filename = "" + m.unsaved_changes = true + m.first_cell_by_rank = OffsetVector(Int[], 0) + m.n_cells_by_rank = OffsetVector(Int[], 0) + + return m + end end const TreeMesh1D = TreeMesh{1, TreeType} where {TreeType <: AbstractTree{1}} const TreeMesh2D = TreeMesh{2, TreeType} where {TreeType <: AbstractTree{2}} const TreeMesh3D = TreeMesh{3, TreeType} where {TreeType <: AbstractTree{3}} -const SerialTreeMesh{NDIMS} = TreeMesh{NDIMS, <:SerialTree{NDIMS}} +const SerialTreeMesh{NDIMS} = TreeMesh{NDIMS, <:SerialTree{NDIMS}} const ParallelTreeMesh{NDIMS} = TreeMesh{NDIMS, <:ParallelTree{NDIMS}} @inline mpi_parallel(mesh::SerialTreeMesh) = False() @@ -75,144 +80,152 @@ const ParallelTreeMesh{NDIMS} = TreeMesh{NDIMS, <:ParallelTree{NDIMS}} partition!(mesh::SerialTreeMesh) = nothing - # Constructor for passing the dimension and mesh type as an argument -TreeMesh(::Type{TreeType}, args...) where {NDIMS, TreeType<:AbstractTree{NDIMS}} = TreeMesh{NDIMS, TreeType}(args...) +function TreeMesh(::Type{TreeType}, + args...) 
where {NDIMS, TreeType <: AbstractTree{NDIMS}} + TreeMesh{NDIMS, TreeType}(args...) +end # Constructor accepting a single number as center (as opposed to an array) for 1D -function TreeMesh{1, TreeType}(n::Int, center::Real, len::Real, periodicity=true) where {TreeType<:AbstractTree{1}} - # TODO: Taal refactor, allow other RealT for the mesh, not just Float64 - return TreeMesh{1, TreeType}(n, SVector{1,Float64}(center), len, periodicity) +function TreeMesh{1, TreeType}(n::Int, center::Real, len::Real, + periodicity = true) where {TreeType <: AbstractTree{1}} + # TODO: Taal refactor, allow other RealT for the mesh, not just Float64 + return TreeMesh{1, TreeType}(n, SVector{1, Float64}(center), len, periodicity) end -function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer, domain_center::NTuple{NDIMS,Real}, domain_length::Real, periodicity=true) where {NDIMS, TreeType<:AbstractTree{NDIMS}} - # TODO: Taal refactor, allow other RealT for the mesh, not just Float64 - TreeMesh{NDIMS, TreeType}(n_cells_max, SVector{NDIMS,Float64}(domain_center), convert(Float64, domain_length), periodicity) +function TreeMesh{NDIMS, TreeType}(n_cells_max::Integer, + domain_center::NTuple{NDIMS, Real}, + domain_length::Real, + periodicity = true) where {NDIMS, + TreeType <: + AbstractTree{NDIMS}} + # TODO: Taal refactor, allow other RealT for the mesh, not just Float64 + TreeMesh{NDIMS, TreeType}(n_cells_max, SVector{NDIMS, Float64}(domain_center), + convert(Float64, domain_length), periodicity) end -function TreeMesh(coordinates_min::NTuple{NDIMS,Real}, coordinates_max::NTuple{NDIMS,Real}; +function TreeMesh(coordinates_min::NTuple{NDIMS, Real}, + coordinates_max::NTuple{NDIMS, Real}; n_cells_max, - periodicity=true, + periodicity = true, initial_refinement_level, - refinement_patches=(), - coarsening_patches=(), - ) where {NDIMS} - # check arguments - if !(n_cells_max isa Integer && n_cells_max > 0) - throw(ArgumentError("`n_cells_max` must be a positive integer (provided `n_cells_max = $n_cells_max`)")) - end - if !(initial_refinement_level isa Integer && initial_refinement_level >= 0) - throw(ArgumentError("`initial_refinement_level` must be a non-negative integer (provided `initial_refinement_level = $initial_refinement_level`)")) - end - - # Domain length is calculated as the maximum length in any axis direction - domain_center = @. (coordinates_min + coordinates_max) / 2 - domain_length = maximum(coordinates_max .- coordinates_min) - - # TODO: MPI, create nice interface for a parallel tree/mesh - if mpi_isparallel() - if mpi_isroot() && NDIMS == 3 - println(stderr, "ERROR: TreeMesh3D does not support parallel execution with MPI") - MPI.Abort(mpi_comm(), 1) + refinement_patches = (), + coarsening_patches = ()) where {NDIMS} + # check arguments + if !(n_cells_max isa Integer && n_cells_max > 0) + throw(ArgumentError("`n_cells_max` must be a positive integer (provided `n_cells_max = $n_cells_max`)")) + end + if !(initial_refinement_level isa Integer && initial_refinement_level >= 0) + throw(ArgumentError("`initial_refinement_level` must be a non-negative integer (provided `initial_refinement_level = $initial_refinement_level`)")) + end + + # Domain length is calculated as the maximum length in any axis direction + domain_center = @. 
(coordinates_min + coordinates_max) / 2 + domain_length = maximum(coordinates_max .- coordinates_min) + + # TODO: MPI, create nice interface for a parallel tree/mesh + if mpi_isparallel() + if mpi_isroot() && NDIMS == 3 + println(stderr, + "ERROR: TreeMesh3D does not support parallel execution with MPI") + MPI.Abort(mpi_comm(), 1) + end + TreeType = ParallelTree{NDIMS} + else + TreeType = SerialTree{NDIMS} end - TreeType = ParallelTree{NDIMS} - else - TreeType = SerialTree{NDIMS} - end - # Create mesh - mesh = @trixi_timeit timer() "creation" TreeMesh{NDIMS, TreeType}(n_cells_max, domain_center, domain_length, periodicity) + # Create mesh + mesh = @trixi_timeit timer() "creation" TreeMesh{NDIMS, TreeType}(n_cells_max, + domain_center, + domain_length, + periodicity) - # Initialize mesh - initialize!(mesh, initial_refinement_level, refinement_patches, coarsening_patches) + # Initialize mesh + initialize!(mesh, initial_refinement_level, refinement_patches, coarsening_patches) - return mesh + return mesh end function initialize!(mesh::TreeMesh, initial_refinement_level, refinement_patches, coarsening_patches) - # Create initial refinement - @trixi_timeit timer() "initial refinement" refine_uniformly!(mesh.tree, initial_refinement_level) - - # Apply refinement patches - @trixi_timeit timer() "refinement patches" for patch in refinement_patches - # TODO: Taal refactor, use multiple dispatch? - if patch.type == "box" - refine_box!(mesh.tree, patch.coordinates_min, patch.coordinates_max) - elseif patch.type == "sphere" - refine_sphere!(mesh.tree, patch.center, patch.radius) - else - error("unknown refinement patch type '$(patch.type)'") + # Create initial refinement + @trixi_timeit timer() "initial refinement" refine_uniformly!(mesh.tree, + initial_refinement_level) + + # Apply refinement patches + @trixi_timeit timer() "refinement patches" for patch in refinement_patches + # TODO: Taal refactor, use multiple dispatch? + if patch.type == "box" + refine_box!(mesh.tree, patch.coordinates_min, patch.coordinates_max) + elseif patch.type == "sphere" + refine_sphere!(mesh.tree, patch.center, patch.radius) + else + error("unknown refinement patch type '$(patch.type)'") + end end - end - # Apply coarsening patches - @trixi_timeit timer() "coarsening patches" for patch in coarsening_patches - # TODO: Taal refactor, use multiple dispatch - if patch.type == "box" - coarsen_box!(mesh.tree, patch.coordinates_min, patch.coordinates_max) - else - error("unknown coarsening patch type '$(patch.type)'") + # Apply coarsening patches + @trixi_timeit timer() "coarsening patches" for patch in coarsening_patches + # TODO: Taal refactor, use multiple dispatch + if patch.type == "box" + coarsen_box!(mesh.tree, patch.coordinates_min, patch.coordinates_max) + else + error("unknown coarsening patch type '$(patch.type)'") + end end - end - # Partition the mesh among multiple MPI ranks (does nothing if run in serial) - partition!(mesh) + # Partition the mesh among multiple MPI ranks (does nothing if run in serial) + partition!(mesh) - return nothing + return nothing end function TreeMesh(coordinates_min::Real, coordinates_max::Real; kwargs...) - TreeMesh((coordinates_min,), (coordinates_max,); kwargs...) + TreeMesh((coordinates_min,), (coordinates_max,); kwargs...) 
end - function Base.show(io::IO, mesh::TreeMesh{NDIMS, TreeType}) where {NDIMS, TreeType} - print(io, "TreeMesh{", NDIMS, ", ", TreeType, "} with length ", mesh.tree.length) + print(io, "TreeMesh{", NDIMS, ", ", TreeType, "} with length ", mesh.tree.length) end -function Base.show(io::IO, ::MIME"text/plain", mesh::TreeMesh{NDIMS, TreeType}) where {NDIMS, TreeType} - if get(io, :compact, false) - show(io, mesh) - else - setup = [ - "center" => mesh.tree.center_level_0, - "length" => mesh.tree.length_level_0, - "periodicity" => mesh.tree.periodicity, - "current #cells" => mesh.tree.length, - "maximum #cells" => mesh.tree.capacity, - ] - summary_box(io, "TreeMesh{" * string(NDIMS) * ", " * string(TreeType) * "}", setup) - end +function Base.show(io::IO, ::MIME"text/plain", + mesh::TreeMesh{NDIMS, TreeType}) where {NDIMS, TreeType} + if get(io, :compact, false) + show(io, mesh) + else + setup = [ + "center" => mesh.tree.center_level_0, + "length" => mesh.tree.length_level_0, + "periodicity" => mesh.tree.periodicity, + "current #cells" => mesh.tree.length, + "maximum #cells" => mesh.tree.capacity, + ] + summary_box(io, "TreeMesh{" * string(NDIMS) * ", " * string(TreeType) * "}", + setup) + end end - @inline Base.ndims(mesh::TreeMesh) = ndims(mesh.tree) - - # Obtain the mesh filename from a restart file function get_restart_mesh_filename(restart_filename, mpi_parallel::False) - # Get directory name - dirname, _ = splitdir(restart_filename) + # Get directory name + dirname, _ = splitdir(restart_filename) - # Read mesh filename from restart file - mesh_file = "" - h5open(restart_filename, "r") do file - mesh_file = read(attributes(file)["mesh_file"]) - end + # Read mesh filename from restart file + mesh_file = "" + h5open(restart_filename, "r") do file + mesh_file = read(attributes(file)["mesh_file"]) + end - # Construct and return filename - return joinpath(dirname, mesh_file) + # Construct and return filename + return joinpath(dirname, mesh_file) end - function total_volume(mesh::TreeMesh) - return mesh.tree.length_level_0^ndims(mesh) + return mesh.tree.length_level_0^ndims(mesh) end - include("parallel_tree_mesh.jl") - - end # @muladd diff --git a/src/meshes/unstructured_mesh.jl b/src/meshes/unstructured_mesh.jl index 202abe8079b..c370c0f25f8 100644 --- a/src/meshes/unstructured_mesh.jl +++ b/src/meshes/unstructured_mesh.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ UnstructuredMesh2D <: AbstractMesh{2} @@ -15,211 +15,227 @@ An unstructured (possibly curved) quadrilateral mesh. All mesh information, neighbour coupling, and boundary curve information is read in from a mesh file `filename`. 
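
A minimal usage sketch (`mesh_file` is a placeholder for an actual mesh file
path; the keyword shown is the default):

    mesh = UnstructuredMesh2D(mesh_file; periodicity = false)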
""" -mutable struct UnstructuredMesh2D{RealT<:Real, CurvedSurfaceT<:CurvedSurface{RealT}} <: AbstractMesh{2} - filename ::String - n_corners ::Int - n_surfaces ::Int # total number of surfaces - n_interfaces ::Int # number of interior surfaces - n_boundaries ::Int # number of surfaces on the physical boundary - n_elements ::Int - polydeg ::Int - corners ::Array{RealT, 2} # [ndims, n_corners] - neighbour_information::Array{Int, 2} # [neighbour node/element/edge ids, n_surfaces] - boundary_names ::Array{Symbol, 2} # [local sides, n_elements] - periodicity ::Bool - element_node_ids ::Array{Int, 2} # [node ids, n_elements] - element_is_curved ::Vector{Bool} - surface_curves ::Array{CurvedSurfaceT, 2} # [local sides, n_elements] - current_filename ::String - unsaved_changes ::Bool # if true, the mesh will be saved for plotting +mutable struct UnstructuredMesh2D{RealT <: Real, CurvedSurfaceT <: CurvedSurface{RealT} + } <: AbstractMesh{2} + filename :: String + n_corners :: Int + n_surfaces :: Int # total number of surfaces + n_interfaces :: Int # number of interior surfaces + n_boundaries :: Int # number of surfaces on the physical boundary + n_elements :: Int + polydeg :: Int + corners :: Array{RealT, 2} # [ndims, n_corners] + neighbour_information :: Array{Int, 2} # [neighbour node/element/edge ids, n_surfaces] + boundary_names :: Array{Symbol, 2} # [local sides, n_elements] + periodicity :: Bool + element_node_ids :: Array{Int, 2} # [node ids, n_elements] + element_is_curved :: Vector{Bool} + surface_curves :: Array{CurvedSurfaceT, 2} # [local sides, n_elements] + current_filename :: String + unsaved_changes :: Bool # if true, the mesh will be saved for plotting end - # constructor for an unstructured mesh read in from a file # TODO: this mesh file parsing and construction of the mesh skeleton can likely be improved in terms # of performance -function UnstructuredMesh2D(filename; RealT=Float64, periodicity=false, unsaved_changes=true) - - # readin all the information from the mesh file into a string array - file_lines = readlines(open(filename)) - - # readin the number of nodes, number of interfaces, number of elements and local polynomial degree - current_line = split(file_lines[2]) - n_corners = parse(Int, current_line[1]) - n_surfaces = parse(Int, current_line[2]) - n_elements = parse(Int, current_line[3]) - mesh_polydeg = parse(Int, current_line[4]) - - mesh_nnodes = mesh_polydeg + 1 - - # The types of structs used in the following depend on information read from - # the mesh file. Thus, this cannot be type stable at all. Hence, we allocate - # the memory now and introduce a function barrier before continuing to read - # data from the file. 
-  corner_nodes = Array{RealT}(undef, (2, n_corners))
-  interface_info = Array{Int}(undef, (6, n_surfaces))
-  element_node_ids = Array{Int}(undef, (4, n_elements))
-  curved_check = Vector{Int}(undef, 4)
-  quad_corners = Array{RealT}(undef, (4, 2))
-  quad_corners_flipped = Array{RealT}(undef, (4, 2))
-  curve_values = Array{RealT}(undef, (mesh_nnodes, 2))
-  element_is_curved = Array{Bool}(undef, n_elements)
-  CurvedSurfaceT = CurvedSurface{RealT}
-  surface_curves = Array{CurvedSurfaceT}(undef, (4, n_elements))
-  boundary_names = Array{Symbol}(undef, (4, n_elements))
-
-  # create the Chebyshev-Gauss-Lobatto nodes used to represent any curved boundaries that are
-  # required to construct the sides
-  cheby_nodes_, _ = chebyshev_gauss_lobatto_nodes_weights(mesh_nnodes)
-  bary_weights_ = barycentric_weights(cheby_nodes_)
-  cheby_nodes = SVector{mesh_nnodes}(cheby_nodes_)
-  bary_weights = SVector{mesh_nnodes}(bary_weights_)
-
-  arrays = (; corner_nodes, interface_info, element_node_ids, curved_check,
+function UnstructuredMesh2D(filename; RealT = Float64, periodicity = false,
+                            unsaved_changes = true)
+
+    # read in all the information from the mesh file into a string array
+    file_lines = readlines(open(filename))
+
+    # read in the number of nodes, number of interfaces, number of elements and local polynomial degree
+    current_line = split(file_lines[2])
+    n_corners = parse(Int, current_line[1])
+    n_surfaces = parse(Int, current_line[2])
+    n_elements = parse(Int, current_line[3])
+    mesh_polydeg = parse(Int, current_line[4])
+
+    mesh_nnodes = mesh_polydeg + 1
+
+    # The types of structs used in the following depend on information read from
+    # the mesh file. Thus, this cannot be type stable at all. Hence, we allocate
+    # the memory now and introduce a function barrier before continuing to read
+    # data from the file. 
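+    # (The function barrier is `parse_mesh_file!` below: the freshly allocated
+    #  arrays are passed to it as arguments, so Julia compiles that function for
+    #  their concrete types even though this outer constructor is type unstable.)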
+ corner_nodes = Array{RealT}(undef, (2, n_corners)) + interface_info = Array{Int}(undef, (6, n_surfaces)) + element_node_ids = Array{Int}(undef, (4, n_elements)) + curved_check = Vector{Int}(undef, 4) + quad_corners = Array{RealT}(undef, (4, 2)) + quad_corners_flipped = Array{RealT}(undef, (4, 2)) + curve_values = Array{RealT}(undef, (mesh_nnodes, 2)) + element_is_curved = Array{Bool}(undef, n_elements) + CurvedSurfaceT = CurvedSurface{RealT} + surface_curves = Array{CurvedSurfaceT}(undef, (4, n_elements)) + boundary_names = Array{Symbol}(undef, (4, n_elements)) + + # create the Chebyshev-Gauss-Lobatto nodes used to represent any curved boundaries that are + # required to construct the sides + cheby_nodes_, _ = chebyshev_gauss_lobatto_nodes_weights(mesh_nnodes) + bary_weights_ = barycentric_weights(cheby_nodes_) + cheby_nodes = SVector{mesh_nnodes}(cheby_nodes_) + bary_weights = SVector{mesh_nnodes}(bary_weights_) + + arrays = (; corner_nodes, interface_info, element_node_ids, curved_check, quad_corners, quad_corners_flipped, curve_values, element_is_curved, surface_curves, boundary_names) - counters = (; n_corners, n_surfaces, n_elements) + counters = (; n_corners, n_surfaces, n_elements) - n_boundaries = parse_mesh_file!(arrays, RealT, CurvedSurfaceT, file_lines, counters, cheby_nodes, bary_weights) + n_boundaries = parse_mesh_file!(arrays, RealT, CurvedSurfaceT, file_lines, counters, + cheby_nodes, bary_weights) - # get the number of internal interfaces in the mesh - if periodicity - n_interfaces = n_surfaces - n_boundaries = 0 - else - n_interfaces = n_surfaces - n_boundaries - end - - return UnstructuredMesh2D{RealT, CurvedSurfaceT}( - filename, n_corners, n_surfaces, n_interfaces, n_boundaries, - n_elements, mesh_polydeg, corner_nodes, - interface_info, boundary_names, periodicity, - element_node_ids, element_is_curved, surface_curves, "", unsaved_changes) + # get the number of internal interfaces in the mesh + if periodicity + n_interfaces = n_surfaces + n_boundaries = 0 + else + n_interfaces = n_surfaces - n_boundaries + end + + return UnstructuredMesh2D{RealT, CurvedSurfaceT}(filename, n_corners, n_surfaces, + n_interfaces, n_boundaries, + n_elements, mesh_polydeg, + corner_nodes, + interface_info, boundary_names, + periodicity, + element_node_ids, + element_is_curved, surface_curves, + "", unsaved_changes) end -function parse_mesh_file!(arrays, RealT, CurvedSurfaceT, file_lines, counters, cheby_nodes, bary_weights) - @unpack ( corner_nodes, interface_info, element_node_ids, curved_check, - quad_corners, quad_corners_flipped, curve_values, - element_is_curved, surface_curves, boundary_names ) = arrays - @unpack n_corners, n_surfaces, n_elements = counters - mesh_nnodes = length(cheby_nodes) - - # counter to step through the mesh file line by line - file_idx = 3 - - # readin an store the nodes that dictate the corners of the elements needed to construct the - # element geometry terms - for j in 1:n_corners - current_line = split(file_lines[file_idx]) - corner_nodes[1, j] = parse(RealT, current_line[1]) - corner_nodes[2, j] = parse(RealT, current_line[2]) - file_idx += 1 - end - - # readin an store the nodes that dictate the interfaces, neighbour data, and orientations contains - # the following: - # interface_info[1] = start node ID - # interface_info[2] = end node ID - # interface_info[3] = ID of the primary element - # interface_info[4] = ID of the secondary element (if 0 then it is a physical boundary) - # interface_info[5] = local side ID on the primary element - # 
interface_info[6] = local side ID on the secondary element
-  # container to for the interface neighbour information and connectivity
-  n_boundaries = 0
-  for j in 1:n_surfaces
-    current_line = split(file_lines[file_idx])
-    interface_info[1, j] = parse(Int, current_line[1])
-    interface_info[2, j] = parse(Int, current_line[2])
-    interface_info[3, j] = parse(Int, current_line[3])
-    interface_info[4, j] = parse(Int, current_line[4])
-    interface_info[5, j] = parse(Int, current_line[5])
-    interface_info[6, j] = parse(Int, current_line[6])
-
-    # count the number of physical boundaries
-    if interface_info[4,j] == 0
-      n_boundaries += 1
+function parse_mesh_file!(arrays, RealT, CurvedSurfaceT, file_lines, counters,
+                          cheby_nodes, bary_weights)
+    @unpack (corner_nodes, interface_info, element_node_ids, curved_check,
+    quad_corners, quad_corners_flipped, curve_values,
+    element_is_curved, surface_curves, boundary_names) = arrays
+    @unpack n_corners, n_surfaces, n_elements = counters
+    mesh_nnodes = length(cheby_nodes)
+
+    # counter to step through the mesh file line by line
+    file_idx = 3
+
+    # read in and store the nodes that dictate the corners of the elements needed to construct the
+    # element geometry terms
+    for j in 1:n_corners
+        current_line = split(file_lines[file_idx])
+        corner_nodes[1, j] = parse(RealT, current_line[1])
+        corner_nodes[2, j] = parse(RealT, current_line[2])
+        file_idx += 1
    end
-
-  # work arrays to pull to correct corners of a given element (agnostic to curvature) and local
-  # copies of the curved boundary information
-
-  # readin an store the curved boundary information of the elements
-
-  for j in 1:n_elements
-    # pull the corner node IDs
-    current_line = split(file_lines[file_idx])
-    element_node_ids[1, j] = parse(Int, current_line[1])
-    element_node_ids[2, j] = parse(Int, current_line[2])
-    element_node_ids[3, j] = parse(Int, current_line[3])
-    element_node_ids[4, j] = parse(Int, current_line[4])
-    for i in 1:4
-      # pull the (x,y) values of these corners out of the nodes array
-      quad_corners[i, :] .= corner_nodes[:, element_node_ids[i, j]]
+
+    # read in and store the nodes that dictate the interfaces, neighbour data, and orientations; it contains
+    # the following:
+    # interface_info[1] = start node ID
+    # interface_info[2] = end node ID
+    # interface_info[3] = ID of the primary element
+    # interface_info[4] = ID of the secondary element (if 0 then it is a physical boundary)
+    # interface_info[5] = local side ID on the primary element
+    # interface_info[6] = local side ID on the secondary element
+    # container for the interface neighbour information and connectivity
+    n_boundaries = 0
+    for j in 1:n_surfaces
+        current_line = split(file_lines[file_idx])
+        interface_info[1, j] = parse(Int, current_line[1])
+        interface_info[2, j] = parse(Int, current_line[2])
+        interface_info[3, j] = parse(Int, current_line[3])
+        interface_info[4, j] = parse(Int, current_line[4])
+        interface_info[5, j] = parse(Int, current_line[5])
+        interface_info[6, j] = parse(Int, current_line[6])
+
+        # count the number of physical boundaries
+        if interface_info[4, j] == 0
+            n_boundaries += 1
+        end
+        file_idx += 1
    end
-    # pull the information to check if boundary is curved in order to read in additional data
-    file_idx += 1
-    current_line = split(file_lines[file_idx])
-    curved_check[1] = parse(Int, current_line[1])
-    curved_check[2] = parse(Int, current_line[2])
-    curved_check[3] = parse(Int, current_line[3])
-    curved_check[4] = parse(Int, current_line[4])
-    if sum(curved_check) == 0
-      # 
quadrilateral element is straight sided
-      element_is_curved[j] = false
-      file_idx += 1
-      # read all the boundary names
-      boundary_names[:, j] = map(Symbol, split(file_lines[file_idx]))
-    else
-      # quadrilateral element has at least one curved side
-      element_is_curved[j] = true
-
-      # flip node ordering to make sure the element is right-handed for the interpolations
-      m1 = 1
-      m2 = 2
-      @views quad_corners_flipped[1, :] .= quad_corners[4, :]
-      @views quad_corners_flipped[2, :] .= quad_corners[2, :]
-      @views quad_corners_flipped[3, :] .= quad_corners[3, :]
-      @views quad_corners_flipped[4, :] .= quad_corners[1, :]
-      for i in 1:4
-        if curved_check[i] == 0
-          # when curved_check[i] is 0 then the "curve" from corner `i` to corner `i+1` is a
-          # straight line. So we must construct the interpolant for this line
-          for k in 1:mesh_nnodes
-            curve_values[k, 1] = linear_interpolate(cheby_nodes[k], quad_corners_flipped[m1, 1], quad_corners_flipped[m2, 1])
-            curve_values[k, 2] = linear_interpolate(cheby_nodes[k], quad_corners_flipped[m1, 2], quad_corners_flipped[m2, 2])
-          end
-        else
-          # when curved_check[i] is 1 this curved boundary information is supplied by the mesh
-          # generator. So we just read it into a work array
-          for k in 1:mesh_nnodes
-            file_idx += 1
-            current_line = split(file_lines[file_idx])
-            curve_values[k, 1] = parse(RealT,current_line[1])
-            curve_values[k, 2] = parse(RealT,current_line[2])
-          end
+
+    # work arrays to pull the correct corners of a given element (agnostic to curvature) and local
+    # copies of the curved boundary information
+
+    # read in and store the curved boundary information of the elements
+
+    for j in 1:n_elements
+        # pull the corner node IDs
+        current_line = split(file_lines[file_idx])
+        element_node_ids[1, j] = parse(Int, current_line[1])
+        element_node_ids[2, j] = parse(Int, current_line[2])
+        element_node_ids[3, j] = parse(Int, current_line[3])
+        element_node_ids[4, j] = parse(Int, current_line[4])
+        for i in 1:4
+            # pull the (x,y) values of these corners out of the nodes array
+            quad_corners[i, :] .= corner_nodes[:, element_node_ids[i, j]]
        end
-          # construct the curve interpolant for the current side
-          surface_curves[i, j] = CurvedSurfaceT(cheby_nodes, bary_weights, copy(curve_values))
-          # indexing update that contains a "flip" to ensure correct element orientation
-          # if we need to construct the straight line "curves" when curved_check[i] == 0
-          m1 += 1
-          if i == 3
-            m2 = 1
+        # pull the information to check if boundary is curved in order to read in additional data
+        file_idx += 1
+        current_line = split(file_lines[file_idx])
+        curved_check[1] = parse(Int, current_line[1])
+        curved_check[2] = parse(Int, current_line[2])
+        curved_check[3] = parse(Int, current_line[3])
+        curved_check[4] = parse(Int, current_line[4])
+        if sum(curved_check) == 0
+            # quadrilateral element is straight sided
+            element_is_curved[j] = false
+            file_idx += 1
+            # read all the boundary names
+            boundary_names[:, j] = map(Symbol, split(file_lines[file_idx]))
        else
-            m2 += 1
+            # quadrilateral element has at least one curved side
+            element_is_curved[j] = true
+
+            # flip node ordering to make sure the element is right-handed for the interpolations
+            m1 = 1
+            m2 = 2
+            @views quad_corners_flipped[1, :] .= quad_corners[4, :]
+            @views quad_corners_flipped[2, :] .= quad_corners[2, :]
+            @views quad_corners_flipped[3, :] .= quad_corners[3, :]
+            @views quad_corners_flipped[4, :] .= quad_corners[1, :]
+            for i in 1:4
+                if curved_check[i] == 0
+                    # when curved_check[i] is 0 then the "curve" from corner `i` to corner 
`i+1` is a + # straight line. So we must construct the interpolant for this line + for k in 1:mesh_nnodes + curve_values[k, 1] = linear_interpolate(cheby_nodes[k], + quad_corners_flipped[m1, + 1], + quad_corners_flipped[m2, + 1]) + curve_values[k, 2] = linear_interpolate(cheby_nodes[k], + quad_corners_flipped[m1, + 2], + quad_corners_flipped[m2, + 2]) + end + else + # when curved_check[i] is 1 this curved boundary information is supplied by the mesh + # generator. So we just read it into a work array + for k in 1:mesh_nnodes + file_idx += 1 + current_line = split(file_lines[file_idx]) + curve_values[k, 1] = parse(RealT, current_line[1]) + curve_values[k, 2] = parse(RealT, current_line[2]) + end + end + # construct the curve interpolant for the current side + surface_curves[i, j] = CurvedSurfaceT(cheby_nodes, bary_weights, + copy(curve_values)) + # indexing update that contains a "flip" to ensure correct element orientation + # if we need to construct the straight line "curves" when curved_check[i] == 0 + m1 += 1 + if i == 3 + m2 = 1 + else + m2 += 1 + end + end + # finally read in the boundary names where "---" means an internal connection + file_idx += 1 + boundary_names[:, j] = map(Symbol, split(file_lines[file_idx])) end - end - # finally read in the boundary names where "---" means an internal connection - file_idx += 1 - boundary_names[:, j] = map(Symbol, split(file_lines[file_idx])) + # one last increment to the global index to read the next piece of element information + file_idx += 1 end - # one last increment to the global index to read the next piece of element information - file_idx += 1 - end - return n_boundaries + return n_boundaries end @inline Base.ndims(::UnstructuredMesh2D) = 2 @@ -230,24 +246,27 @@ isperiodic(mesh::UnstructuredMesh2D) = mesh.periodicity Base.length(mesh::UnstructuredMesh2D) = mesh.n_elements - -function Base.show(io::IO, ::UnstructuredMesh2D{RealT, CurvedSurfaceT}) where {RealT, CurvedSurfaceT} - print(io, "UnstructuredMesh2D{2, ", RealT, ", ", CurvedSurfaceT, "}") +function Base.show(io::IO, + ::UnstructuredMesh2D{RealT, CurvedSurfaceT}) where {RealT, + CurvedSurfaceT} + print(io, "UnstructuredMesh2D{2, ", RealT, ", ", CurvedSurfaceT, "}") end - -function Base.show(io::IO, ::MIME"text/plain", mesh::UnstructuredMesh2D{RealT, CurvedSurfaceT}) where {RealT, CurvedSurfaceT} - if get(io, :compact, false) - show(io, mesh) - else - summary_header(io, "UnstructuredMesh2D{" * string(2) * ", " * string(RealT) * ", " * string(CurvedSurfaceT) * "}") - summary_line(io, "mesh file", mesh.filename) - summary_line(io, "number of elements", length(mesh)) - summary_line(io, "faces", mesh.n_surfaces) - summary_line(io, "mesh polynomial degree", mesh.polydeg) - summary_footer(io) - end +function Base.show(io::IO, ::MIME"text/plain", + mesh::UnstructuredMesh2D{RealT, CurvedSurfaceT}) where {RealT, + CurvedSurfaceT + } + if get(io, :compact, false) + show(io, mesh) + else + summary_header(io, + "UnstructuredMesh2D{" * string(2) * ", " * string(RealT) * ", " * + string(CurvedSurfaceT) * "}") + summary_line(io, "mesh file", mesh.filename) + summary_line(io, "number of elements", length(mesh)) + summary_line(io, "faces", mesh.n_surfaces) + summary_line(io, "mesh polynomial degree", mesh.polydeg) + summary_footer(io) + end end - - end # @muladd diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index ec4c33c5628..8fef66d261e 100644 --- a/src/semidiscretization/semidiscretization.jl +++ 
b/src/semidiscretization/semidiscretization.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ ndofs(semi::AbstractSemidiscretization) @@ -11,11 +11,10 @@ Return the number of degrees of freedom associated with each scalar variable. """ @inline function ndofs(semi::AbstractSemidiscretization) - mesh, _, solver, cache = mesh_equations_solver_cache(semi) - ndofs(mesh, solver, cache) + mesh, _, solver, cache = mesh_equations_solver_cache(semi) + ndofs(mesh, solver, cache) end - """ integrate_via_indices(func, u_ode, semi::AbstractSemidiscretization, args...; normalize=true) @@ -24,11 +23,13 @@ and integrate the result using a quadrature associated with the semidiscretizati If `normalize` is true, the result is divided by the total volume of the computational domain. """ -function integrate_via_indices(func::Func, u_ode, semi::AbstractSemidiscretization, args...; normalize=true) where {Func} - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) +function integrate_via_indices(func::Func, u_ode, semi::AbstractSemidiscretization, + args...; normalize = true) where {Func} + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array(u_ode, mesh, equations, solver, cache) - integrate_via_indices(func, u, mesh, equations, solver, cache, args..., normalize=normalize) + u = wrap_array(u_ode, mesh, equations, solver, cache) + integrate_via_indices(func, u, mesh, equations, solver, cache, args..., + normalize = normalize) end """ @@ -39,18 +40,18 @@ and integrate the result using a quadrature associated with the semidiscretizati If `normalize` is true, the result is divided by the total volume of the computational domain. """ -function integrate(func::Func, u_ode, semi::AbstractSemidiscretization; normalize=true) where {Func} - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) +function integrate(func::Func, u_ode, semi::AbstractSemidiscretization; + normalize = true) where {Func} + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) - u = wrap_array(u_ode, mesh, equations, solver, cache) - integrate(func, u, mesh, equations, solver, cache, normalize=normalize) + u = wrap_array(u_ode, mesh, equations, solver, cache) + integrate(func, u, mesh, equations, solver, cache, normalize = normalize) end -function integrate(u, semi::AbstractSemidiscretization; normalize=true) - integrate(cons2cons, u, semi; normalize=normalize) +function integrate(u, semi::AbstractSemidiscretization; normalize = true) + integrate(cons2cons, u, semi; normalize = normalize) end - """ calc_error_norms([func=(u_node,equations)->u_node,] u_ode, t, analyzer, semi::AbstractSemidiscretization, cache_analysis) @@ -58,8 +59,10 @@ Calculate discrete L2 and L∞ error norms of `func` applied to each nodal varia If no exact solution is available, "errors" are calculated using some reference state and can be useful for regression tests. 
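
A sketch of a typical call (with `analyzer` and `cache_analysis` as set up by an
analysis callback), returning both norms:

    l2_error, linf_error = calc_error_norms(u_ode, t, analyzer, semi, cache_analysis)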
""" -calc_error_norms(u_ode, t, analyzer, semi::AbstractSemidiscretization, cache_analysis) = calc_error_norms(cons2cons, u_ode, t, analyzer, semi, cache_analysis) - +function calc_error_norms(u_ode, t, analyzer, semi::AbstractSemidiscretization, + cache_analysis) + calc_error_norms(cons2cons, u_ode, t, analyzer, semi, cache_analysis) +end """ semidiscretize(semi::AbstractSemidiscretization, tspan) @@ -68,15 +71,14 @@ Wrap the semidiscretization `semi` as an ODE problem in the time interval `tspan that can be passed to `solve` from the [SciML ecosystem](https://diffeq.sciml.ai/latest/). """ function semidiscretize(semi::AbstractSemidiscretization, tspan) - u0_ode = compute_coefficients(first(tspan), semi) - # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using - # mpi_isparallel() && MPI.Barrier(mpi_comm()) - # See https://github.com/trixi-framework/Trixi.jl/issues/328 - iip = true # is-inplace, i.e., we modify a vector when calling rhs! - return ODEProblem{iip}(rhs!, u0_ode, tspan, semi) + u0_ode = compute_coefficients(first(tspan), semi) + # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using + # mpi_isparallel() && MPI.Barrier(mpi_comm()) + # See https://github.com/trixi-framework/Trixi.jl/issues/328 + iip = true # is-inplace, i.e., we modify a vector when calling rhs! + return ODEProblem{iip}(rhs!, u0_ode, tspan, semi) end - """ semidiscretize(semi::AbstractSemidiscretization, tspan, restart_file::AbstractString) @@ -84,16 +86,16 @@ Wrap the semidiscretization `semi` as an ODE problem in the time interval `tspan that can be passed to `solve` from the [SciML ecosystem](https://diffeq.sciml.ai/latest/). The initial condition etc. is taken from the `restart_file`. """ -function semidiscretize(semi::AbstractSemidiscretization, tspan, restart_file::AbstractString) - u0_ode = load_restart_file(semi, restart_file) - # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using - # mpi_isparallel() && MPI.Barrier(mpi_comm()) - # See https://github.com/trixi-framework/Trixi.jl/issues/328 - iip = true # is-inplace, i.e., we modify a vector when calling rhs! - return ODEProblem{iip}(rhs!, u0_ode, tspan, semi) +function semidiscretize(semi::AbstractSemidiscretization, tspan, + restart_file::AbstractString) + u0_ode = load_restart_file(semi, restart_file) + # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using + # mpi_isparallel() && MPI.Barrier(mpi_comm()) + # See https://github.com/trixi-framework/Trixi.jl/issues/328 + iip = true # is-inplace, i.e., we modify a vector when calling rhs! + return ODEProblem{iip}(rhs!, u0_ode, tspan, semi) end - """ compute_coefficients(func, t, semi::AbstractSemidiscretization) @@ -109,10 +111,10 @@ For semidiscretizations `semi` associated with an initial condition, `func` can to use the given initial condition at time `t`. """ function compute_coefficients(func, t, semi::AbstractSemidiscretization) - u_ode = allocate_coefficients(mesh_equations_solver_cache(semi)...) - # Call `compute_coefficients` defined below - compute_coefficients!(u_ode, func, t, semi) - return u_ode + u_ode = allocate_coefficients(mesh_equations_solver_cache(semi)...) + # Call `compute_coefficients` defined below + compute_coefficients!(u_ode, func, t, semi) + return u_ode end """ @@ -121,12 +123,11 @@ end Same as [`compute_coefficients`](@ref) but stores the result in `u_ode`. 
""" function compute_coefficients!(u_ode, func, t, semi::AbstractSemidiscretization) - u = wrap_array(u_ode, semi) - # Call `compute_coefficients` defined by the solver - compute_coefficients!(u, func, t, mesh_equations_solver_cache(semi)...) + u = wrap_array(u_ode, semi) + # Call `compute_coefficients` defined by the solver + compute_coefficients!(u, func, t, mesh_equations_solver_cache(semi)...) end - """ linear_structure(semi::AbstractSemidiscretization; t0=zero(real(semi))) @@ -136,31 +137,30 @@ at time `t0` as an affine-linear operator given by a linear operator `A` and a vector `b`. """ function linear_structure(semi::AbstractSemidiscretization; - t0=zero(real(semi))) - # allocate memory - u_ode = allocate_coefficients(mesh_equations_solver_cache(semi)...) - du_ode = similar(u_ode) - - # get the right hand side from possible source terms - u_ode .= zero(eltype(u_ode)) - rhs!(du_ode, u_ode, semi, t0) - # Create a copy of `b` used internally to extract the linear part of `semi`. - # This is necessary to get everything correct when the users updates the - # returned vector `b`. - b = -du_ode - b_tmp = copy(b) - - # wrap the linear operator - A = LinearMap(length(u_ode), ismutating=true) do dest,src - rhs!(dest, src, semi, t0) - @. dest += b_tmp - dest - end - - return A, b + t0 = zero(real(semi))) + # allocate memory + u_ode = allocate_coefficients(mesh_equations_solver_cache(semi)...) + du_ode = similar(u_ode) + + # get the right hand side from possible source terms + u_ode .= zero(eltype(u_ode)) + rhs!(du_ode, u_ode, semi, t0) + # Create a copy of `b` used internally to extract the linear part of `semi`. + # This is necessary to get everything correct when the users updates the + # returned vector `b`. + b = -du_ode + b_tmp = copy(b) + + # wrap the linear operator + A = LinearMap(length(u_ode), ismutating = true) do dest, src + rhs!(dest, src, semi, t0) + @. dest += b_tmp + dest + end + + return A, b end - """ jacobian_fd(semi::AbstractSemidiscretization; t0=zero(real(semi)), @@ -171,44 +171,43 @@ and simple second order finite difference to compute the Jacobian `J` of the semidiscretization `semi` at state `u0_ode`. 
""" function jacobian_fd(semi::AbstractSemidiscretization; - t0=zero(real(semi)), - u0_ode=compute_coefficients(t0, semi)) - # copy the initial state since it will be modified in the following - u_ode = copy(u0_ode) - du0_ode = similar(u_ode) - dup_ode = similar(u_ode) - dum_ode = similar(u_ode) + t0 = zero(real(semi)), + u0_ode = compute_coefficients(t0, semi)) + # copy the initial state since it will be modified in the following + u_ode = copy(u0_ode) + du0_ode = similar(u_ode) + dup_ode = similar(u_ode) + dum_ode = similar(u_ode) - # compute residual of linearization state - rhs!(du0_ode, u_ode, semi, t0) + # compute residual of linearization state + rhs!(du0_ode, u_ode, semi, t0) - # initialize Jacobian matrix - J = zeros(eltype(u_ode), length(u_ode), length(u_ode)) + # initialize Jacobian matrix + J = zeros(eltype(u_ode), length(u_ode), length(u_ode)) - # use second order finite difference to estimate Jacobian matrix - for idx in eachindex(u0_ode) - # determine size of fluctuation - epsilon = sqrt(eps(u0_ode[idx])) + # use second order finite difference to estimate Jacobian matrix + for idx in eachindex(u0_ode) + # determine size of fluctuation + epsilon = sqrt(eps(u0_ode[idx])) - # plus fluctuation - u_ode[idx] = u0_ode[idx] + epsilon - rhs!(dup_ode, u_ode, semi, t0) + # plus fluctuation + u_ode[idx] = u0_ode[idx] + epsilon + rhs!(dup_ode, u_ode, semi, t0) - # minus fluctuation - u_ode[idx] = u0_ode[idx] - epsilon - rhs!(dum_ode, u_ode, semi, t0) + # minus fluctuation + u_ode[idx] = u0_ode[idx] - epsilon + rhs!(dum_ode, u_ode, semi, t0) - # restore linearisation state - u_ode[idx] = u0_ode[idx] + # restore linearisation state + u_ode[idx] = u0_ode[idx] - # central second order finite difference - @. J[:, idx] = (dup_ode - dum_ode) / (2 * epsilon) - end + # central second order finite difference + @. J[:, idx] = (dup_ode - dum_ode) / (2 * epsilon) + end - return J + return J end - """ jacobian_ad_forward(semi::AbstractSemidiscretization; t0=zero(real(semi)), @@ -219,98 +218,105 @@ and forward mode automatic differentiation to compute the Jacobian `J` of the semidiscretization `semi` at state `u0_ode`. """ function jacobian_ad_forward(semi::AbstractSemidiscretization; - t0=zero(real(semi)), - u0_ode=compute_coefficients(t0, semi)) - jacobian_ad_forward(semi, t0, u0_ode) + t0 = zero(real(semi)), + u0_ode = compute_coefficients(t0, semi)) + jacobian_ad_forward(semi, t0, u0_ode) end # The following version is for plain arrays function jacobian_ad_forward(semi::AbstractSemidiscretization, t0, u0_ode) - du_ode = similar(u0_ode) - config = ForwardDiff.JacobianConfig(nothing, du_ode, u0_ode) + du_ode = similar(u0_ode) + config = ForwardDiff.JacobianConfig(nothing, du_ode, u0_ode) - # Use a function barrier since the generation of the `config` we use above - # is not type-stable - _jacobian_ad_forward(semi, t0, u0_ode, du_ode, config) + # Use a function barrier since the generation of the `config` we use above + # is not type-stable + _jacobian_ad_forward(semi, t0, u0_ode, du_ode, config) end function _jacobian_ad_forward(semi, t0, u0_ode, du_ode, config) + new_semi = remake(semi, uEltype = eltype(config)) + J = ForwardDiff.jacobian(du_ode, u0_ode, config) do du_ode, u_ode + Trixi.rhs!(du_ode, u_ode, new_semi, t0) + end - new_semi = remake(semi, uEltype=eltype(config)) - J = ForwardDiff.jacobian(du_ode, u0_ode, config) do du_ode, u_ode - Trixi.rhs!(du_ode, u_ode, new_semi, t0) - end - - return J + return J end # This version is specialized to `StructArray`s used by some `DGMulti` solvers. 
# We need to convert the numerical solution vectors since ForwardDiff cannot # handle arrays of `SVector`s. function jacobian_ad_forward(semi::AbstractSemidiscretization, t0, _u0_ode::StructArray) - u0_ode_plain = similar(_u0_ode, eltype(eltype(_u0_ode)), (size(_u0_ode)..., nvariables(semi))) - for (v, u_v) in enumerate(StructArrays.components(_u0_ode)) - u0_ode_plain[.., v] = u_v - end - du_ode_plain = similar(u0_ode_plain) - config = ForwardDiff.JacobianConfig(nothing, du_ode_plain, u0_ode_plain) - - # Use a function barrier since the generation of the `config` we use above - # is not type-stable - _jacobian_ad_forward_structarrays(semi, t0, u0_ode_plain, du_ode_plain, config) + u0_ode_plain = similar(_u0_ode, eltype(eltype(_u0_ode)), + (size(_u0_ode)..., nvariables(semi))) + for (v, u_v) in enumerate(StructArrays.components(_u0_ode)) + u0_ode_plain[.., v] = u_v + end + du_ode_plain = similar(u0_ode_plain) + config = ForwardDiff.JacobianConfig(nothing, du_ode_plain, u0_ode_plain) + + # Use a function barrier since the generation of the `config` we use above + # is not type-stable + _jacobian_ad_forward_structarrays(semi, t0, u0_ode_plain, du_ode_plain, config) end function _jacobian_ad_forward_structarrays(semi, t0, u0_ode_plain, du_ode_plain, config) - - new_semi = remake(semi, uEltype=eltype(config)) - J = ForwardDiff.jacobian(du_ode_plain, u0_ode_plain, config) do du_ode_plain, u_ode_plain - u_ode = StructArray{SVector{nvariables(semi), eltype(config)}}(ntuple(v -> view(u_ode_plain, :, :, v), nvariables(semi))) - du_ode = StructArray{SVector{nvariables(semi), eltype(config)}}(ntuple(v -> view(du_ode_plain, :, :, v), nvariables(semi))) - Trixi.rhs!(du_ode, u_ode, new_semi, t0) - end - - return J + new_semi = remake(semi, uEltype = eltype(config)) + J = ForwardDiff.jacobian(du_ode_plain, u0_ode_plain, + config) do du_ode_plain, u_ode_plain + u_ode = StructArray{SVector{nvariables(semi), eltype(config)}}(ntuple(v -> view(u_ode_plain, + :, + :, + v), + nvariables(semi))) + du_ode = StructArray{SVector{nvariables(semi), eltype(config)}}(ntuple(v -> view(du_ode_plain, + :, + :, + v), + nvariables(semi))) + Trixi.rhs!(du_ode, u_ode, new_semi, t0) + end + + return J end # This version is specialized to arrays of `StaticArray`s used by some `DGMulti` solvers. # We need to convert the numerical solution vectors since ForwardDiff cannot # handle arrays of `SVector`s. 
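# (`reinterpret` provides a scalar-typed view of the `SVector` data without
# copying; inside the differentiated closure, the plain arrays are reinterpreted
# back into `SVector`s before calling `rhs!`.)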
-function jacobian_ad_forward(semi::AbstractSemidiscretization, t0, _u0_ode::AbstractArray{<:SVector}) - u0_ode_plain = reinterpret(eltype(eltype(_u0_ode)), _u0_ode) - du_ode_plain = similar(u0_ode_plain) - config = ForwardDiff.JacobianConfig(nothing, du_ode_plain, u0_ode_plain) - - # Use a function barrier since the generation of the `config` we use above - # is not type-stable - _jacobian_ad_forward_staticarrays(semi, t0, u0_ode_plain, du_ode_plain, config) +function jacobian_ad_forward(semi::AbstractSemidiscretization, t0, + _u0_ode::AbstractArray{<:SVector}) + u0_ode_plain = reinterpret(eltype(eltype(_u0_ode)), _u0_ode) + du_ode_plain = similar(u0_ode_plain) + config = ForwardDiff.JacobianConfig(nothing, du_ode_plain, u0_ode_plain) + + # Use a function barrier since the generation of the `config` we use above + # is not type-stable + _jacobian_ad_forward_staticarrays(semi, t0, u0_ode_plain, du_ode_plain, config) end function _jacobian_ad_forward_staticarrays(semi, t0, u0_ode_plain, du_ode_plain, config) - - new_semi = remake(semi, uEltype=eltype(config)) - J = ForwardDiff.jacobian(du_ode_plain, u0_ode_plain, config) do du_ode_plain, u_ode_plain - u_ode = reinterpret(SVector{nvariables(semi), eltype(config)}, u_ode_plain) - du_ode = reinterpret(SVector{nvariables(semi), eltype(config)}, du_ode_plain) - Trixi.rhs!(du_ode, u_ode, new_semi, t0) - end - - return J + new_semi = remake(semi, uEltype = eltype(config)) + J = ForwardDiff.jacobian(du_ode_plain, u0_ode_plain, + config) do du_ode_plain, u_ode_plain + u_ode = reinterpret(SVector{nvariables(semi), eltype(config)}, u_ode_plain) + du_ode = reinterpret(SVector{nvariables(semi), eltype(config)}, du_ode_plain) + Trixi.rhs!(du_ode, u_ode, new_semi, t0) + end + + return J end - - # Sometimes, it can be useful to save some (scalar) variables associated with each element, # e.g. AMR indicators or shock indicators. Since these usually have to be re-computed # directly before IO and do not necessarily need to be stored in memory before, # get_element_variables!(element_variables, ..) # is used to retrieve such up to date element variables, modifying # `element_variables::Dict{Symbol,Any}` in place. -function get_element_variables!(element_variables, u_ode, semi::AbstractSemidiscretization) - u = wrap_array(u_ode, semi) - get_element_variables!(element_variables, u, mesh_equations_solver_cache(semi)...) +function get_element_variables!(element_variables, u_ode, + semi::AbstractSemidiscretization) + u = wrap_array(u_ode, semi) + get_element_variables!(element_variables, u, mesh_equations_solver_cache(semi)...) end - # To implement AMR and use OrdinaryDiffEq.jl etc., we have to be a bit creative. # Since the caches of the SciML ecosystem are immutable structs, we cannot simply # change the underlying arrays therein. Hence, to support changing the number of @@ -351,17 +357,15 @@ end # # Xref https://github.com/SciML/OrdinaryDiffEq.jl/pull/1275 function wrap_array(u_ode, semi::AbstractSemidiscretization) - wrap_array(u_ode, mesh_equations_solver_cache(semi)...) + wrap_array(u_ode, mesh_equations_solver_cache(semi)...) end # Like `wrap_array`, but guarantees to return a plain `Array`, which can be better # for writing solution files etc. function wrap_array_native(u_ode, semi::AbstractSemidiscretization) - wrap_array_native(u_ode, mesh_equations_solver_cache(semi)...) + wrap_array_native(u_ode, mesh_equations_solver_cache(semi)...) end - - # TODO: Taal, document interface? 
# New mesh/solver combinations have to implement # - ndofs(mesh, solver, cache) @@ -378,5 +382,4 @@ end # - rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache) # - end # @muladd diff --git a/src/semidiscretization/semidiscretization_euler_acoustics.jl b/src/semidiscretization/semidiscretization_euler_acoustics.jl index c98847f7bfc..7608998c557 100644 --- a/src/semidiscretization/semidiscretization_euler_acoustics.jl +++ b/src/semidiscretization/semidiscretization_euler_acoustics.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ SemidiscretizationEulerAcoustics(semi_acoustics::SemiAcoustics, semi_euler::SemiEuler; @@ -21,176 +21,193 @@ is described by a function `source_region` that maps the coordinates of a single Note that this semidiscretization should be used in conjunction with [`EulerAcousticsCouplingCallback`](@ref) and only works in two dimensions. """ -struct SemidiscretizationEulerAcoustics{SemiAcoustics, SemiEuler, Cache} <: AbstractSemidiscretization - semi_acoustics::SemiAcoustics - semi_euler::SemiEuler - performance_counter::PerformanceCounter - cache::Cache - - function SemidiscretizationEulerAcoustics{SemiAcoustics, SemiEuler, Cache}( - semi_acoustics, semi_euler, cache) where {SemiAcoustics, SemiEuler, Cache} - - # Currently both semidiscretizations need to use a shared mesh - @assert semi_acoustics.mesh == semi_euler.mesh - - # Check if both solvers use the same polynomial basis - @assert semi_acoustics.solver.basis == semi_euler.solver.basis - - performance_counter = PerformanceCounter() - new(semi_acoustics, semi_euler, performance_counter, cache) - end +struct SemidiscretizationEulerAcoustics{SemiAcoustics, SemiEuler, Cache} <: + AbstractSemidiscretization + semi_acoustics::SemiAcoustics + semi_euler::SemiEuler + performance_counter::PerformanceCounter + cache::Cache + + function SemidiscretizationEulerAcoustics{SemiAcoustics, SemiEuler, Cache}(semi_acoustics, + semi_euler, + cache) where { + SemiAcoustics, + SemiEuler, + Cache + } + + # Currently both semidiscretizations need to use a shared mesh + @assert semi_acoustics.mesh == semi_euler.mesh + + # Check if both solvers use the same polynomial basis + @assert semi_acoustics.solver.basis == semi_euler.solver.basis + + performance_counter = PerformanceCounter() + new(semi_acoustics, semi_euler, performance_counter, cache) + end end - -function SemidiscretizationEulerAcoustics(semi_acoustics::SemiAcoustics, semi_euler::SemiEuler; - source_region=x->true, weights=x->1.0) where - {Mesh, SemiAcoustics<:SemidiscretizationHyperbolic{Mesh, <:AbstractAcousticPerturbationEquations}, - SemiEuler<:SemidiscretizationHyperbolic{Mesh, <:AbstractCompressibleEulerEquations}} - - cache = create_cache(SemidiscretizationEulerAcoustics, source_region, weights, - mesh_equations_solver_cache(semi_acoustics)...) 
- - return SemidiscretizationEulerAcoustics{typeof(semi_acoustics), typeof(semi_euler), typeof(cache)}( - semi_acoustics, semi_euler, cache) +function SemidiscretizationEulerAcoustics(semi_acoustics::SemiAcoustics, + semi_euler::SemiEuler; + source_region = x -> true, + weights = x -> 1.0) where + {Mesh, + SemiAcoustics <: + SemidiscretizationHyperbolic{Mesh, <:AbstractAcousticPerturbationEquations}, + SemiEuler <: + SemidiscretizationHyperbolic{Mesh, <:AbstractCompressibleEulerEquations}} + cache = create_cache(SemidiscretizationEulerAcoustics, source_region, weights, + mesh_equations_solver_cache(semi_acoustics)...) + + return SemidiscretizationEulerAcoustics{typeof(semi_acoustics), typeof(semi_euler), + typeof(cache)}(semi_acoustics, semi_euler, + cache) end function create_cache(::Type{SemidiscretizationEulerAcoustics}, source_region, weights, - mesh, equations::AcousticPerturbationEquations2D, dg::DGSEM, cache) + mesh, equations::AcousticPerturbationEquations2D, dg::DGSEM, + cache) + coupled_element_ids = get_coupled_element_ids(source_region, equations, dg, cache) - coupled_element_ids = get_coupled_element_ids(source_region, equations, dg, cache) + acoustic_source_terms = zeros(eltype(cache.elements), + (ndims(equations), nnodes(dg), nnodes(dg), + length(coupled_element_ids))) - acoustic_source_terms = zeros(eltype(cache.elements), (ndims(equations), nnodes(dg), nnodes(dg), - length(coupled_element_ids))) + acoustic_source_weights = precompute_weights(source_region, weights, + coupled_element_ids, + equations, dg, cache) - acoustic_source_weights = precompute_weights(source_region, weights, coupled_element_ids, - equations, dg, cache) - - return (; acoustic_source_terms, acoustic_source_weights, coupled_element_ids) + return (; acoustic_source_terms, acoustic_source_weights, coupled_element_ids) end function get_coupled_element_ids(source_region, equations, dg::DGSEM, cache) - coupled_element_ids = Vector{Int}(undef, 0) - - for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - x = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, element) - if source_region(x) - push!(coupled_element_ids, element) - break - end + coupled_element_ids = Vector{Int}(undef, 0) + + for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + x = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, + element) + if source_region(x) + push!(coupled_element_ids, element) + break + end + end end - end - return coupled_element_ids + return coupled_element_ids end -function precompute_weights(source_region, weights, coupled_element_ids, equations, dg::DGSEM, cache) - acoustic_source_weights = zeros(eltype(cache.elements), - (nnodes(dg), nnodes(dg), length(coupled_element_ids))) - - @threaded for k in 1:length(coupled_element_ids) - element = coupled_element_ids[k] - for j in eachnode(dg), i in eachnode(dg) - x = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, element) - acoustic_source_weights[i, j, k] = source_region(x) ? 
weights(x) : zero(weights(x)) +function precompute_weights(source_region, weights, coupled_element_ids, equations, + dg::DGSEM, cache) + acoustic_source_weights = zeros(eltype(cache.elements), + (nnodes(dg), nnodes(dg), + length(coupled_element_ids))) + + @threaded for k in 1:length(coupled_element_ids) + element = coupled_element_ids[k] + for j in eachnode(dg), i in eachnode(dg) + x = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, + element) + acoustic_source_weights[i, j, k] = source_region(x) ? weights(x) : + zero(weights(x)) + end end - end - return acoustic_source_weights + return acoustic_source_weights end - function Base.show(io::IO, semi::SemidiscretizationEulerAcoustics) - @nospecialize semi # reduce precompilation time - - print(io, "SemidiscretizationEulerAcoustics(") - print(io, semi.semi_acoustics) - print(io, ", ", semi.semi_euler) - print(io, ", cache(") - for (idx, key) in enumerate(keys(semi.cache)) - idx > 1 && print(io, " ") - print(io, key) - end - print(io, "))") + @nospecialize semi # reduce precompilation time + + print(io, "SemidiscretizationEulerAcoustics(") + print(io, semi.semi_acoustics) + print(io, ", ", semi.semi_euler) + print(io, ", cache(") + for (idx, key) in enumerate(keys(semi.cache)) + idx > 1 && print(io, " ") + print(io, key) + end + print(io, "))") end -function Base.show(io::IO, mime::MIME"text/plain", semi::SemidiscretizationEulerAcoustics) - @nospecialize semi # reduce precompilation time - - if get(io, :compact, false) - show(io, semi) - else - summary_header(io, "SemidiscretizationEulerAcoustics") - summary_line(io, "semidiscretization Euler", semi.semi_euler |> typeof |> nameof) - show(increment_indent(io), mime, semi.semi_euler) - summary_line(io, "semidiscretization acoustics", semi.semi_acoustics |> typeof |> nameof) - show(increment_indent(io), mime, semi.semi_acoustics) - summary_footer(io) - end +function Base.show(io::IO, mime::MIME"text/plain", + semi::SemidiscretizationEulerAcoustics) + @nospecialize semi # reduce precompilation time + + if get(io, :compact, false) + show(io, semi) + else + summary_header(io, "SemidiscretizationEulerAcoustics") + summary_line(io, "semidiscretization Euler", + semi.semi_euler |> typeof |> nameof) + show(increment_indent(io), mime, semi.semi_euler) + summary_line(io, "semidiscretization acoustics", + semi.semi_acoustics |> typeof |> nameof) + show(increment_indent(io), mime, semi.semi_acoustics) + summary_footer(io) + end end - # The acoustics semidiscretization is the main semidiscretization. 
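# All standard queries on the coupled object therefore delegate to the
# acoustics part, so the ODE state vector contains only the acoustic
# variables. A minimal sketch of the resulting behavior, assuming
# `semi_acoustics` and `semi_euler` are two compatible semidiscretizations
# (the variable names are placeholders):
#
#     semi = SemidiscretizationEulerAcoustics(semi_acoustics, semi_euler;
#                                             source_region = x -> sum(abs2, x) < 1)
#     ndofs(semi) == ndofs(semi_acoustics)  # true; all queries are forwarded
#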
@inline function mesh_equations_solver_cache(semi::SemidiscretizationEulerAcoustics) - return mesh_equations_solver_cache(semi.semi_acoustics) + return mesh_equations_solver_cache(semi.semi_acoustics) end - @inline Base.ndims(semi::SemidiscretizationEulerAcoustics) = ndims(semi.semi_acoustics) @inline Base.real(semi::SemidiscretizationEulerAcoustics) = real(semi.semi_acoustics) - # Computes the coefficients of the initial condition @inline function compute_coefficients(t, semi::SemidiscretizationEulerAcoustics) - compute_coefficients(t, semi.semi_acoustics) + compute_coefficients(t, semi.semi_acoustics) end @inline function compute_coefficients!(u_ode, t, semi::SemidiscretizationEulerAcoustics) - compute_coefficients!(u_ode, t, semi.semi_acoustics) + compute_coefficients!(u_ode, t, semi.semi_acoustics) end - -@inline function calc_error_norms(func, u, t, analyzer, semi::SemidiscretizationEulerAcoustics, +@inline function calc_error_norms(func, u, t, analyzer, + semi::SemidiscretizationEulerAcoustics, cache_analysis) - calc_error_norms(func, u, t, analyzer, semi.semi_acoustics, cache_analysis) + calc_error_norms(func, u, t, analyzer, semi.semi_acoustics, cache_analysis) end - function rhs!(du_ode, u_ode, semi::SemidiscretizationEulerAcoustics, t) - @unpack semi_acoustics, cache = semi - @unpack acoustic_source_terms, acoustic_source_weights, coupled_element_ids = cache + @unpack semi_acoustics, cache = semi + @unpack acoustic_source_terms, acoustic_source_weights, coupled_element_ids = cache - du_acoustics = wrap_array(du_ode, semi_acoustics) + du_acoustics = wrap_array(du_ode, semi_acoustics) - time_start = time_ns() + time_start = time_ns() - @trixi_timeit timer() "acoustics rhs!" rhs!(du_ode, u_ode, semi_acoustics, t) + @trixi_timeit timer() "acoustics rhs!" rhs!(du_ode, u_ode, semi_acoustics, t) - @trixi_timeit timer() "add acoustic source terms" add_acoustic_source_terms!( - du_acoustics, acoustic_source_terms, acoustic_source_weights, coupled_element_ids, - mesh_equations_solver_cache(semi_acoustics)...) + @trixi_timeit timer() "add acoustic source terms" begin + add_acoustic_source_terms!(du_acoustics, acoustic_source_terms, + acoustic_source_weights, coupled_element_ids, + mesh_equations_solver_cache(semi_acoustics)...) 
+ end - runtime = time_ns() - time_start - put!(semi.performance_counter, runtime) + runtime = time_ns() - time_start + put!(semi.performance_counter, runtime) - return nothing + return nothing end - function add_acoustic_source_terms!(du_acoustics, acoustic_source_terms, source_weights, - coupled_element_ids, mesh::TreeMesh{2}, equations, dg::DGSEM, + coupled_element_ids, mesh::TreeMesh{2}, equations, + dg::DGSEM, cache) - - @threaded for k in 1:length(coupled_element_ids) - element = coupled_element_ids[k] - - for j in eachnode(dg), i in eachnode(dg) - du_acoustics[1, i, j, element] += source_weights[i, j, k] * acoustic_source_terms[1, i, j, k] - du_acoustics[2, i, j, element] += source_weights[i, j, k] * acoustic_source_terms[2, i, j, k] + @threaded for k in 1:length(coupled_element_ids) + element = coupled_element_ids[k] + + for j in eachnode(dg), i in eachnode(dg) + du_acoustics[1, i, j, element] += source_weights[i, j, k] * + acoustic_source_terms[1, i, j, k] + du_acoustics[2, i, j, element] += source_weights[i, j, k] * + acoustic_source_terms[2, i, j, k] + end end - end - return nothing + return nothing end - -end # @muladd \ No newline at end of file +end # @muladd diff --git a/src/semidiscretization/semidiscretization_euler_gravity.jl b/src/semidiscretization/semidiscretization_euler_gravity.jl index 1a8d7bfad9d..665f2be9bfa 100644 --- a/src/semidiscretization/semidiscretization_euler_gravity.jl +++ b/src/semidiscretization/semidiscretization_euler_gravity.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ ParametersEulerGravity(; background_density=0.0, @@ -15,54 +15,56 @@ Set up parameters for the gravitational part of a [`SemidiscretizationEulerGravity`](@ref). """ -struct ParametersEulerGravity{RealT<:Real, TimestepGravity} - background_density ::RealT # aka rho0 - gravitational_constant::RealT # aka G - cfl ::RealT - resid_tol ::RealT - n_iterations_max ::Int - timestep_gravity::TimestepGravity +struct ParametersEulerGravity{RealT <: Real, TimestepGravity} + background_density :: RealT # aka rho0 + gravitational_constant :: RealT # aka G + cfl :: RealT + resid_tol :: RealT + n_iterations_max :: Int + timestep_gravity :: TimestepGravity end -function ParametersEulerGravity(; background_density=0.0, - gravitational_constant=1.0, - cfl=1.0, - resid_tol=1.0e-4, - n_iterations_max=10^4, - timestep_gravity=timestep_gravity_erk52_3Sstar!) - background_density, gravitational_constant, cfl, resid_tol = promote(background_density, gravitational_constant, cfl, resid_tol) - ParametersEulerGravity(background_density, gravitational_constant, cfl, resid_tol, n_iterations_max, timestep_gravity) +function ParametersEulerGravity(; background_density = 0.0, + gravitational_constant = 1.0, + cfl = 1.0, + resid_tol = 1.0e-4, + n_iterations_max = 10^4, + timestep_gravity = timestep_gravity_erk52_3Sstar!) 
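    # The real-valued keyword arguments are promoted to a common type below so
    # that the resulting `RealT` is concrete. A hypothetical sketch of the
    # effect:
    #
    #     p32 = ParametersEulerGravity(background_density = 0.0f0,
    #                                  gravitational_constant = 1.0f0,
    #                                  cfl = 1.0f0, resid_tol = 1.0f-4)
    #     # p32.cfl isa Float32
    #
    # while mixing `Float32` and `Float64` arguments yields `RealT == Float64`.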
+ background_density, gravitational_constant, cfl, resid_tol = promote(background_density, + gravitational_constant, + cfl, resid_tol) + ParametersEulerGravity(background_density, gravitational_constant, cfl, resid_tol, + n_iterations_max, timestep_gravity) end function Base.show(io::IO, parameters::ParametersEulerGravity) - @nospecialize parameters # reduce precompilation time - - print(io, "ParametersEulerGravity(") - print(io, "background_density=", parameters.background_density) - print(io, ", gravitational_constant=", parameters.gravitational_constant) - print(io, ", cfl=", parameters.cfl) - print(io, ", n_iterations_max=", parameters.n_iterations_max) - print(io, ", timestep_gravity=", parameters.timestep_gravity) - print(io, ")") + @nospecialize parameters # reduce precompilation time + + print(io, "ParametersEulerGravity(") + print(io, "background_density=", parameters.background_density) + print(io, ", gravitational_constant=", parameters.gravitational_constant) + print(io, ", cfl=", parameters.cfl) + print(io, ", n_iterations_max=", parameters.n_iterations_max) + print(io, ", timestep_gravity=", parameters.timestep_gravity) + print(io, ")") end function Base.show(io::IO, ::MIME"text/plain", parameters::ParametersEulerGravity) - @nospecialize parameters # reduce precompilation time - - if get(io, :compact, false) - show(io, parameters) - else - setup = [ - "background density (ρ₀)" => parameters.background_density, - "gravitational constant (G)" => parameters.gravitational_constant, - "CFL (gravity)" => parameters.cfl, - "max. #iterations" => parameters.n_iterations_max, - "time integrator" => parameters.timestep_gravity, - ] - summary_box(io, "ParametersEulerGravity", setup) - end + @nospecialize parameters # reduce precompilation time + + if get(io, :compact, false) + show(io, parameters) + else + setup = [ + "background density (ρ₀)" => parameters.background_density, + "gravitational constant (G)" => parameters.gravitational_constant, + "CFL (gravity)" => parameters.cfl, + "max. #iterations" => parameters.n_iterations_max, + "time integrator" => parameters.timestep_gravity, + ] + summary_box(io, "ParametersEulerGravity", setup) + end end - """ SemidiscretizationEulerGravity @@ -75,27 +77,35 @@ the hyperblic diffusion equations. 
[arXiv: 2008.10593](https://arXiv.org/abs/2008.10593) """ struct SemidiscretizationEulerGravity{SemiEuler, SemiGravity, - Parameters<:ParametersEulerGravity, Cache} <: AbstractSemidiscretization - semi_euler::SemiEuler - semi_gravity::SemiGravity - parameters::Parameters - performance_counter::PerformanceCounter - gravity_counter::PerformanceCounter - cache::Cache - - function SemidiscretizationEulerGravity{SemiEuler, SemiGravity, Parameters, Cache}( - semi_euler::SemiEuler, semi_gravity::SemiGravity, - parameters::Parameters, cache::Cache) where {SemiEuler, SemiGravity, - Parameters<:ParametersEulerGravity, Cache} - @assert ndims(semi_euler) == ndims(semi_gravity) - @assert typeof(semi_euler.mesh) == typeof(semi_gravity.mesh) - @assert polydeg(semi_euler.solver) == polydeg(semi_gravity.solver) - - performance_counter = PerformanceCounter() - gravity_counter = PerformanceCounter() - - new(semi_euler, semi_gravity, parameters, performance_counter, gravity_counter, cache) - end + Parameters <: ParametersEulerGravity, Cache} <: + AbstractSemidiscretization + semi_euler :: SemiEuler + semi_gravity :: SemiGravity + parameters :: Parameters + performance_counter :: PerformanceCounter + gravity_counter :: PerformanceCounter + cache :: Cache + + function SemidiscretizationEulerGravity{SemiEuler, SemiGravity, Parameters, Cache}(semi_euler::SemiEuler, + semi_gravity::SemiGravity, + parameters::Parameters, + cache::Cache) where { + SemiEuler, + SemiGravity, + Parameters <: + ParametersEulerGravity, + Cache + } + @assert ndims(semi_euler) == ndims(semi_gravity) + @assert typeof(semi_euler.mesh) == typeof(semi_gravity.mesh) + @assert polydeg(semi_euler.solver) == polydeg(semi_gravity.solver) + + performance_counter = PerformanceCounter() + gravity_counter = PerformanceCounter() + + new(semi_euler, semi_gravity, parameters, performance_counter, gravity_counter, + cache) + end end """ @@ -104,346 +114,391 @@ end Construct a semidiscretization of the compressible Euler equations with self-gravity. `parameters` should be given as [`ParametersEulerGravity`](@ref). 
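
A minimal construction sketch, assuming `semi_euler` and `semi_gravity` are two
compatible [`SemidiscretizationHyperbolic`](@ref)s on the same mesh and with the
same polynomial degree (the variable names and parameter values are
placeholders):

    parameters = ParametersEulerGravity(background_density = 1.5e-2,
                                        gravitational_constant = 1.0,
                                        cfl = 1.1)
    semi = SemidiscretizationEulerGravity(semi_euler, semi_gravity, parameters)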
""" -function SemidiscretizationEulerGravity(semi_euler::SemiEuler, semi_gravity::SemiGravity, parameters) where - {Mesh, SemiEuler<:SemidiscretizationHyperbolic{Mesh, <:AbstractCompressibleEulerEquations}, - SemiGravity<:SemidiscretizationHyperbolic{Mesh, <:AbstractHyperbolicDiffusionEquations}} - - u_ode = compute_coefficients(zero(real(semi_gravity)), semi_gravity) - du_ode = similar(u_ode) - u_tmp1_ode = similar(u_ode) - u_tmp2_ode = similar(u_ode) - cache = (; u_ode, du_ode, u_tmp1_ode, u_tmp2_ode) - - SemidiscretizationEulerGravity{typeof(semi_euler), typeof(semi_gravity), typeof(parameters), typeof(cache)}( - semi_euler, semi_gravity, parameters, cache) +function SemidiscretizationEulerGravity(semi_euler::SemiEuler, + semi_gravity::SemiGravity, + parameters) where + {Mesh, + SemiEuler <: + SemidiscretizationHyperbolic{Mesh, <:AbstractCompressibleEulerEquations}, + SemiGravity <: + SemidiscretizationHyperbolic{Mesh, <:AbstractHyperbolicDiffusionEquations}} + u_ode = compute_coefficients(zero(real(semi_gravity)), semi_gravity) + du_ode = similar(u_ode) + u_tmp1_ode = similar(u_ode) + u_tmp2_ode = similar(u_ode) + cache = (; u_ode, du_ode, u_tmp1_ode, u_tmp2_ode) + + SemidiscretizationEulerGravity{typeof(semi_euler), typeof(semi_gravity), + typeof(parameters), typeof(cache)}(semi_euler, + semi_gravity, + parameters, cache) end - # TODO: AD, add appropriate method for remake - function Base.show(io::IO, semi::SemidiscretizationEulerGravity) - @nospecialize semi # reduce precompilation time - - print(io, "SemidiscretizationEulerGravity using") - print(io, semi.semi_euler) - print(io, ", ", semi.semi_gravity) - print(io, ", ", semi.parameters) - print(io, ", cache(") - for (idx,key) in enumerate(keys(semi.cache)) - idx > 1 && print(io, " ") - print(io, key) - end - print(io, "))") + @nospecialize semi # reduce precompilation time + + print(io, "SemidiscretizationEulerGravity using") + print(io, semi.semi_euler) + print(io, ", ", semi.semi_gravity) + print(io, ", ", semi.parameters) + print(io, ", cache(") + for (idx, key) in enumerate(keys(semi.cache)) + idx > 1 && print(io, " ") + print(io, key) + end + print(io, "))") end function Base.show(io::IO, mime::MIME"text/plain", semi::SemidiscretizationEulerGravity) - @nospecialize semi # reduce precompilation time - - if get(io, :compact, false) - show(io, semi) - else - summary_header(io, "SemidiscretizationEulerGravity") - summary_line(io, "semidiscretization Euler", semi.semi_euler |> typeof |> nameof) - show(increment_indent(io), mime, semi.semi_euler) - summary_line(io, "semidiscretization gravity", semi.semi_gravity |> typeof |> nameof) - show(increment_indent(io), mime, semi.semi_gravity) - summary_line(io, "parameters", semi.parameters |> typeof |> nameof) - show(increment_indent(io), mime, semi.parameters) - summary_footer(io) - end + @nospecialize semi # reduce precompilation time + + if get(io, :compact, false) + show(io, semi) + else + summary_header(io, "SemidiscretizationEulerGravity") + summary_line(io, "semidiscretization Euler", + semi.semi_euler |> typeof |> nameof) + show(increment_indent(io), mime, semi.semi_euler) + summary_line(io, "semidiscretization gravity", + semi.semi_gravity |> typeof |> nameof) + show(increment_indent(io), mime, semi.semi_gravity) + summary_line(io, "parameters", semi.parameters |> typeof |> nameof) + show(increment_indent(io), mime, semi.parameters) + summary_footer(io) + end end - # The compressible Euler semidiscretization is considered to be the main semidiscretization. 
# The hyperbolic diffusion equations part is only used internally to update the gravitational # potential during an rhs! evaluation of the flow solver. @inline function mesh_equations_solver_cache(semi::SemidiscretizationEulerGravity) - mesh_equations_solver_cache(semi.semi_euler) + mesh_equations_solver_cache(semi.semi_euler) end @inline Base.ndims(semi::SemidiscretizationEulerGravity) = ndims(semi.semi_euler) @inline Base.real(semi::SemidiscretizationEulerGravity) = real(semi.semi_euler) - # computes the coefficients of the initial condition @inline function compute_coefficients(t, semi::SemidiscretizationEulerGravity) - compute_coefficients!(semi.cache.u_ode, t, semi.semi_gravity) - compute_coefficients(t, semi.semi_euler) + compute_coefficients!(semi.cache.u_ode, t, semi.semi_gravity) + compute_coefficients(t, semi.semi_euler) end # computes the coefficients of the initial condition and stores the Euler part in `u_ode` @inline function compute_coefficients!(u_ode, t, semi::SemidiscretizationEulerGravity) - compute_coefficients!(semi.cache.u_ode, t, semi.semi_gravity) - compute_coefficients!(u_ode, t, semi.semi_euler) + compute_coefficients!(semi.cache.u_ode, t, semi.semi_gravity) + compute_coefficients!(u_ode, t, semi.semi_euler) end - -@inline function calc_error_norms(func, u, t, analyzer, semi::SemidiscretizationEulerGravity, cache_analysis) - calc_error_norms(func, u, t, analyzer, semi.semi_euler, cache_analysis) +@inline function calc_error_norms(func, u, t, analyzer, + semi::SemidiscretizationEulerGravity, cache_analysis) + calc_error_norms(func, u, t, analyzer, semi.semi_euler, cache_analysis) end - function rhs!(du_ode, u_ode, semi::SemidiscretizationEulerGravity, t) - @unpack semi_euler, semi_gravity, cache = semi - - u_euler = wrap_array(u_ode , semi_euler) - du_euler = wrap_array(du_ode, semi_euler) - u_gravity = wrap_array(cache.u_ode, semi_gravity) - - time_start = time_ns() - - # standard semidiscretization of the compressible Euler equations - @trixi_timeit timer() "Euler solver" rhs!(du_ode, u_ode, semi_euler, t) - - # compute gravitational potential and forces - @trixi_timeit timer() "gravity solver" update_gravity!(semi, u_ode) - - # add gravitational source source_terms to the Euler part - if ndims(semi_euler) == 1 - @views @. du_euler[2, .., :] -= u_euler[1, .., :] * u_gravity[2, .., :] - @views @. du_euler[3, .., :] -= u_euler[2, .., :] * u_gravity[2, .., :] - elseif ndims(semi_euler) == 2 - @views @. du_euler[2, .., :] -= u_euler[1, .., :] * u_gravity[2, .., :] - @views @. du_euler[3, .., :] -= u_euler[1, .., :] * u_gravity[3, .., :] - @views @. du_euler[4, .., :] -= (u_euler[2, .., :] * u_gravity[2, .., :] + - u_euler[3, .., :] * u_gravity[3, .., :]) - elseif ndims(semi_euler) == 3 - @views @. du_euler[2, .., :] -= u_euler[1, .., :] * u_gravity[2, .., :] - @views @. du_euler[3, .., :] -= u_euler[1, .., :] * u_gravity[3, .., :] - @views @. du_euler[4, .., :] -= u_euler[1, .., :] * u_gravity[4, .., :] - @views @. 
du_euler[5, .., :] -= (u_euler[2, .., :] * u_gravity[2, .., :] + - u_euler[3, .., :] * u_gravity[3, .., :] + - u_euler[4, .., :] * u_gravity[4, .., :]) - else - error("Number of dimensions $(ndims(semi_euler)) not supported.") - end - - runtime = time_ns() - time_start - put!(semi.performance_counter, runtime) - - return nothing -end + @unpack semi_euler, semi_gravity, cache = semi + u_euler = wrap_array(u_ode, semi_euler) + du_euler = wrap_array(du_ode, semi_euler) + u_gravity = wrap_array(cache.u_ode, semi_gravity) -# TODO: Taal refactor, add some callbacks or so within the gravity update to allow investigating/optimizing it -function update_gravity!(semi::SemidiscretizationEulerGravity, u_ode) - @unpack semi_euler, semi_gravity, parameters, gravity_counter, cache = semi - - # Can be changed by AMR - resize!(cache.du_ode, length(cache.u_ode)) - resize!(cache.u_tmp1_ode, length(cache.u_ode)) - resize!(cache.u_tmp2_ode, length(cache.u_ode)) - - u_euler = wrap_array(u_ode, semi_euler) - u_gravity = wrap_array(cache.u_ode, semi_gravity) - du_gravity = wrap_array(cache.du_ode, semi_gravity) - - # set up main loop - finalstep = false - @unpack n_iterations_max, cfl, resid_tol, timestep_gravity = parameters - iter = 0 - t = zero(real(semi_gravity.solver)) - - # iterate gravity solver until convergence or maximum number of iterations are reached - @unpack equations = semi_gravity - while !finalstep - dt = @trixi_timeit timer() "calculate dt" cfl * max_dt(u_gravity, t, semi_gravity.mesh, - have_constant_speed(equations), equations, - semi_gravity.solver, semi_gravity.cache) - - # evolve solution by one pseudo-time step time_start = time_ns() - timestep_gravity(cache, u_euler, t, dt, parameters, semi_gravity) - runtime = time_ns() - time_start - put!(gravity_counter, runtime) - # update iteration counter - iter += 1 - t += dt - - # check if we reached the maximum number of iterations - if n_iterations_max > 0 && iter >= n_iterations_max - @warn "Max iterations reached: Gravity solver failed to converge!" residual=maximum(abs, @views du_gravity[1, .., :]) t=t dt=dt - finalstep = true + # standard semidiscretization of the compressible Euler equations + @trixi_timeit timer() "Euler solver" rhs!(du_ode, u_ode, semi_euler, t) + + # compute gravitational potential and forces + @trixi_timeit timer() "gravity solver" update_gravity!(semi, u_ode) + + # add gravitational source source_terms to the Euler part + if ndims(semi_euler) == 1 + @views @. du_euler[2, .., :] -= u_euler[1, .., :] * u_gravity[2, .., :] + @views @. du_euler[3, .., :] -= u_euler[2, .., :] * u_gravity[2, .., :] + elseif ndims(semi_euler) == 2 + @views @. du_euler[2, .., :] -= u_euler[1, .., :] * u_gravity[2, .., :] + @views @. du_euler[3, .., :] -= u_euler[1, .., :] * u_gravity[3, .., :] + @views @. du_euler[4, .., :] -= (u_euler[2, .., :] * u_gravity[2, .., :] + + u_euler[3, .., :] * u_gravity[3, .., :]) + elseif ndims(semi_euler) == 3 + @views @. du_euler[2, .., :] -= u_euler[1, .., :] * u_gravity[2, .., :] + @views @. du_euler[3, .., :] -= u_euler[1, .., :] * u_gravity[3, .., :] + @views @. du_euler[4, .., :] -= u_euler[1, .., :] * u_gravity[4, .., :] + @views @. 
du_euler[5, .., :] -= (u_euler[2, .., :] * u_gravity[2, .., :] + + u_euler[3, .., :] * u_gravity[3, .., :] + + u_euler[4, .., :] * u_gravity[4, .., :]) + else + error("Number of dimensions $(ndims(semi_euler)) not supported.") end - # this is an absolute tolerance check - if maximum(abs, @views du_gravity[1, .., :]) <= resid_tol - finalstep = true - end - end + runtime = time_ns() - time_start + put!(semi.performance_counter, runtime) - return nothing + return nothing end +# TODO: Taal refactor, add some callbacks or so within the gravity update to allow investigating/optimizing it +function update_gravity!(semi::SemidiscretizationEulerGravity, u_ode) + @unpack semi_euler, semi_gravity, parameters, gravity_counter, cache = semi + + # Can be changed by AMR + resize!(cache.du_ode, length(cache.u_ode)) + resize!(cache.u_tmp1_ode, length(cache.u_ode)) + resize!(cache.u_tmp2_ode, length(cache.u_ode)) + + u_euler = wrap_array(u_ode, semi_euler) + u_gravity = wrap_array(cache.u_ode, semi_gravity) + du_gravity = wrap_array(cache.du_ode, semi_gravity) + + # set up main loop + finalstep = false + @unpack n_iterations_max, cfl, resid_tol, timestep_gravity = parameters + iter = 0 + t = zero(real(semi_gravity.solver)) + + # iterate gravity solver until convergence or maximum number of iterations are reached + @unpack equations = semi_gravity + while !finalstep + dt = @trixi_timeit timer() "calculate dt" begin + cfl * max_dt(u_gravity, t, semi_gravity.mesh, + have_constant_speed(equations), equations, + semi_gravity.solver, semi_gravity.cache) + end + + # evolve solution by one pseudo-time step + time_start = time_ns() + timestep_gravity(cache, u_euler, t, dt, parameters, semi_gravity) + runtime = time_ns() - time_start + put!(gravity_counter, runtime) + + # update iteration counter + iter += 1 + t += dt + + # check if we reached the maximum number of iterations + if n_iterations_max > 0 && iter >= n_iterations_max + @warn "Max iterations reached: Gravity solver failed to converge!" residual=maximum(abs, + @views du_gravity[1, + .., + :]) t=t dt=dt + finalstep = true + end + + # this is an absolute tolerance check + if maximum(abs, @views du_gravity[1, .., :]) <= resid_tol + finalstep = true + end + end + + return nothing +end # Integrate gravity solver for 2N-type low-storage schemes function timestep_gravity_2N!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, a, b, c) - G = gravity_parameters.gravitational_constant - rho0 = gravity_parameters.background_density - grav_scale = -4.0*pi*G - - @unpack u_ode, du_ode, u_tmp1_ode = cache - u_tmp1_ode .= zero(eltype(u_tmp1_ode)) - du_gravity = wrap_array(du_ode, semi_gravity) - for stage in eachindex(c) - t_stage = t + dt * c[stage] - - # rhs! has the source term for the harmonic problem - # We don't need a `@trixi_timeit timer() "rhs!"` here since that's already - # included in the `rhs!` call. - rhs!(du_ode, u_ode, semi_gravity, t_stage) - - # Source term: Jeans instability OR coupling convergence test OR blast wave - # put in gravity source term proportional to Euler density - # OBS! subtract off the background density ρ_0 (spatial mean value) - @views @. 
du_gravity[1, .., :] += grav_scale * (u_euler[1, .., :] - rho0) - - a_stage = a[stage] - b_stage_dt = b[stage] * dt - @trixi_timeit timer() "Runge-Kutta step" begin - @threaded for idx in eachindex(u_ode) - u_tmp1_ode[idx] = du_ode[idx] - u_tmp1_ode[idx] * a_stage - u_ode[idx] += u_tmp1_ode[idx] * b_stage_dt - end + G = gravity_parameters.gravitational_constant + rho0 = gravity_parameters.background_density + grav_scale = -4.0 * pi * G + + @unpack u_ode, du_ode, u_tmp1_ode = cache + u_tmp1_ode .= zero(eltype(u_tmp1_ode)) + du_gravity = wrap_array(du_ode, semi_gravity) + for stage in eachindex(c) + t_stage = t + dt * c[stage] + + # rhs! has the source term for the harmonic problem + # We don't need a `@trixi_timeit timer() "rhs!"` here since that's already + # included in the `rhs!` call. + rhs!(du_ode, u_ode, semi_gravity, t_stage) + + # Source term: Jeans instability OR coupling convergence test OR blast wave + # put in gravity source term proportional to Euler density + # OBS! subtract off the background density ρ_0 (spatial mean value) + @views @. du_gravity[1, .., :] += grav_scale * (u_euler[1, .., :] - rho0) + + a_stage = a[stage] + b_stage_dt = b[stage] * dt + @trixi_timeit timer() "Runge-Kutta step" begin + @threaded for idx in eachindex(u_ode) + u_tmp1_ode[idx] = du_ode[idx] - u_tmp1_ode[idx] * a_stage + u_ode[idx] += u_tmp1_ode[idx] * b_stage_dt + end + end end - end - return nothing + return nothing end -function timestep_gravity_carpenter_kennedy_erk54_2N!(cache, u_euler, t, dt, gravity_parameters, semi_gravity) - # Coefficients for Carpenter's 5-stage 4th-order low-storage Runge-Kutta method - a = SVector(0.0, 567301805773.0 / 1357537059087.0,2404267990393.0 / 2016746695238.0, - 3550918686646.0 / 2091501179385.0, 1275806237668.0 / 842570457699.0) - b = SVector(1432997174477.0 / 9575080441755.0, 5161836677717.0 / 13612068292357.0, - 1720146321549.0 / 2090206949498.0, 3134564353537.0 / 4481467310338.0, - 2277821191437.0 / 14882151754819.0) - c = SVector(0.0, 1432997174477.0 / 9575080441755.0, 2526269341429.0 / 6820363962896.0, - 2006345519317.0 / 3224310063776.0, 2802321613138.0 / 2924317926251.0) - - timestep_gravity_2N!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, a, b, c) +function timestep_gravity_carpenter_kennedy_erk54_2N!(cache, u_euler, t, dt, + gravity_parameters, semi_gravity) + # Coefficients for Carpenter's 5-stage 4th-order low-storage Runge-Kutta method + a = SVector(0.0, 567301805773.0 / 1357537059087.0, + 2404267990393.0 / 2016746695238.0, + 3550918686646.0 / 2091501179385.0, 1275806237668.0 / 842570457699.0) + b = SVector(1432997174477.0 / 9575080441755.0, 5161836677717.0 / 13612068292357.0, + 1720146321549.0 / 2090206949498.0, 3134564353537.0 / 4481467310338.0, + 2277821191437.0 / 14882151754819.0) + c = SVector(0.0, 1432997174477.0 / 9575080441755.0, + 2526269341429.0 / 6820363962896.0, + 2006345519317.0 / 3224310063776.0, 2802321613138.0 / 2924317926251.0) + + timestep_gravity_2N!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, a, b, + c) end - # Integrate gravity solver for 3S*-type low-storage schemes -function timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, +function timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, + semi_gravity, gamma1, gamma2, gamma3, beta, delta, c) - G = gravity_parameters.gravitational_constant - rho0 = gravity_parameters.background_density - grav_scale = -4 * G * pi - - @unpack u_ode, du_ode, u_tmp1_ode, u_tmp2_ode = cache - u_tmp1_ode .= 
zero(eltype(u_tmp1_ode)) - u_tmp2_ode .= u_ode - du_gravity = wrap_array(du_ode, semi_gravity) - for stage in eachindex(c) - t_stage = t + dt * c[stage] - - # rhs! has the source term for the harmonic problem - # We don't need a `@trixi_timeit timer() "rhs!"` here since that's already - # included in the `rhs!` call. - rhs!(du_ode, u_ode, semi_gravity, t_stage) - - # Source term: Jeans instability OR coupling convergence test OR blast wave - # put in gravity source term proportional to Euler density - # OBS! subtract off the background density ρ_0 around which the Jeans instability is perturbed - @views @. du_gravity[1, .., :] += grav_scale * (u_euler[1, .., :] - rho0) - - delta_stage = delta[stage] - gamma1_stage = gamma1[stage] - gamma2_stage = gamma2[stage] - gamma3_stage = gamma3[stage] - beta_stage_dt = beta[stage] * dt - @trixi_timeit timer() "Runge-Kutta step" begin - @threaded for idx in eachindex(u_ode) - u_tmp1_ode[idx] += delta_stage * u_ode[idx] - u_ode[idx] = (gamma1_stage * u_ode[idx] + - gamma2_stage * u_tmp1_ode[idx] + - gamma3_stage * u_tmp2_ode[idx] + - beta_stage_dt * du_ode[idx]) - end + G = gravity_parameters.gravitational_constant + rho0 = gravity_parameters.background_density + grav_scale = -4 * G * pi + + @unpack u_ode, du_ode, u_tmp1_ode, u_tmp2_ode = cache + u_tmp1_ode .= zero(eltype(u_tmp1_ode)) + u_tmp2_ode .= u_ode + du_gravity = wrap_array(du_ode, semi_gravity) + for stage in eachindex(c) + t_stage = t + dt * c[stage] + + # rhs! has the source term for the harmonic problem + # We don't need a `@trixi_timeit timer() "rhs!"` here since that's already + # included in the `rhs!` call. + rhs!(du_ode, u_ode, semi_gravity, t_stage) + + # Source term: Jeans instability OR coupling convergence test OR blast wave + # put in gravity source term proportional to Euler density + # OBS! subtract off the background density ρ_0 around which the Jeans instability is perturbed + @views @. 
du_gravity[1, .., :] += grav_scale * (u_euler[1, .., :] - rho0) + + delta_stage = delta[stage] + gamma1_stage = gamma1[stage] + gamma2_stage = gamma2[stage] + gamma3_stage = gamma3[stage] + beta_stage_dt = beta[stage] * dt + @trixi_timeit timer() "Runge-Kutta step" begin + @threaded for idx in eachindex(u_ode) + u_tmp1_ode[idx] += delta_stage * u_ode[idx] + u_ode[idx] = (gamma1_stage * u_ode[idx] + + gamma2_stage * u_tmp1_ode[idx] + + gamma3_stage * u_tmp2_ode[idx] + + beta_stage_dt * du_ode[idx]) + end + end end - end - return nothing + return nothing end -function timestep_gravity_erk51_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity) - # New 3Sstar coefficients optimized for polynomials of degree polydeg=3 - # and examples/parameters_hypdiff_lax_friedrichs.toml - # 5 stages, order 1 - gamma1 = SVector(0.0000000000000000E+00, 5.2910412316555866E-01, 2.8433964362349406E-01, -1.4467571130907027E+00, 7.5592215948661057E-02) - gamma2 = SVector(1.0000000000000000E+00, 2.6366970460864109E-01, 3.7423646095836322E-01, 7.8786901832431289E-01, 3.7754129043053775E-01) - gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00, 8.0043329115077388E-01, 1.3550099149374278E-01) - beta = SVector(1.9189497208340553E-01, 5.4506406707700059E-02, 1.2103893164085415E-01, 6.8582252490550921E-01, 8.7914657211972225E-01) - delta = SVector(1.0000000000000000E+00, 7.8593091509463076E-01, 1.2639038717454840E-01, 1.7726945920209813E-01, 0.0000000000000000E+00) - c = SVector(0.0000000000000000E+00, 1.9189497208340553E-01, 1.9580448818599061E-01, 2.4241635859769023E-01, 5.0728347557552977E-01) - - timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, - gamma1, gamma2, gamma3, beta, delta, c) +function timestep_gravity_erk51_3Sstar!(cache, u_euler, t, dt, gravity_parameters, + semi_gravity) + # New 3Sstar coefficients optimized for polynomials of degree polydeg=3 + # and examples/parameters_hypdiff_lax_friedrichs.toml + # 5 stages, order 1 + gamma1 = SVector(0.0000000000000000E+00, 5.2910412316555866E-01, + 2.8433964362349406E-01, -1.4467571130907027E+00, + 7.5592215948661057E-02) + gamma2 = SVector(1.0000000000000000E+00, 2.6366970460864109E-01, + 3.7423646095836322E-01, 7.8786901832431289E-01, + 3.7754129043053775E-01) + gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, + 0.0000000000000000E+00, 8.0043329115077388E-01, + 1.3550099149374278E-01) + beta = SVector(1.9189497208340553E-01, 5.4506406707700059E-02, + 1.2103893164085415E-01, 6.8582252490550921E-01, + 8.7914657211972225E-01) + delta = SVector(1.0000000000000000E+00, 7.8593091509463076E-01, + 1.2639038717454840E-01, 1.7726945920209813E-01, + 0.0000000000000000E+00) + c = SVector(0.0000000000000000E+00, 1.9189497208340553E-01, 1.9580448818599061E-01, + 2.4241635859769023E-01, 5.0728347557552977E-01) + + timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, + gamma1, gamma2, gamma3, beta, delta, c) end -function timestep_gravity_erk52_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity) - # New 3Sstar coefficients optimized for polynomials of degree polydeg=3 - # and examples/parameters_hypdiff_lax_friedrichs.toml - # 5 stages, order 2 - gamma1 = SVector(0.0000000000000000E+00, 5.2656474556752575E-01, 1.0385212774098265E+00, 3.6859755007388034E-01, -6.3350615190506088E-01) - gamma2 = SVector(1.0000000000000000E+00, 4.1892580153419307E-01, -2.7595818152587825E-02, 9.1271323651988631E-02, 6.8495995159465062E-01) - gamma3 = 
SVector(0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00, 4.1301005663300466E-01, -5.4537881202277507E-03) - beta = SVector(4.5158640252832094E-01, 7.5974836561844006E-01, 3.7561630338850771E-01, 2.9356700007428856E-02, 2.5205285143494666E-01) - delta = SVector(1.0000000000000000E+00, 1.3011720142005145E-01, 2.6579275844515687E-01, 9.9687218193685878E-01, 0.0000000000000000E+00) - c = SVector(0.0000000000000000E+00, 4.5158640252832094E-01, 1.0221535725056414E+00, 1.4280257701954349E+00, 7.1581334196229851E-01) - - timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, - gamma1, gamma2, gamma3, beta, delta, c) +function timestep_gravity_erk52_3Sstar!(cache, u_euler, t, dt, gravity_parameters, + semi_gravity) + # New 3Sstar coefficients optimized for polynomials of degree polydeg=3 + # and examples/parameters_hypdiff_lax_friedrichs.toml + # 5 stages, order 2 + gamma1 = SVector(0.0000000000000000E+00, 5.2656474556752575E-01, + 1.0385212774098265E+00, 3.6859755007388034E-01, + -6.3350615190506088E-01) + gamma2 = SVector(1.0000000000000000E+00, 4.1892580153419307E-01, + -2.7595818152587825E-02, 9.1271323651988631E-02, + 6.8495995159465062E-01) + gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, + 0.0000000000000000E+00, 4.1301005663300466E-01, + -5.4537881202277507E-03) + beta = SVector(4.5158640252832094E-01, 7.5974836561844006E-01, + 3.7561630338850771E-01, 2.9356700007428856E-02, + 2.5205285143494666E-01) + delta = SVector(1.0000000000000000E+00, 1.3011720142005145E-01, + 2.6579275844515687E-01, 9.9687218193685878E-01, + 0.0000000000000000E+00) + c = SVector(0.0000000000000000E+00, 4.5158640252832094E-01, 1.0221535725056414E+00, + 1.4280257701954349E+00, 7.1581334196229851E-01) + + timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, + gamma1, gamma2, gamma3, beta, delta, c) end -function timestep_gravity_erk53_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity) - # New 3Sstar coefficients optimized for polynomials of degree polydeg=3 - # and examples/parameters_hypdiff_lax_friedrichs.toml - # 5 stages, order 3 - gamma1 = SVector(0.0000000000000000E+00, 6.9362208054011210E-01, 9.1364483229179472E-01, 1.3129305757628569E+00, -1.4615811339132949E+00) - gamma2 = SVector(1.0000000000000000E+00, 1.3224582239681788E+00, 2.4213162353103135E-01, -3.8532017293685838E-01, 1.5603355704723714E+00) - gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00, 3.8306787039991996E-01, -3.5683121201711010E-01) - beta = SVector(8.4476964977404881E-02, 3.0834660698015803E-01, 3.2131664733089232E-01, 2.8783574345390539E-01, 8.2199204703236073E-01) - delta = SVector(1.0000000000000000E+00, -7.6832695815481578E-01, 1.2497251501714818E-01, 1.4496404749796306E+00, 0.0000000000000000E+00) - c = SVector(0.0000000000000000E+00, 8.4476964977404881E-02, 2.8110631488732202E-01, 5.7093842145029405E-01, 7.2999896418559662E-01) - - timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, - gamma1, gamma2, gamma3, beta, delta, c) +function timestep_gravity_erk53_3Sstar!(cache, u_euler, t, dt, gravity_parameters, + semi_gravity) + # New 3Sstar coefficients optimized for polynomials of degree polydeg=3 + # and examples/parameters_hypdiff_lax_friedrichs.toml + # 5 stages, order 3 + gamma1 = SVector(0.0000000000000000E+00, 6.9362208054011210E-01, + 9.1364483229179472E-01, 1.3129305757628569E+00, + -1.4615811339132949E+00) + gamma2 = SVector(1.0000000000000000E+00, 
1.3224582239681788E+00, + 2.4213162353103135E-01, -3.8532017293685838E-01, + 1.5603355704723714E+00) + gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, + 0.0000000000000000E+00, 3.8306787039991996E-01, + -3.5683121201711010E-01) + beta = SVector(8.4476964977404881E-02, 3.0834660698015803E-01, + 3.2131664733089232E-01, 2.8783574345390539E-01, + 8.2199204703236073E-01) + delta = SVector(1.0000000000000000E+00, -7.6832695815481578E-01, + 1.2497251501714818E-01, 1.4496404749796306E+00, + 0.0000000000000000E+00) + c = SVector(0.0000000000000000E+00, 8.4476964977404881E-02, 2.8110631488732202E-01, + 5.7093842145029405E-01, 7.2999896418559662E-01) + + timestep_gravity_3Sstar!(cache, u_euler, t, dt, gravity_parameters, semi_gravity, + gamma1, gamma2, gamma3, beta, delta, c) end - # TODO: Taal decide, where should specific parts like these be? @inline function save_solution_file(u_ode, t, dt, iter, - semi::SemidiscretizationEulerGravity, solution_callback, - element_variables=Dict{Symbol,Any}()) - - u_euler = wrap_array_native(u_ode, semi.semi_euler) - filename_euler = save_solution_file(u_euler, t, dt, iter, - mesh_equations_solver_cache(semi.semi_euler)..., - solution_callback, element_variables, system="euler") - - u_gravity = wrap_array_native(semi.cache.u_ode, semi.semi_gravity) - filename_gravity = save_solution_file(u_gravity, t, dt, iter, - mesh_equations_solver_cache(semi.semi_gravity)..., - solution_callback, element_variables, system="gravity") - - return filename_euler, filename_gravity + semi::SemidiscretizationEulerGravity, + solution_callback, + element_variables = Dict{Symbol, Any}()) + u_euler = wrap_array_native(u_ode, semi.semi_euler) + filename_euler = save_solution_file(u_euler, t, dt, iter, + mesh_equations_solver_cache(semi.semi_euler)..., + solution_callback, element_variables, + system = "euler") + + u_gravity = wrap_array_native(semi.cache.u_ode, semi.semi_gravity) + filename_gravity = save_solution_file(u_gravity, t, dt, iter, + mesh_equations_solver_cache(semi.semi_gravity)..., + solution_callback, element_variables, + system = "gravity") + + return filename_euler, filename_gravity end - @inline function (amr_callback::AMRCallback)(u_ode, semi::SemidiscretizationEulerGravity, t, iter; kwargs...) - passive_args = ((semi.cache.u_ode, mesh_equations_solver_cache(semi.semi_gravity)...),) - amr_callback(u_ode, mesh_equations_solver_cache(semi.semi_euler)..., semi, t, iter; - kwargs..., passive_args=passive_args) + passive_args = ((semi.cache.u_ode, + mesh_equations_solver_cache(semi.semi_gravity)...),) + amr_callback(u_ode, mesh_equations_solver_cache(semi.semi_euler)..., semi, t, iter; + kwargs..., passive_args = passive_args) end - - end # @muladd diff --git a/src/semidiscretization/semidiscretization_hyperbolic.jl b/src/semidiscretization/semidiscretization_hyperbolic.jl index 7e93f2a7f64..50b2c21c14e 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ SemidiscretizationHyperbolic @@ -11,33 +11,42 @@ A struct containing everything needed to describe a spatial semidiscretization of a hyperbolic conservation law. 
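
As a sketch, such an object is typically created via the constructor documented
below; the concrete equations, solver, and mesh here follow common Trixi.jl
elixirs and are placeholders:

    equations = CompressibleEulerEquations2D(1.4)
    solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
    mesh = TreeMesh((-2.0, -2.0), (2.0, 2.0),
                    initial_refinement_level = 4, n_cells_max = 10_000)
    semi = SemidiscretizationHyperbolic(mesh, equations,
                                        initial_condition_convergence_test,
                                        solver)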
""" -struct SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, BoundaryConditions, - SourceTerms, Solver, Cache} <: AbstractSemidiscretization - - mesh::Mesh - equations::Equations - - # This guy is a bit messy since we abuse it as some kind of "exact solution" - # although this doesn't really exist... - initial_condition::InitialCondition - - boundary_conditions::BoundaryConditions - source_terms::SourceTerms - solver::Solver - cache::Cache - performance_counter::PerformanceCounter - - function SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, BoundaryConditions, SourceTerms, Solver, Cache}( - mesh::Mesh, equations::Equations, - initial_condition::InitialCondition, boundary_conditions::BoundaryConditions, - source_terms::SourceTerms, - solver::Solver, cache::Cache) where {Mesh, Equations, InitialCondition, BoundaryConditions, SourceTerms, Solver, Cache} - @assert ndims(mesh) == ndims(equations) - - performance_counter = PerformanceCounter() - - new(mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache, performance_counter) - end +struct SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, + BoundaryConditions, + SourceTerms, Solver, Cache} <: + AbstractSemidiscretization + mesh::Mesh + equations::Equations + + # This guy is a bit messy since we abuse it as some kind of "exact solution" + # although this doesn't really exist... + initial_condition::InitialCondition + + boundary_conditions::BoundaryConditions + source_terms::SourceTerms + solver::Solver + cache::Cache + performance_counter::PerformanceCounter + + function SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, + BoundaryConditions, SourceTerms, Solver, Cache + }(mesh::Mesh, equations::Equations, + initial_condition::InitialCondition, + boundary_conditions::BoundaryConditions, + source_terms::SourceTerms, + solver::Solver, + cache::Cache) where {Mesh, Equations, + InitialCondition, + BoundaryConditions, + SourceTerms, Solver, + Cache} + @assert ndims(mesh) == ndims(equations) + + performance_counter = PerformanceCounter() + + new(mesh, equations, initial_condition, boundary_conditions, source_terms, + solver, cache, performance_counter) + end end """ @@ -51,208 +60,253 @@ end Construct a semidiscretization of a hyperbolic PDE. """ function SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver; - source_terms=nothing, - boundary_conditions=boundary_condition_periodic, + source_terms = nothing, + boundary_conditions = boundary_condition_periodic, # `RealT` is used as real type for node locations etc. # while `uEltype` is used as element type of solutions etc. - RealT=real(solver), uEltype=RealT, - initial_cache=NamedTuple()) - - cache = (; create_cache(mesh, equations, solver, RealT, uEltype)..., initial_cache...) - _boundary_conditions = digest_boundary_conditions(boundary_conditions, mesh, solver, cache) - - SemidiscretizationHyperbolic{typeof(mesh), typeof(equations), typeof(initial_condition), typeof(_boundary_conditions), typeof(source_terms), typeof(solver), typeof(cache)}( - mesh, equations, initial_condition, _boundary_conditions, source_terms, solver, cache) + RealT = real(solver), uEltype = RealT, + initial_cache = NamedTuple()) + cache = (; create_cache(mesh, equations, solver, RealT, uEltype)..., + initial_cache...) 
+ _boundary_conditions = digest_boundary_conditions(boundary_conditions, mesh, solver, + cache) + + SemidiscretizationHyperbolic{typeof(mesh), typeof(equations), + typeof(initial_condition), + typeof(_boundary_conditions), typeof(source_terms), + typeof(solver), typeof(cache)}(mesh, equations, + initial_condition, + _boundary_conditions, + source_terms, solver, + cache) end - # Create a new semidiscretization but change some parameters compared to the input. # `Base.similar` follows a related concept but would require us to `copy` the `mesh`, # which would impact the performance. Instead, `SciMLBase.remake` has exactly the # semantics we want to use here. In particular, it allows us to re-use mutable parts, # e.g. `remake(semi).mesh === semi.mesh`. -function remake(semi::SemidiscretizationHyperbolic; uEltype=real(semi.solver), - mesh=semi.mesh, - equations=semi.equations, - initial_condition=semi.initial_condition, - solver=semi.solver, - source_terms=semi.source_terms, - boundary_conditions=semi.boundary_conditions - ) - # TODO: Which parts do we want to `remake`? At least the solver needs some - # special care if shock-capturing volume integrals are used (because of - # the indicators and their own caches...). - SemidiscretizationHyperbolic( - mesh, equations, initial_condition, solver; source_terms, boundary_conditions, uEltype) +function remake(semi::SemidiscretizationHyperbolic; uEltype = real(semi.solver), + mesh = semi.mesh, + equations = semi.equations, + initial_condition = semi.initial_condition, + solver = semi.solver, + source_terms = semi.source_terms, + boundary_conditions = semi.boundary_conditions) + # TODO: Which parts do we want to `remake`? At least the solver needs some + # special care if shock-capturing volume integrals are used (because of + # the indicators and their own caches...). 
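    # A usage sketch (hypothetical): exchange only the solver while re-using
    # the mutable parts, in particular the mesh,
    #
    #     semi_new = remake(semi; solver = DGSEM(polydeg = 4))
    #     semi_new.mesh === semi.mesh  # true
    #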
+ SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver; + source_terms, boundary_conditions, uEltype) end - # general fallback -digest_boundary_conditions(boundary_conditions, mesh, solver, cache) = boundary_conditions +function digest_boundary_conditions(boundary_conditions, mesh, solver, cache) + boundary_conditions +end # general fallback -digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, - mesh, solver, cache) = boundary_conditions +function digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, + mesh, solver, cache) + boundary_conditions +end # resolve ambiguities with definitions below -digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, cache) = boundary_conditions +function digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, + cache) + boundary_conditions +end -digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, - mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, cache) = boundary_conditions +function digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, + mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, + cache) + boundary_conditions +end -digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, - mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, cache) = boundary_conditions +function digest_boundary_conditions(boundary_conditions::BoundaryConditionPeriodic, + mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, + cache) + boundary_conditions +end # allow passing a single BC that get converted into a tuple of BCs # on (mapped) hypercube domains function digest_boundary_conditions(boundary_conditions, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, cache) - (; x_neg=boundary_conditions, x_pos=boundary_conditions) + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, + cache) + (; x_neg = boundary_conditions, x_pos = boundary_conditions) end function digest_boundary_conditions(boundary_conditions, - mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, cache) - (; x_neg=boundary_conditions, x_pos=boundary_conditions, - y_neg=boundary_conditions, y_pos=boundary_conditions) + mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, + cache) + (; x_neg = boundary_conditions, x_pos = boundary_conditions, + y_neg = boundary_conditions, y_pos = boundary_conditions) end function digest_boundary_conditions(boundary_conditions, - mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, cache) - (; x_neg=boundary_conditions, x_pos=boundary_conditions, - y_neg=boundary_conditions, y_pos=boundary_conditions, - z_neg=boundary_conditions, z_pos=boundary_conditions) + mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, + cache) + (; x_neg = boundary_conditions, x_pos = boundary_conditions, + y_neg = boundary_conditions, y_pos = boundary_conditions, + z_neg = boundary_conditions, z_pos = boundary_conditions) end # allow passing a tuple of BCs that get converted into a named tuple to make it # self-documenting on (mapped) hypercube domains function digest_boundary_conditions(boundary_conditions::NTuple{2, Any}, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, cache) - (; x_neg=boundary_conditions[1], x_pos=boundary_conditions[2]) + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, + cache) + (; x_neg = boundary_conditions[1], x_pos = boundary_conditions[2]) end function 
digest_boundary_conditions(boundary_conditions::NTuple{4, Any}, - mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, cache) - (; x_neg=boundary_conditions[1], x_pos=boundary_conditions[2], - y_neg=boundary_conditions[3], y_pos=boundary_conditions[4]) + mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, + cache) + (; x_neg = boundary_conditions[1], x_pos = boundary_conditions[2], + y_neg = boundary_conditions[3], y_pos = boundary_conditions[4]) end function digest_boundary_conditions(boundary_conditions::NTuple{6, Any}, - mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, cache) - (; x_neg=boundary_conditions[1], x_pos=boundary_conditions[2], - y_neg=boundary_conditions[3], y_pos=boundary_conditions[4], - z_neg=boundary_conditions[5], z_pos=boundary_conditions[6]) + mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, + cache) + (; x_neg = boundary_conditions[1], x_pos = boundary_conditions[2], + y_neg = boundary_conditions[3], y_pos = boundary_conditions[4], + z_neg = boundary_conditions[5], z_pos = boundary_conditions[6]) end # allow passing named tuples of BCs constructed in an arbitrary order # on (mapped) hypercube domains -function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys,ValueTypes}, - mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, cache) where {Keys, ValueTypes<:NTuple{2,Any}} - @unpack x_neg, x_pos = boundary_conditions - (; x_neg, x_pos) +function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys, ValueTypes}, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, solver, + cache) where {Keys, ValueTypes <: NTuple{2, Any}} + @unpack x_neg, x_pos = boundary_conditions + (; x_neg, x_pos) end -function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys,ValueTypes}, - mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, cache) where {Keys, ValueTypes<:NTuple{4,Any}} - @unpack x_neg, x_pos, y_neg, y_pos = boundary_conditions - (; x_neg, x_pos, y_neg, y_pos) +function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys, ValueTypes}, + mesh::Union{TreeMesh{2}, StructuredMesh{2}}, solver, + cache) where {Keys, ValueTypes <: NTuple{4, Any}} + @unpack x_neg, x_pos, y_neg, y_pos = boundary_conditions + (; x_neg, x_pos, y_neg, y_pos) end -function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys,ValueTypes}, - mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, cache) where {Keys, ValueTypes<:NTuple{6,Any}} - @unpack x_neg, x_pos, y_neg, y_pos, z_neg, z_pos = boundary_conditions - (; x_neg, x_pos, y_neg, y_pos, z_neg, z_pos) +function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys, ValueTypes}, + mesh::Union{TreeMesh{3}, StructuredMesh{3}}, solver, + cache) where {Keys, ValueTypes <: NTuple{6, Any}} + @unpack x_neg, x_pos, y_neg, y_pos, z_neg, z_pos = boundary_conditions + (; x_neg, x_pos, y_neg, y_pos, z_neg, z_pos) end # sort the boundary conditions from a dictionary and into tuples function digest_boundary_conditions(boundary_conditions::Dict, mesh, solver, cache) - UnstructuredSortedBoundaryTypes(boundary_conditions, cache) + UnstructuredSortedBoundaryTypes(boundary_conditions, cache) end -function digest_boundary_conditions(boundary_conditions::AbstractArray, mesh, solver, cache) - throw(ArgumentError("Please use a (named) tuple instead of an (abstract) array to supply multiple boundary conditions (to improve performance).")) +function digest_boundary_conditions(boundary_conditions::AbstractArray, mesh, solver, + cache) + throw(ArgumentError("Please use a (named) tuple instead of an 
(abstract) array to supply multiple boundary conditions (to improve performance).")) end - function Base.show(io::IO, semi::SemidiscretizationHyperbolic) - @nospecialize semi # reduce precompilation time - - print(io, "SemidiscretizationHyperbolic(") - print(io, semi.mesh) - print(io, ", ", semi.equations) - print(io, ", ", semi.initial_condition) - print(io, ", ", semi.boundary_conditions) - print(io, ", ", semi.source_terms) - print(io, ", ", semi.solver) - print(io, ", cache(") - for (idx,key) in enumerate(keys(semi.cache)) - idx > 1 && print(io, " ") - print(io, key) - end - print(io, "))") + @nospecialize semi # reduce precompilation time + + print(io, "SemidiscretizationHyperbolic(") + print(io, semi.mesh) + print(io, ", ", semi.equations) + print(io, ", ", semi.initial_condition) + print(io, ", ", semi.boundary_conditions) + print(io, ", ", semi.source_terms) + print(io, ", ", semi.solver) + print(io, ", cache(") + for (idx, key) in enumerate(keys(semi.cache)) + idx > 1 && print(io, " ") + print(io, key) + end + print(io, "))") end function Base.show(io::IO, ::MIME"text/plain", semi::SemidiscretizationHyperbolic) - @nospecialize semi # reduce precompilation time - - if get(io, :compact, false) - show(io, semi) - else - summary_header(io, "SemidiscretizationHyperbolic") - summary_line(io, "#spatial dimensions", ndims(semi.equations)) - summary_line(io, "mesh", semi.mesh) - summary_line(io, "equations", semi.equations |> typeof |> nameof) - summary_line(io, "initial condition", semi.initial_condition) - - print_boundary_conditions(io, semi) - - summary_line(io, "source terms", semi.source_terms) - summary_line(io, "solver", semi.solver |> typeof |> nameof) - summary_line(io, "total #DOFs", ndofs(semi)) - summary_footer(io) - end + @nospecialize semi # reduce precompilation time + + if get(io, :compact, false) + show(io, semi) + else + summary_header(io, "SemidiscretizationHyperbolic") + summary_line(io, "#spatial dimensions", ndims(semi.equations)) + summary_line(io, "mesh", semi.mesh) + summary_line(io, "equations", semi.equations |> typeof |> nameof) + summary_line(io, "initial condition", semi.initial_condition) + + print_boundary_conditions(io, semi) + + summary_line(io, "source terms", semi.source_terms) + summary_line(io, "solver", semi.solver |> typeof |> nameof) + summary_line(io, "total #DOFs", ndofs(semi)) + summary_footer(io) + end end # type alias for dispatch in printing of boundary conditions +#! format: off const SemiHypMeshBCSolver{Mesh, BoundaryConditions, Solver} = - SemidiscretizationHyperbolic{Mesh, Equations, InitialCondition, BoundaryConditions, - SourceTerms, Solver} where {Equations, InitialCondition, SourceTerms} + SemidiscretizationHyperbolic{Mesh, + Equations, + InitialCondition, + BoundaryConditions, + SourceTerms, + Solver} where {Equations, + InitialCondition, + SourceTerms} +#! format: on # generic fallback: print the type of semi.boundary_condition. 
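# Note that `digest_boundary_conditions` above has already normalized the user
# input, so the specialized printers below can rely on its output shape. As a
# hypothetical example, a single boundary condition `bc` passed for a 2D
# hypercube mesh arrives here as
#
#     (; x_neg = bc, x_pos = bc, y_neg = bc, y_pos = bc)
#
# and is then reported with one summary line per boundary.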
-print_boundary_conditions(io, semi::SemiHypMeshBCSolver) = summary_line(io, "boundary conditions", typeof(semi.boundary_conditions))
-
-function print_boundary_conditions(io, semi::SemiHypMeshBCSolver{<:Any, <:UnstructuredSortedBoundaryTypes})
-  @unpack boundary_conditions = semi
-  @unpack boundary_dictionary = boundary_conditions
-  summary_line(io, "boundary conditions", length(boundary_dictionary))
-  for (boundary_name, boundary_condition) in boundary_dictionary
-    summary_line(increment_indent(io), boundary_name, typeof(boundary_condition))
-  end
+function print_boundary_conditions(io, semi::SemiHypMeshBCSolver)
+    summary_line(io, "boundary conditions", typeof(semi.boundary_conditions))
+end
+
+function print_boundary_conditions(io,
+                                   semi::SemiHypMeshBCSolver{<:Any,
+                                                             <:UnstructuredSortedBoundaryTypes
+                                                             })
+    @unpack boundary_conditions = semi
+    @unpack boundary_dictionary = boundary_conditions
+    summary_line(io, "boundary conditions", length(boundary_dictionary))
+    for (boundary_name, boundary_condition) in boundary_dictionary
+        summary_line(increment_indent(io), boundary_name, typeof(boundary_condition))
+    end
end

function print_boundary_conditions(io, semi::SemiHypMeshBCSolver{<:Any, <:NamedTuple})
-  @unpack boundary_conditions = semi
-  summary_line(io, "boundary conditions", length(boundary_conditions))
-  bc_names = keys(boundary_conditions)
-  for (i, bc_name) in enumerate(bc_names)
-    summary_line(increment_indent(io), String(bc_name), typeof(boundary_conditions[i]))
-  end
+    @unpack boundary_conditions = semi
+    summary_line(io, "boundary conditions", length(boundary_conditions))
+    bc_names = keys(boundary_conditions)
+    for (i, bc_name) in enumerate(bc_names)
+        summary_line(increment_indent(io), String(bc_name),
+                     typeof(boundary_conditions[i]))
+    end
end

-function print_boundary_conditions(io, semi::SemiHypMeshBCSolver{<:Union{TreeMesh, StructuredMesh}, <:Union{Tuple,NamedTuple,AbstractArray}})
-  summary_line(io, "boundary conditions", 2*ndims(semi))
-  bcs = semi.boundary_conditions
-
-  summary_line(increment_indent(io), "negative x", bcs[1])
-  summary_line(increment_indent(io), "positive x", bcs[2])
-  if ndims(semi) > 1
-    summary_line(increment_indent(io), "negative y", bcs[3])
-    summary_line(increment_indent(io), "positive y", bcs[4])
-  end
-  if ndims(semi) > 2
-    summary_line(increment_indent(io), "negative z", bcs[5])
-    summary_line(increment_indent(io), "positive z", bcs[6])
-  end
+function print_boundary_conditions(io,
+                                   semi::SemiHypMeshBCSolver{
+                                                             <:Union{TreeMesh,
+                                                                     StructuredMesh},
+                                                             <:Union{Tuple, NamedTuple,
+                                                                     AbstractArray}})
+    summary_line(io, "boundary conditions", 2 * ndims(semi))
+    bcs = semi.boundary_conditions
+
+    summary_line(increment_indent(io), "negative x", bcs[1])
+    summary_line(increment_indent(io), "positive x", bcs[2])
+    if ndims(semi) > 1
+        summary_line(increment_indent(io), "negative y", bcs[3])
+        summary_line(increment_indent(io), "positive y", bcs[4])
+    end
+    if ndims(semi) > 2
+        summary_line(increment_indent(io), "negative z", bcs[5])
+        summary_line(increment_indent(io), "positive z", bcs[6])
+    end
end

@inline Base.ndims(semi::SemidiscretizationHyperbolic) = ndims(semi.mesh)
@@ -261,45 +315,42 @@ end

@inline Base.real(semi::SemidiscretizationHyperbolic) = real(semi.solver)

-
@inline function mesh_equations_solver_cache(semi::SemidiscretizationHyperbolic)
-  @unpack mesh, equations, solver, cache = semi
-  return mesh, equations, solver, cache
+    @unpack mesh, equations, solver, cache = semi
+    return mesh, equations, solver, cache
end

+function calc_error_norms(func, u_ode, t, analyzer, semi::SemidiscretizationHyperbolic,
+                          cache_analysis)
+    @unpack mesh, equations, initial_condition, solver, cache = semi
+    u = wrap_array(u_ode, mesh, equations, solver, cache)

-function calc_error_norms(func, u_ode, t, analyzer, semi::SemidiscretizationHyperbolic, cache_analysis)
-  @unpack mesh, equations, initial_condition, solver, cache = semi
-  u = wrap_array(u_ode, mesh, equations, solver, cache)
-
-  calc_error_norms(func, u, t, analyzer, mesh, equations, initial_condition, solver, cache, cache_analysis)
+    calc_error_norms(func, u, t, analyzer, mesh, equations, initial_condition, solver,
+                     cache, cache_analysis)
end

-
function compute_coefficients(t, semi::SemidiscretizationHyperbolic)
-  # Call `compute_coefficients` in `src/semidiscretization/semidiscretization.jl`
-  compute_coefficients(semi.initial_condition, t, semi)
+    # Call `compute_coefficients` in `src/semidiscretization/semidiscretization.jl`
+    compute_coefficients(semi.initial_condition, t, semi)
end

function compute_coefficients!(u_ode, t, semi::SemidiscretizationHyperbolic)
-  compute_coefficients!(u_ode, semi.initial_condition, t, semi)
+    compute_coefficients!(u_ode, semi.initial_condition, t, semi)
end

-
function rhs!(du_ode, u_ode, semi::SemidiscretizationHyperbolic, t)
-  @unpack mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache = semi
+    @unpack mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache = semi

-  u = wrap_array(u_ode, mesh, equations, solver, cache)
-  du = wrap_array(du_ode, mesh, equations, solver, cache)
+    u = wrap_array(u_ode, mesh, equations, solver, cache)
+    du = wrap_array(du_ode, mesh, equations, solver, cache)

-  # TODO: Taal decide, do we need to pass the mesh?
-  time_start = time_ns()
-  @trixi_timeit timer() "rhs!" rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache)
-  runtime = time_ns() - time_start
-  put!(semi.performance_counter, runtime)
+    # TODO: Taal decide, do we need to pass the mesh?
+    time_start = time_ns()
+    @trixi_timeit timer() "rhs!" rhs!(du, u, t, mesh, equations, initial_condition,
+                                      boundary_conditions, source_terms, solver, cache)
+    runtime = time_ns() - time_start
+    put!(semi.performance_counter, runtime)

-  return nothing
+    return nothing
end
-
-
end # @muladd
diff --git a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl
index 9d85034c805..f54bc744164 100644
--- a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl
+++ b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl
@@ -3,7 +3,7 @@
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
-
+#! format: noindent

"""
    SemidiscretizationHyperbolicParabolic
@@ -11,46 +11,75 @@ A struct containing everything needed to describe a spatial semidiscretization
of a mixed hyperbolic-parabolic conservation law.
"""
-struct SemidiscretizationHyperbolicParabolic{Mesh, Equations, EquationsParabolic, InitialCondition,
-                                             BoundaryConditions, BoundaryConditionsParabolic,
-                                             SourceTerms, Solver, SolverParabolic, Cache, CacheParabolic} <: AbstractSemidiscretization
-
-  mesh::Mesh
-
-  equations::Equations
-  equations_parabolic::EquationsParabolic
-
-  # This guy is a bit messy since we abuse it as some kind of "exact solution"
-  # although this doesn't really exist...
- initial_condition::InitialCondition - - boundary_conditions::BoundaryConditions - boundary_conditions_parabolic::BoundaryConditionsParabolic - - source_terms::SourceTerms - - solver::Solver - solver_parabolic::SolverParabolic - - cache::Cache - cache_parabolic::CacheParabolic - - performance_counter::PerformanceCounterList{2} - - function SemidiscretizationHyperbolicParabolic{Mesh, Equations, EquationsParabolic, InitialCondition, BoundaryConditions, BoundaryConditionsParabolic, SourceTerms, Solver, SolverParabolic, Cache, CacheParabolic}( - mesh::Mesh, equations::Equations, equations_parabolic::EquationsParabolic, initial_condition::InitialCondition, - boundary_conditions::BoundaryConditions, boundary_conditions_parabolic::BoundaryConditionsParabolic, - source_terms::SourceTerms, solver::Solver, solver_parabolic::SolverParabolic, cache::Cache, cache_parabolic::CacheParabolic) where {Mesh, Equations, EquationsParabolic, InitialCondition, BoundaryConditions, BoundaryConditionsParabolic, SourceTerms, Solver, SolverParabolic, Cache, CacheParabolic} - @assert ndims(mesh) == ndims(equations) - - # Todo: assert nvariables(equations)==nvariables(equations_parabolic) - - performance_counter = PerformanceCounterList{2}(false) - - new(mesh, equations, equations_parabolic, initial_condition, - boundary_conditions, boundary_conditions_parabolic, - source_terms, solver, solver_parabolic, cache, cache_parabolic, performance_counter) - end +struct SemidiscretizationHyperbolicParabolic{Mesh, Equations, EquationsParabolic, + InitialCondition, + BoundaryConditions, + BoundaryConditionsParabolic, + SourceTerms, Solver, SolverParabolic, + Cache, CacheParabolic} <: + AbstractSemidiscretization + mesh::Mesh + + equations::Equations + equations_parabolic::EquationsParabolic + + # This guy is a bit messy since we abuse it as some kind of "exact solution" + # although this doesn't really exist... + initial_condition::InitialCondition + + boundary_conditions::BoundaryConditions + boundary_conditions_parabolic::BoundaryConditionsParabolic + + source_terms::SourceTerms + + solver::Solver + solver_parabolic::SolverParabolic + + cache::Cache + cache_parabolic::CacheParabolic + + performance_counter::PerformanceCounterList{2} + + function SemidiscretizationHyperbolicParabolic{Mesh, Equations, EquationsParabolic, + InitialCondition, BoundaryConditions, + BoundaryConditionsParabolic, + SourceTerms, Solver, + SolverParabolic, Cache, + CacheParabolic + }(mesh::Mesh, + equations::Equations, + equations_parabolic::EquationsParabolic, + initial_condition::InitialCondition, + boundary_conditions::BoundaryConditions, + boundary_conditions_parabolic::BoundaryConditionsParabolic, + source_terms::SourceTerms, + solver::Solver, + solver_parabolic::SolverParabolic, + cache::Cache, + cache_parabolic::CacheParabolic) where { + Mesh, + Equations, + EquationsParabolic, + InitialCondition, + BoundaryConditions, + BoundaryConditionsParabolic, + SourceTerms, + Solver, + SolverParabolic, + Cache, + CacheParabolic + } + @assert ndims(mesh) == ndims(equations) + + # Todo: assert nvariables(equations)==nvariables(equations_parabolic) + + performance_counter = PerformanceCounterList{2}(false) + + new(mesh, equations, equations_parabolic, initial_condition, + boundary_conditions, boundary_conditions_parabolic, + source_terms, solver, solver_parabolic, cache, cache_parabolic, + performance_counter) + end end """ @@ -66,150 +95,174 @@ Construct a semidiscretization of a hyperbolic-parabolic PDE. 
""" function SemidiscretizationHyperbolicParabolic(mesh, equations::Tuple, initial_condition, solver; - solver_parabolic=default_parabolic_solver(), - source_terms=nothing, - boundary_conditions=(boundary_condition_periodic, boundary_condition_periodic), + solver_parabolic = default_parabolic_solver(), + source_terms = nothing, + boundary_conditions = (boundary_condition_periodic, + boundary_condition_periodic), # `RealT` is used as real type for node locations etc. # while `uEltype` is used as element type of solutions etc. - RealT=real(solver), uEltype=RealT, - initial_caches=(NamedTuple(), NamedTuple())) - - equations_hyperbolic, equations_parabolic = equations - boundary_conditions_hyperbolic, boundary_conditions_parabolic = boundary_conditions - initial_hyperbolic_cache, initial_cache_parabolic = initial_caches - - return SemidiscretizationHyperbolicParabolic(mesh, equations_hyperbolic, equations_parabolic, - initial_condition, solver; solver_parabolic, source_terms, - boundary_conditions=boundary_conditions_hyperbolic, - boundary_conditions_parabolic=boundary_conditions_parabolic, - RealT, uEltype, initial_cache=initial_hyperbolic_cache, - initial_cache_parabolic=initial_cache_parabolic) + RealT = real(solver), uEltype = RealT, + initial_caches = (NamedTuple(), + NamedTuple())) + equations_hyperbolic, equations_parabolic = equations + boundary_conditions_hyperbolic, boundary_conditions_parabolic = boundary_conditions + initial_hyperbolic_cache, initial_cache_parabolic = initial_caches + + return SemidiscretizationHyperbolicParabolic(mesh, equations_hyperbolic, + equations_parabolic, + initial_condition, solver; + solver_parabolic, source_terms, + boundary_conditions = boundary_conditions_hyperbolic, + boundary_conditions_parabolic = boundary_conditions_parabolic, + RealT, uEltype, + initial_cache = initial_hyperbolic_cache, + initial_cache_parabolic = initial_cache_parabolic) end function SemidiscretizationHyperbolicParabolic(mesh, equations, equations_parabolic, initial_condition, solver; - solver_parabolic=default_parabolic_solver(), - source_terms=nothing, - boundary_conditions=boundary_condition_periodic, - boundary_conditions_parabolic=boundary_condition_periodic, + solver_parabolic = default_parabolic_solver(), + source_terms = nothing, + boundary_conditions = boundary_condition_periodic, + boundary_conditions_parabolic = boundary_condition_periodic, # `RealT` is used as real type for node locations etc. # while `uEltype` is used as element type of solutions etc. - RealT=real(solver), uEltype=RealT, - initial_cache=NamedTuple(), - initial_cache_parabolic=NamedTuple()) - - cache = (; create_cache(mesh, equations, solver, RealT, uEltype)..., initial_cache...) - _boundary_conditions = digest_boundary_conditions(boundary_conditions, mesh, solver, cache) - _boundary_conditions_parabolic = digest_boundary_conditions(boundary_conditions_parabolic, mesh, solver, cache) - - cache_parabolic = (; create_cache_parabolic(mesh, equations, equations_parabolic, - solver, solver_parabolic, RealT, uEltype)..., - initial_cache_parabolic...) 
- - SemidiscretizationHyperbolicParabolic{typeof(mesh), typeof(equations), typeof(equations_parabolic), - typeof(initial_condition), typeof(_boundary_conditions), typeof(_boundary_conditions_parabolic), - typeof(source_terms), typeof(solver), typeof(solver_parabolic), typeof(cache), typeof(cache_parabolic)}( - mesh, equations, equations_parabolic, initial_condition, - _boundary_conditions, _boundary_conditions_parabolic, source_terms, - solver, solver_parabolic, cache, cache_parabolic) + RealT = real(solver), uEltype = RealT, + initial_cache = NamedTuple(), + initial_cache_parabolic = NamedTuple()) + cache = (; create_cache(mesh, equations, solver, RealT, uEltype)..., + initial_cache...) + _boundary_conditions = digest_boundary_conditions(boundary_conditions, mesh, solver, + cache) + _boundary_conditions_parabolic = digest_boundary_conditions(boundary_conditions_parabolic, + mesh, solver, cache) + + cache_parabolic = (; + create_cache_parabolic(mesh, equations, equations_parabolic, + solver, solver_parabolic, RealT, + uEltype)..., + initial_cache_parabolic...) + + SemidiscretizationHyperbolicParabolic{typeof(mesh), typeof(equations), + typeof(equations_parabolic), + typeof(initial_condition), + typeof(_boundary_conditions), + typeof(_boundary_conditions_parabolic), + typeof(source_terms), typeof(solver), + typeof(solver_parabolic), typeof(cache), + typeof(cache_parabolic)}(mesh, equations, + equations_parabolic, + initial_condition, + _boundary_conditions, + _boundary_conditions_parabolic, + source_terms, + solver, + solver_parabolic, + cache, + cache_parabolic) end - # Create a new semidiscretization but change some parameters compared to the input. # `Base.similar` follows a related concept but would require us to `copy` the `mesh`, # which would impact the performance. Instead, `SciMLBase.remake` has exactly the # semantics we want to use here. In particular, it allows us to re-use mutable parts, # e.g. `remake(semi).mesh === semi.mesh`. -function remake(semi::SemidiscretizationHyperbolicParabolic; uEltype=real(semi.solver), - mesh=semi.mesh, - equations=semi.equations, - equations_parabolic=semi.equations_parabolic, - initial_condition=semi.initial_condition, - solver=semi.solver, - solver_parabolic=semi.solver_parabolic, - source_terms=semi.source_terms, - boundary_conditions=semi.boundary_conditions, - boundary_conditions_parabolic=semi.boundary_conditions_parabolic - ) - # TODO: Which parts do we want to `remake`? At least the solver needs some - # special care if shock-capturing volume integrals are used (because of - # the indicators and their own caches...). - SemidiscretizationHyperbolicParabolic( - mesh, equations, equations_parabolic, initial_condition, solver; solver_parabolic, source_terms, boundary_conditions, boundary_conditions_parabolic, uEltype) +function remake(semi::SemidiscretizationHyperbolicParabolic; + uEltype = real(semi.solver), + mesh = semi.mesh, + equations = semi.equations, + equations_parabolic = semi.equations_parabolic, + initial_condition = semi.initial_condition, + solver = semi.solver, + solver_parabolic = semi.solver_parabolic, + source_terms = semi.source_terms, + boundary_conditions = semi.boundary_conditions, + boundary_conditions_parabolic = semi.boundary_conditions_parabolic) + # TODO: Which parts do we want to `remake`? At least the solver needs some + # special care if shock-capturing volume integrals are used (because of + # the indicators and their own caches...). 
+    SemidiscretizationHyperbolicParabolic(mesh, equations, equations_parabolic,
+                                          initial_condition, solver; solver_parabolic,
+                                          source_terms, boundary_conditions,
+                                          boundary_conditions_parabolic, uEltype)
end

function Base.show(io::IO, semi::SemidiscretizationHyperbolicParabolic)
-  @nospecialize semi # reduce precompilation time
-
-  print(io, "SemidiscretizationHyperbolicParabolic(")
-  print(io, semi.mesh)
-  print(io, ", ", semi.equations)
-  print(io, ", ", semi.equations_parabolic)
-  print(io, ", ", semi.initial_condition)
-  print(io, ", ", semi.boundary_conditions)
-  print(io, ", ", semi.boundary_conditions_parabolic)
-  print(io, ", ", semi.source_terms)
-  print(io, ", ", semi.solver)
-  print(io, ", ", semi.solver_parabolic)
-  print(io, ", cache(")
-  for (idx,key) in enumerate(keys(semi.cache))
-    idx > 1 && print(io, " ")
-    print(io, key)
-  end
-  print(io, "))")
+    @nospecialize semi # reduce precompilation time
+
+    print(io, "SemidiscretizationHyperbolicParabolic(")
+    print(io, semi.mesh)
+    print(io, ", ", semi.equations)
+    print(io, ", ", semi.equations_parabolic)
+    print(io, ", ", semi.initial_condition)
+    print(io, ", ", semi.boundary_conditions)
+    print(io, ", ", semi.boundary_conditions_parabolic)
+    print(io, ", ", semi.source_terms)
+    print(io, ", ", semi.solver)
+    print(io, ", ", semi.solver_parabolic)
+    print(io, ", cache(")
+    for (idx, key) in enumerate(keys(semi.cache))
+        idx > 1 && print(io, " ")
+        print(io, key)
+    end
+    print(io, "))")
end

-function Base.show(io::IO, ::MIME"text/plain", semi::SemidiscretizationHyperbolicParabolic)
-  @nospecialize semi # reduce precompilation time
-
-  if get(io, :compact, false)
-    show(io, semi)
-  else
-    summary_header(io, "SemidiscretizationHyperbolicParabolic")
-    summary_line(io, "#spatial dimensions", ndims(semi.equations))
-    summary_line(io, "mesh", semi.mesh)
-    summary_line(io, "hyperbolic equations", semi.equations |> typeof |> nameof)
-    summary_line(io, "parabolic equations", semi.equations_parabolic |> typeof |> nameof)
-    summary_line(io, "initial condition", semi.initial_condition)
-
-    # print_boundary_conditions(io, semi)
-
-    summary_line(io, "source terms", semi.source_terms)
-    summary_line(io, "solver", semi.solver |> typeof |> nameof)
-    summary_line(io, "parabolic solver", semi.solver_parabolic |> typeof |> nameof)
-    summary_line(io, "total #DOFs", ndofs(semi))
-    summary_footer(io)
-  end
+function Base.show(io::IO, ::MIME"text/plain",
+                   semi::SemidiscretizationHyperbolicParabolic)
+    @nospecialize semi # reduce precompilation time
+
+    if get(io, :compact, false)
+        show(io, semi)
+    else
+        summary_header(io, "SemidiscretizationHyperbolicParabolic")
+        summary_line(io, "#spatial dimensions", ndims(semi.equations))
+        summary_line(io, "mesh", semi.mesh)
+        summary_line(io, "hyperbolic equations", semi.equations |> typeof |> nameof)
+        summary_line(io, "parabolic equations",
+                     semi.equations_parabolic |> typeof |> nameof)
+        summary_line(io, "initial condition", semi.initial_condition)
+
+        # print_boundary_conditions(io, semi)
+
+        summary_line(io, "source terms", semi.source_terms)
+        summary_line(io, "solver", semi.solver |> typeof |> nameof)
+        summary_line(io, "parabolic solver", semi.solver_parabolic |> typeof |> nameof)
+        summary_line(io, "total #DOFs", ndofs(semi))
+        summary_footer(io)
+    end
end

@inline Base.ndims(semi::SemidiscretizationHyperbolicParabolic) = ndims(semi.mesh)

-@inline nvariables(semi::SemidiscretizationHyperbolicParabolic) = nvariables(semi.equations)
+@inline function nvariables(semi::SemidiscretizationHyperbolicParabolic)
+    nvariables(semi.equations)
+end

@inline Base.real(semi::SemidiscretizationHyperbolicParabolic) = real(semi.solver)

# retain dispatch on hyperbolic equations only
@inline function mesh_equations_solver_cache(semi::SemidiscretizationHyperbolicParabolic)
-  @unpack mesh, equations, solver, cache = semi
-  return mesh, equations, solver, cache
+    @unpack mesh, equations, solver, cache = semi
+    return mesh, equations, solver, cache
end

+function calc_error_norms(func, u_ode, t, analyzer,
+                          semi::SemidiscretizationHyperbolicParabolic, cache_analysis)
+    @unpack mesh, equations, initial_condition, solver, cache = semi
+    u = wrap_array(u_ode, mesh, equations, solver, cache)

-function calc_error_norms(func, u_ode, t, analyzer, semi::SemidiscretizationHyperbolicParabolic, cache_analysis)
-  @unpack mesh, equations, initial_condition, solver, cache = semi
-  u = wrap_array(u_ode, mesh, equations, solver, cache)
-
-  calc_error_norms(func, u, t, analyzer, mesh, equations, initial_condition, solver, cache, cache_analysis)
+    calc_error_norms(func, u, t, analyzer, mesh, equations, initial_condition, solver,
+                     cache, cache_analysis)
end

-
function compute_coefficients(t, semi::SemidiscretizationHyperbolicParabolic)
-  # Call `compute_coefficients` in `src/semidiscretization/semidiscretization.jl`
-  compute_coefficients(semi.initial_condition, t, semi)
+    # Call `compute_coefficients` in `src/semidiscretization/semidiscretization.jl`
+    compute_coefficients(semi.initial_condition, t, semi)
end

function compute_coefficients!(u_ode, t, semi::SemidiscretizationHyperbolicParabolic)
-  compute_coefficients!(u_ode, semi.initial_condition, t, semi)
+    compute_coefficients!(u_ode, semi.initial_condition, t, semi)
end

"""
@@ -222,49 +275,51 @@ will be used by default by the implicit part of IMEX
methods from the SciML ecosystem.
"""
function semidiscretize(semi::SemidiscretizationHyperbolicParabolic, tspan)
-  u0_ode = compute_coefficients(first(tspan), semi)
-  # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using
-  # mpi_isparallel() && MPI.Barrier(mpi_comm())
-  # See https://github.com/trixi-framework/Trixi.jl/issues/328
-  iip = true # is-inplace, i.e., we modify a vector when calling rhs_parabolic!, rhs!
-  # Note that the IMEX time integration methods of OrdinaryDiffEq.jl treat the
-  # first function implicitly and the second one explicitly. Thus, we pass the
-  # stiffer parabolic function first.
-  return SplitODEProblem{iip}(rhs_parabolic!, rhs!, u0_ode, tspan, semi)
+    u0_ode = compute_coefficients(first(tspan), semi)
+    # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using
+    # mpi_isparallel() && MPI.Barrier(mpi_comm())
+    # See https://github.com/trixi-framework/Trixi.jl/issues/328
+    iip = true # is-inplace, i.e., we modify a vector when calling rhs_parabolic!, rhs!
+    # Note that the IMEX time integration methods of OrdinaryDiffEq.jl treat the
+    # first function implicitly and the second one explicitly. Thus, we pass the
+    # stiffer parabolic function first.
+ return SplitODEProblem{iip}(rhs_parabolic!, rhs!, u0_ode, tspan, semi) end function rhs!(du_ode, u_ode, semi::SemidiscretizationHyperbolicParabolic, t) - @unpack mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache = semi + @unpack mesh, equations, initial_condition, boundary_conditions, source_terms, solver, cache = semi - u = wrap_array(u_ode, mesh, equations, solver, cache) - du = wrap_array(du_ode, mesh, equations, solver, cache) + u = wrap_array(u_ode, mesh, equations, solver, cache) + du = wrap_array(du_ode, mesh, equations, solver, cache) - # TODO: Taal decide, do we need to pass the mesh? - time_start = time_ns() - @trixi_timeit timer() "rhs!" rhs!(du, u, t, mesh, equations, initial_condition, - boundary_conditions, source_terms, solver, cache) - runtime = time_ns() - time_start - put!(semi.performance_counter.counters[1], runtime) + # TODO: Taal decide, do we need to pass the mesh? + time_start = time_ns() + @trixi_timeit timer() "rhs!" rhs!(du, u, t, mesh, equations, initial_condition, + boundary_conditions, source_terms, solver, cache) + runtime = time_ns() - time_start + put!(semi.performance_counter.counters[1], runtime) - return nothing + return nothing end function rhs_parabolic!(du_ode, u_ode, semi::SemidiscretizationHyperbolicParabolic, t) - @unpack mesh, equations_parabolic, initial_condition, boundary_conditions_parabolic, source_terms, solver, solver_parabolic, cache, cache_parabolic = semi - - u = wrap_array(u_ode, mesh, equations_parabolic, solver, cache_parabolic) - du = wrap_array(du_ode, mesh, equations_parabolic, solver, cache_parabolic) - - # TODO: Taal decide, do we need to pass the mesh? - time_start = time_ns() - @trixi_timeit timer() "parabolic rhs!" rhs_parabolic!(du, u, t, mesh, equations_parabolic, initial_condition, - boundary_conditions_parabolic, source_terms, - solver, solver_parabolic, cache, cache_parabolic) - runtime = time_ns() - time_start - put!(semi.performance_counter.counters[2], runtime) - - return nothing + @unpack mesh, equations_parabolic, initial_condition, boundary_conditions_parabolic, source_terms, solver, solver_parabolic, cache, cache_parabolic = semi + + u = wrap_array(u_ode, mesh, equations_parabolic, solver, cache_parabolic) + du = wrap_array(du_ode, mesh, equations_parabolic, solver, cache_parabolic) + + # TODO: Taal decide, do we need to pass the mesh? + time_start = time_ns() + @trixi_timeit timer() "parabolic rhs!" rhs_parabolic!(du, u, t, mesh, + equations_parabolic, + initial_condition, + boundary_conditions_parabolic, + source_terms, + solver, solver_parabolic, + cache, cache_parabolic) + runtime = time_ns() - time_start + put!(semi.performance_counter.counters[2], runtime) + + return nothing end - - end # @muladd diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index fc6420791bb..838fa2d5819 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -3,13 +3,14 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent abstract type AbstractVolumeIntegral end -get_element_variables!(element_variables, u, mesh, equations, - volume_integral::AbstractVolumeIntegral, dg, cache) = nothing - +function get_element_variables!(element_variables, u, mesh, equations, + volume_integral::AbstractVolumeIntegral, dg, cache) + nothing +end """ VolumeIntegralStrongForm() @@ -18,7 +19,6 @@ The classical strong form volume integral type for FD/DG methods. 
""" struct VolumeIntegralStrongForm <: AbstractVolumeIntegral end - """ VolumeIntegralWeakForm() @@ -40,7 +40,6 @@ struct VolumeIntegralWeakForm <: AbstractVolumeIntegral end create_cache(mesh, equations, ::VolumeIntegralWeakForm, dg, uEltype) = NamedTuple() - """ VolumeIntegralFluxDifferencing(volume_flux) @@ -67,23 +66,22 @@ the interface of numerical fluxes in Trixi.jl. [doi: 10.1016/j.jcp.2017.05.025](https://doi.org/10.1016/j.jcp.2017.05.025) """ struct VolumeIntegralFluxDifferencing{VolumeFlux} <: AbstractVolumeIntegral - volume_flux::VolumeFlux + volume_flux::VolumeFlux end function Base.show(io::IO, ::MIME"text/plain", integral::VolumeIntegralFluxDifferencing) - @nospecialize integral # reduce precompilation time - - if get(io, :compact, false) - show(io, integral) - else - setup = [ - "volume flux" => integral.volume_flux - ] - summary_box(io, "VolumeIntegralFluxDifferencing", setup) - end + @nospecialize integral # reduce precompilation time + + if get(io, :compact, false) + show(io, integral) + else + setup = [ + "volume flux" => integral.volume_flux, + ] + summary_box(io, "VolumeIntegralFluxDifferencing", setup) + end end - """ VolumeIntegralShockCapturingHG(indicator; volume_flux_dg=flux_central, volume_flux_fv=flux_lax_friedrichs) @@ -100,41 +98,45 @@ The amount of blending is determined by the `indicator`, e.g., "A provably entropy stable subcell shock capturing approach for high order split form DG" [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -struct VolumeIntegralShockCapturingHG{VolumeFluxDG, VolumeFluxFV, Indicator} <: AbstractVolumeIntegral - volume_flux_dg::VolumeFluxDG # symmetric, e.g. split-form or entropy-conservative - volume_flux_fv::VolumeFluxFV # non-symmetric in general, e.g. entropy-dissipative - indicator::Indicator -end - -function VolumeIntegralShockCapturingHG(indicator; volume_flux_dg=flux_central, - volume_flux_fv=flux_lax_friedrichs) - VolumeIntegralShockCapturingHG{typeof(volume_flux_dg), typeof(volume_flux_fv), typeof(indicator)}( - volume_flux_dg, volume_flux_fv, indicator) -end - -function Base.show(io::IO, mime::MIME"text/plain", integral::VolumeIntegralShockCapturingHG) - @nospecialize integral # reduce precompilation time - - if get(io, :compact, false) - show(io, integral) - else - summary_header(io, "VolumeIntegralShockCapturingHG") - summary_line(io, "volume flux DG", integral.volume_flux_dg) - summary_line(io, "volume flux FV", integral.volume_flux_fv) - summary_line(io, "indicator", integral.indicator |> typeof |> nameof) - show(increment_indent(io), mime, integral.indicator) - summary_footer(io) - end +struct VolumeIntegralShockCapturingHG{VolumeFluxDG, VolumeFluxFV, Indicator} <: + AbstractVolumeIntegral + volume_flux_dg::VolumeFluxDG # symmetric, e.g. split-form or entropy-conservative + volume_flux_fv::VolumeFluxFV # non-symmetric in general, e.g. 
entropy-dissipative + indicator::Indicator +end + +function VolumeIntegralShockCapturingHG(indicator; volume_flux_dg = flux_central, + volume_flux_fv = flux_lax_friedrichs) + VolumeIntegralShockCapturingHG{typeof(volume_flux_dg), typeof(volume_flux_fv), + typeof(indicator)}(volume_flux_dg, volume_flux_fv, + indicator) +end + +function Base.show(io::IO, mime::MIME"text/plain", + integral::VolumeIntegralShockCapturingHG) + @nospecialize integral # reduce precompilation time + + if get(io, :compact, false) + show(io, integral) + else + summary_header(io, "VolumeIntegralShockCapturingHG") + summary_line(io, "volume flux DG", integral.volume_flux_dg) + summary_line(io, "volume flux FV", integral.volume_flux_fv) + summary_line(io, "indicator", integral.indicator |> typeof |> nameof) + show(increment_indent(io), mime, integral.indicator) + summary_footer(io) + end end function get_element_variables!(element_variables, u, mesh, equations, - volume_integral::VolumeIntegralShockCapturingHG, dg, cache) - # call the indicator to get up-to-date values for IO - volume_integral.indicator(u, mesh, equations, dg, cache) - get_element_variables!(element_variables, volume_integral.indicator, volume_integral) + volume_integral::VolumeIntegralShockCapturingHG, dg, + cache) + # call the indicator to get up-to-date values for IO + volume_integral.indicator(u, mesh, equations, dg, cache) + get_element_variables!(element_variables, volume_integral.indicator, + volume_integral) end - """ VolumeIntegralPureLGLFiniteVolume(volume_flux_fv) @@ -154,24 +156,24 @@ mesh (LGL = Legendre-Gauss-Lobatto). [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ struct VolumeIntegralPureLGLFiniteVolume{VolumeFluxFV} <: AbstractVolumeIntegral - volume_flux_fv::VolumeFluxFV # non-symmetric in general, e.g. entropy-dissipative + volume_flux_fv::VolumeFluxFV # non-symmetric in general, e.g. entropy-dissipative end # TODO: Figure out if this can also be used for Gauss nodes, not just LGL, and adjust the name accordingly -function Base.show(io::IO, ::MIME"text/plain", integral::VolumeIntegralPureLGLFiniteVolume) - @nospecialize integral # reduce precompilation time - - if get(io, :compact, false) - show(io, integral) - else - setup = [ - "FV flux" => integral.volume_flux_fv - ] - summary_box(io, "VolumeIntegralPureLGLFiniteVolume", setup) - end +function Base.show(io::IO, ::MIME"text/plain", + integral::VolumeIntegralPureLGLFiniteVolume) + @nospecialize integral # reduce precompilation time + + if get(io, :compact, false) + show(io, integral) + else + setup = [ + "FV flux" => integral.volume_flux_fv, + ] + summary_box(io, "VolumeIntegralPureLGLFiniteVolume", setup) + end end - # TODO: FD. Should this definition live in a different file because it is # not strictly a DG method? """ @@ -195,23 +197,22 @@ See also [`splitting_steger_warming`](@ref), [`splitting_lax_friedrichs`](@ref), This is an experimental feature and may change in future releases. 
""" struct VolumeIntegralUpwind{FluxSplitting} <: AbstractVolumeIntegral - splitting::FluxSplitting + splitting::FluxSplitting end function Base.show(io::IO, ::MIME"text/plain", integral::VolumeIntegralUpwind) - @nospecialize integral # reduce precompilation time - - if get(io, :compact, false) - show(io, integral) - else - setup = [ - "flux splitting" => integral.splitting - ] - summary_box(io, "VolumeIntegralUpwind", setup) - end + @nospecialize integral # reduce precompilation time + + if get(io, :compact, false) + show(io, integral) + else + setup = [ + "flux splitting" => integral.splitting, + ] + summary_box(io, "VolumeIntegralUpwind", setup) + end end - abstract type AbstractSurfaceIntegral end """ @@ -234,25 +235,24 @@ See also [`VolumeIntegralWeakForm`](@ref). [doi: 10.1007/978-0-387-72067-8](https://doi.org/10.1007/978-0-387-72067-8) """ struct SurfaceIntegralWeakForm{SurfaceFlux} <: AbstractSurfaceIntegral - surface_flux::SurfaceFlux + surface_flux::SurfaceFlux end SurfaceIntegralWeakForm() = SurfaceIntegralWeakForm(flux_central) function Base.show(io::IO, ::MIME"text/plain", integral::SurfaceIntegralWeakForm) - @nospecialize integral # reduce precompilation time - - if get(io, :compact, false) - show(io, integral) - else - setup = [ - "surface flux" => integral.surface_flux - ] - summary_box(io, "SurfaceIntegralWeakForm", setup) - end + @nospecialize integral # reduce precompilation time + + if get(io, :compact, false) + show(io, integral) + else + setup = [ + "surface flux" => integral.surface_flux, + ] + summary_box(io, "SurfaceIntegralWeakForm", setup) + end end - """ SurfaceIntegralStrongForm(surface_flux=flux_central) @@ -261,25 +261,24 @@ The classical strong form surface integral type for FD/DG methods. See also [`VolumeIntegralStrongForm`](@ref). """ struct SurfaceIntegralStrongForm{SurfaceFlux} <: AbstractSurfaceIntegral - surface_flux::SurfaceFlux + surface_flux::SurfaceFlux end SurfaceIntegralStrongForm() = SurfaceIntegralStrongForm(flux_central) function Base.show(io::IO, ::MIME"text/plain", integral::SurfaceIntegralStrongForm) - @nospecialize integral # reduce precompilation time - - if get(io, :compact, false) - show(io, integral) - else - setup = [ - "surface flux" => integral.surface_flux - ] - summary_box(io, "SurfaceIntegralStrongForm", setup) - end + @nospecialize integral # reduce precompilation time + + if get(io, :compact, false) + show(io, integral) + else + setup = [ + "surface flux" => integral.surface_flux, + ] + summary_box(io, "SurfaceIntegralStrongForm", setup) + end end - # TODO: FD. Should this definition live in a different file because it is # not strictly a DG method? """ @@ -295,23 +294,22 @@ See also [`VolumeIntegralUpwind`](@ref). This is an experimental feature and may change in future releases. 
""" struct SurfaceIntegralUpwind{FluxSplitting} <: AbstractSurfaceIntegral - splitting::FluxSplitting + splitting::FluxSplitting end function Base.show(io::IO, ::MIME"text/plain", integral::SurfaceIntegralUpwind) - @nospecialize integral # reduce precompilation time - - if get(io, :compact, false) - show(io, integral) - else - setup = [ - "flux splitting" => integral.splitting - ] - summary_box(io, "SurfaceIntegralUpwind", setup) - end + @nospecialize integral # reduce precompilation time + + if get(io, :compact, false) + show(io, integral) + else + setup = [ + "flux splitting" => integral.splitting, + ] + summary_box(io, "SurfaceIntegralUpwind", setup) + end end - """ DG(; basis, mortar, surface_integral, volume_integral) @@ -320,55 +318,56 @@ If [`basis isa LobattoLegendreBasis`](@ref LobattoLegendreBasis), this creates a [`DGSEM`](@ref). """ struct DG{Basis, Mortar, SurfaceIntegral, VolumeIntegral} - basis::Basis - mortar::Mortar - surface_integral::SurfaceIntegral - volume_integral::VolumeIntegral + basis::Basis + mortar::Mortar + surface_integral::SurfaceIntegral + volume_integral::VolumeIntegral end function Base.show(io::IO, dg::DG) - @nospecialize dg # reduce precompilation time + @nospecialize dg # reduce precompilation time - print(io, "DG{", real(dg), "}(") - print(io, dg.basis) - print(io, ", ", dg.mortar) - print(io, ", ", dg.surface_integral) - print(io, ", ", dg.volume_integral) - print(io, ")") + print(io, "DG{", real(dg), "}(") + print(io, dg.basis) + print(io, ", ", dg.mortar) + print(io, ", ", dg.surface_integral) + print(io, ", ", dg.volume_integral) + print(io, ")") end function Base.show(io::IO, mime::MIME"text/plain", dg::DG) - @nospecialize dg # reduce precompilation time - - if get(io, :compact, false) - show(io, dg) - else - summary_header(io, "DG{" * string(real(dg)) * "}") - summary_line(io, "basis", dg.basis) - summary_line(io, "mortar", dg.mortar) - summary_line(io, "surface integral", dg.surface_integral |> typeof |> nameof) - show(increment_indent(io), mime, dg.surface_integral) - summary_line(io, "volume integral", dg.volume_integral |> typeof |> nameof) - if !(dg.volume_integral isa VolumeIntegralWeakForm) - show(increment_indent(io), mime, dg.volume_integral) + @nospecialize dg # reduce precompilation time + + if get(io, :compact, false) + show(io, dg) + else + summary_header(io, "DG{" * string(real(dg)) * "}") + summary_line(io, "basis", dg.basis) + summary_line(io, "mortar", dg.mortar) + summary_line(io, "surface integral", dg.surface_integral |> typeof |> nameof) + show(increment_indent(io), mime, dg.surface_integral) + summary_line(io, "volume integral", dg.volume_integral |> typeof |> nameof) + if !(dg.volume_integral isa VolumeIntegralWeakForm) + show(increment_indent(io), mime, dg.volume_integral) + end + summary_footer(io) end - summary_footer(io) - end end Base.summary(io::IO, dg::DG) = print(io, "DG(" * summary(dg.basis) * ")") @inline Base.real(dg::DG) = real(dg.basis) - function get_element_variables!(element_variables, u, mesh, equations, dg::DG, cache) - get_element_variables!(element_variables, u, mesh, equations, dg.volume_integral, dg, cache) + get_element_variables!(element_variables, u, mesh, equations, dg.volume_integral, + dg, cache) end - const MeshesDGSEM = Union{TreeMesh, StructuredMesh, UnstructuredMesh2D, P4estMesh} -@inline ndofs(mesh::MeshesDGSEM, dg::DG, cache) = nelements(cache.elements) * nnodes(dg)^ndims(mesh) +@inline function ndofs(mesh::MeshesDGSEM, dg::DG, cache) + nelements(cache.elements) * nnodes(dg)^ndims(mesh) +end 
# TODO: Taal performance, 1:nnodes(dg) vs. Base.OneTo(nnodes(dg)) vs. SOneTo(nnodes(dg)) for DGSEM """ @@ -379,12 +378,14 @@ for the nodes in `dg`. In particular, not the nodes themselves are returned. """ @inline eachnode(dg::DG) = Base.OneTo(nnodes(dg)) -@inline nnodes(dg::DG) = nnodes(dg.basis) +@inline nnodes(dg::DG) = nnodes(dg.basis) # This is used in some more general analysis code and needs to dispatch on the # `mesh` for some combinations of mesh/solver. @inline nelements(mesh, dg::DG, cache) = nelements(dg, cache) -@inline ndofsglobal(mesh, dg::DG, cache) = nelementsglobal(dg, cache) * nnodes(dg)^ndims(mesh) +@inline function ndofsglobal(mesh, dg::DG, cache) + nelementsglobal(dg, cache) * nnodes(dg)^ndims(mesh) +end """ eachelement(dg::DG, cache) @@ -393,7 +394,7 @@ Return an iterator over the indices that specify the location in relevant data s for the elements in `cache`. In particular, not the elements themselves are returned. """ -@inline eachelement(dg::DG, cache) = Base.OneTo(nelements(dg, cache)) +@inline eachelement(dg::DG, cache) = Base.OneTo(nelements(dg, cache)) """ eachinterface(dg::DG, cache) @@ -411,7 +412,7 @@ Return an iterator over the indices that specify the location in relevant data s for the boundaries in `cache`. In particular, not the boundaries themselves are returned. """ -@inline eachboundary(dg::DG, cache) = Base.OneTo(nboundaries(dg, cache)) +@inline eachboundary(dg::DG, cache) = Base.OneTo(nboundaries(dg, cache)) """ eachmortar(dg::DG, cache) @@ -420,7 +421,7 @@ Return an iterator over the indices that specify the location in relevant data s for the mortars in `cache`. In particular, not the mortars themselves are returned. """ -@inline eachmortar(dg::DG, cache) = Base.OneTo(nmortars(dg, cache)) +@inline eachmortar(dg::DG, cache) = Base.OneTo(nmortars(dg, cache)) """ eachmpiinterface(dg::DG, cache) @@ -440,15 +441,16 @@ In particular, not the mortars themselves are returned. """ @inline eachmpimortar(dg::DG, cache) = Base.OneTo(nmpimortars(dg, cache)) -@inline nelements(dg::DG, cache) = nelements(cache.elements) -@inline nelementsglobal(dg::DG, cache) = mpi_isparallel() ? cache.mpi_cache.n_elements_global : nelements(dg, cache) +@inline nelements(dg::DG, cache) = nelements(cache.elements) +@inline function nelementsglobal(dg::DG, cache) + mpi_isparallel() ? cache.mpi_cache.n_elements_global : nelements(dg, cache) +end @inline ninterfaces(dg::DG, cache) = ninterfaces(cache.interfaces) @inline nboundaries(dg::DG, cache) = nboundaries(cache.boundaries) -@inline nmortars(dg::DG, cache) = nmortars(cache.mortars) +@inline nmortars(dg::DG, cache) = nmortars(cache.mortars) @inline nmpiinterfaces(dg::DG, cache) = nmpiinterfaces(cache.mpi_interfaces) @inline nmpimortars(dg::DG, cache) = nmpimortars(cache.mpi_mortars) - # The following functions assume an array-of-structs memory layout # We would like to experiment with different memory layout choices # in the future, see @@ -456,66 +458,64 @@ In particular, not the mortars themselves are returned. # - https://github.com/trixi-framework/Trixi.jl/issues/87 # - https://github.com/trixi-framework/Trixi.jl/issues/86 @inline function get_node_coords(x, equations, solver::DG, indices...) - SVector(ntuple(@inline(idx -> x[idx, indices...]), Val(ndims(equations)))) + SVector(ntuple(@inline(idx->x[idx, indices...]), Val(ndims(equations)))) end @inline function get_node_vars(u, equations, solver::DG, indices...) 
-  # There is a cut-off at `n == 10` inside of the method
-  # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17
-  # in Julia `v1.5`, leading to type instabilities if
-  # more than ten variables are used. That's why we use
-  # `Val(...)` below.
-  # We use `@inline` to make sure that the `getindex` calls are
-  # really inlined, which might be the default choice of the Julia
-  # compiler for standard `Array`s but not necessarily for more
-  # advanced array types such as `PtrArray`s, cf.
-  # https://github.com/JuliaSIMD/VectorizationBase.jl/issues/55
-  SVector(ntuple(@inline(v -> u[v, indices...]), Val(nvariables(equations))))
+    # There is a cut-off at `n == 10` inside of the method
+    # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17
+    # in Julia `v1.5`, leading to type instabilities if
+    # more than ten variables are used. That's why we use
+    # `Val(...)` below.
+    # We use `@inline` to make sure that the `getindex` calls are
+    # really inlined, which might be the default choice of the Julia
+    # compiler for standard `Array`s but not necessarily for more
+    # advanced array types such as `PtrArray`s, cf.
+    # https://github.com/JuliaSIMD/VectorizationBase.jl/issues/55
+    SVector(ntuple(@inline(v->u[v, indices...]), Val(nvariables(equations))))
end

@inline function get_surface_node_vars(u, equations, solver::DG, indices...)
-  # There is a cut-off at `n == 10` inside of the method
-  # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17
-  # in Julia `v1.5`, leading to type instabilities if
-  # more than ten variables are used. That's why we use
-  # `Val(...)` below.
-  u_ll = SVector(ntuple(@inline(v -> u[1, v, indices...]), Val(nvariables(equations))))
-  u_rr = SVector(ntuple(@inline(v -> u[2, v, indices...]), Val(nvariables(equations))))
-  return u_ll, u_rr
+    # There is a cut-off at `n == 10` inside of the method
+    # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17
+    # in Julia `v1.5`, leading to type instabilities if
+    # more than ten variables are used. That's why we use
+    # `Val(...)` below.
+    u_ll = SVector(ntuple(@inline(v->u[1, v, indices...]), Val(nvariables(equations))))
+    u_rr = SVector(ntuple(@inline(v->u[2, v, indices...]), Val(nvariables(equations))))
+    return u_ll, u_rr
end

@inline function set_node_vars!(u, u_node, equations, solver::DG, indices...)
-  for v in eachvariable(equations)
-    u[v, indices...] = u_node[v]
-  end
-  return nothing
+    for v in eachvariable(equations)
+        u[v, indices...] = u_node[v]
+    end
+    return nothing
end

@inline function add_to_node_vars!(u, u_node, equations, solver::DG, indices...)
-  for v in eachvariable(equations)
-    u[v, indices...] += u_node[v]
-  end
-  return nothing
+    for v in eachvariable(equations)
+        u[v, indices...] += u_node[v]
+    end
+    return nothing
end

# Use this function instead of `add_to_node_vars` to speed up
# multiply-and-add-to-node-vars operations
# See https://github.com/trixi-framework/Trixi.jl/pull/643
-@inline function multiply_add_to_node_vars!(u, factor, u_node, equations, solver::DG, indices...)
-  for v in eachvariable(equations)
-    u[v, indices...] = u[v, indices...] + factor * u_node[v]
-  end
-  return nothing
+@inline function multiply_add_to_node_vars!(u, factor, u_node, equations, solver::DG,
+                                            indices...)
+    for v in eachvariable(equations)
+        u[v, indices...] = u[v, indices...] + factor * u_node[v]
+    end
+    return nothing
end

-
# Used for analyze_solution
SolutionAnalyzer(dg::DG; kwargs...) = SolutionAnalyzer(dg.basis; kwargs...)
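For illustration only (not part of the patch): `get_node_vars` and `set_node_vars!` above gather the solution components at one node into an `SVector` and scatter them back, with `Val(nvariables(equations))` keeping the tuple length a compile-time constant. A minimal self-contained sketch of the same pattern on a plain array, with hypothetical sizes:

using StaticArrays

# hypothetical storage layout: 3 variables at 4 nodes on 2 elements
u = rand(3, 4, 2)

# gather, as `get_node_vars` does: `Val(3)` fixes the tuple length at compile
# time, avoiding the `ntuple` type instability mentioned in the comments above
u_node = SVector(ntuple(@inline(v -> u[v, 2, 1]), Val(3)))

# scatter, as `set_node_vars!` does: write each component back in place
for v in 1:3
    u[v, 2, 1] = u_node[v]
end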
AdaptorAMR(mesh, dg::DG) = AdaptorL2(dg.basis)

-
-
# General structs for discretizations based on the basic principle of
# DGSEM (discontinuous Galerkin spectral element method)
include("dgsem/dgsem.jl")
@@ -526,125 +526,138 @@ include("dgsem/dgsem.jl")
# functionality implemented for DGSEM.
include("fdsbp_tree/fdsbp.jl")

-
-
function allocate_coefficients(mesh::AbstractMesh, equations, dg::DG, cache)
-  # We must allocate a `Vector` in order to be able to `resize!` it (AMR).
-  # cf. wrap_array
-  zeros(eltype(cache.elements), nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache))
-end
-
-@inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations, dg::DGSEM, cache)
-  @boundscheck begin
-    @assert length(u_ode) == nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)
-  end
-  # We would like to use
-  # reshape(u_ode, (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache)))
-  # but that results in
-  # ERROR: LoadError: cannot resize array with shared data
-  # when we resize! `u_ode` during AMR.
-  #
-  # !!! danger "Segfaults"
-  #     Remember to `GC.@preserve` temporaries such as copies of `u_ode`
-  #     and other stuff that is only used indirectly via `wrap_array` afterwards!
-
-  # Currently, there are problems when AD is used with `PtrArray`s in broadcasts
-  # since LoopVectorization does not support `ForwardDiff.Dual`s. Hence, we use
-  # optimized `PtrArray`s whenever possible and fall back to plain `Array`s
-  # otherwise.
-  if LoopVectorization.check_args(u_ode)
-    # This version using `PtrArray`s from StrideArrays.jl is very fast and
-    # does not result in allocations.
+    # We must allocate a `Vector` in order to be able to `resize!` it (AMR).
+    # cf. wrap_array
+    zeros(eltype(cache.elements),
+          nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache))
+end
+
+@inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations,
+                            dg::DGSEM, cache)
+    @boundscheck begin
+        @assert length(u_ode) ==
+                nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)
+    end
+    # We would like to use
+    # reshape(u_ode, (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache)))
+    # but that results in
+    # ERROR: LoadError: cannot resize array with shared data
+    # when we resize! `u_ode` during AMR.
+    #
+    # !!! danger "Segfaults"
+    #     Remember to `GC.@preserve` temporaries such as copies of `u_ode`
+    #     and other stuff that is only used indirectly via `wrap_array` afterwards!
+
+    # Currently, there are problems when AD is used with `PtrArray`s in broadcasts
+    # since LoopVectorization does not support `ForwardDiff.Dual`s. Hence, we use
+    # optimized `PtrArray`s whenever possible and fall back to plain `Array`s
+    # otherwise.
+    if LoopVectorization.check_args(u_ode)
+        # This version using `PtrArray`s from StrideArrays.jl is very fast and
+        # does not result in allocations.
+        #
+        # !!! danger "Heisenbug"
+        #     Do not use this code when `@threaded` uses `Threads.@threads`. There is
+        #     a very strange Heisenbug that makes some parts very slow *sometimes*.
+        #     In fact, everything can be fast and fine for many cases but some parts
+        #     of the RHS evaluation can take *exactly* (!) five seconds randomly...
+        #     Hence, this version should only be used when `@threaded` is based on
+        #     `@batch` from Polyester.jl or something similar. Using Polyester.jl
+        #     is probably the best option since everything will be handed over to
+        #     Chris Elrod, one of the best performance software engineers for Julia.
+        PtrArray(pointer(u_ode),
+                 (StaticInt(nvariables(equations)),
+                  ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))...,
+                  nelements(dg, cache)))
+        # (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache)))
+    else
+        # The following version is reasonably fast and allows us to `resize!(u_ode, ...)`.
+        unsafe_wrap(Array{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode),
+                    (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))...,
+                     nelements(dg, cache)))
+    end
end

# Finite difference summation by parts (FDSBP) methods
-@inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations, dg::FDSBP, cache)
-  @boundscheck begin
-    @assert length(u_ode) == nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)
-  end
-  # See comments on the DGSEM version above
-  if LoopVectorization.check_args(u_ode)
-    # Here, we do not specialize on the number of nodes using `StaticInt` since
-    # - it will not be type stable (SBP operators just store it as a runtime value)
-    # - FD methods tend to use high node counts
-    PtrArray(pointer(u_ode),
-             (StaticInt(nvariables(equations)), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache)))
-  else
-    # The following version is reasonably fast and allows us to `resize!(u_ode, ...)`.
-    unsafe_wrap(Array{eltype(u_ode), ndims(mesh)+2}, pointer(u_ode),
-                (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache)))
-  end
+@inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations,
+                            dg::FDSBP, cache)
+    @boundscheck begin
+        @assert length(u_ode) ==
+                nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)
+    end
+    # See comments on the DGSEM version above
+    if LoopVectorization.check_args(u_ode)
+        # Here, we do not specialize on the number of nodes using `StaticInt` since
+        # - it will not be type stable (SBP operators just store it as a runtime value)
+        # - FD methods tend to use high node counts
+        PtrArray(pointer(u_ode),
+                 (StaticInt(nvariables(equations)),
+                  ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache)))
+    else
+        # The following version is reasonably fast and allows us to `resize!(u_ode, ...)`.
+        unsafe_wrap(Array{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode),
+                    (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))...,
+                     nelements(dg, cache)))
+    end
end

# General fallback
-@inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations, dg::DG, cache)
-  wrap_array_native(u_ode, mesh, equations, dg, cache)
+@inline function wrap_array(u_ode::AbstractVector, mesh::AbstractMesh, equations,
+                            dg::DG, cache)
+    wrap_array_native(u_ode, mesh, equations, dg, cache)
end

# Like `wrap_array`, but guarantees to return a plain `Array`, which can be better
# for interfacing with external C libraries (MPI, HDF5, visualization),
# writing solution files etc.
-@inline function wrap_array_native(u_ode::AbstractVector, mesh::AbstractMesh, equations, dg::DG, cache)
-  @boundscheck begin
-    @assert length(u_ode) == nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)
-  end
-  unsafe_wrap(Array{eltype(u_ode), ndims(mesh)+2}, pointer(u_ode),
-              (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))..., nelements(dg, cache)))
-end
-
-
-function compute_coefficients!(u, func, t, mesh::AbstractMesh{1}, equations, dg::DG, cache)
-
-  @threaded for element in eachelement(dg, cache)
-    for i in eachnode(dg)
-      x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, element)
-      u_node = func(x_node, t, equations)
-      set_node_vars!(u, u_node, equations, dg, i, element)
+@inline function wrap_array_native(u_ode::AbstractVector, mesh::AbstractMesh, equations,
+                                   dg::DG, cache)
+    @boundscheck begin
+        @assert length(u_ode) ==
+                nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)
+    end
+    unsafe_wrap(Array{eltype(u_ode), ndims(mesh) + 2}, pointer(u_ode),
+                (nvariables(equations), ntuple(_ -> nnodes(dg), ndims(mesh))...,
+                 nelements(dg, cache)))
+end
+
+function compute_coefficients!(u, func, t, mesh::AbstractMesh{1}, equations, dg::DG,
+                               cache)
+    @threaded for element in eachelement(dg, cache)
+        for i in eachnode(dg)
+            x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i,
+                                     element)
+            u_node = func(x_node, t, equations)
+            set_node_vars!(u, u_node, equations, dg, i, element)
+        end
    end
-  end
end

-function compute_coefficients!(u, func, t, mesh::AbstractMesh{2}, equations, dg::DG, cache)
-
-  @threaded for element in eachelement(dg, cache)
-    for j in eachnode(dg), i in eachnode(dg)
-      x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, element)
-      u_node = func(x_node, t, equations)
-      set_node_vars!(u, u_node, equations, dg, i, j, element)
+function compute_coefficients!(u, func, t, mesh::AbstractMesh{2}, equations, dg::DG,
+                               cache)
+    @threaded for element in eachelement(dg, cache)
+        for j in eachnode(dg), i in eachnode(dg)
+            x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i,
+                                     j, element)
+            u_node = func(x_node, t, equations)
+            set_node_vars!(u, u_node, equations, dg, i, j, element)
+        end
    end
-  end
end

-function compute_coefficients!(u, func, t, mesh::AbstractMesh{3}, equations, dg::DG, cache)
-
-  @threaded for element in eachelement(dg, cache)
-    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
-      x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, k, element)
-      u_node = func(x_node, t, equations)
-      set_node_vars!(u, u_node, equations, dg, i, j, k, element)
+function compute_coefficients!(u, func, t, mesh::AbstractMesh{3}, equations, dg::DG,
+                               cache)
+    @threaded for element in eachelement(dg, cache)
+        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+            x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i,
+                                     j, k, element)
+            u_node = func(x_node, t, equations)
+            set_node_vars!(u, u_node, equations, dg, i, j, k, element)
+        end
    end
-  end
end

-
# Discretizations specific to each mesh type of Trixi.jl
# If some functionality is shared by multiple combinations of meshes/solvers,
# it is defined in the directory of the most basic mesh and solver type.
@@ -657,6 +670,4 @@ include("dgsem_tree/dg.jl")
include("dgsem_structured/dg.jl")
include("dgsem_unstructured/dg.jl")
include("dgsem_p4est/dg.jl")
-
-
end # @muladd
diff --git a/src/solvers/dgmulti.jl b/src/solvers/dgmulti.jl
index 8ff27db0cd9..363d91b5a4c 100644
--- a/src/solvers/dgmulti.jl
+++ b/src/solvers/dgmulti.jl
@@ -14,4 +14,4 @@ include("dgmulti/flux_differencing_compressible_euler.jl")
include("dgmulti/shock_capturing.jl")

# parabolic terms for DGMulti solvers
-include("dgmulti/dg_parabolic.jl")
\ No newline at end of file
+include("dgmulti/dg_parabolic.jl")
diff --git a/src/solvers/dgmulti/dg.jl b/src/solvers/dgmulti/dg.jl
index c9b7f5f021d..d51c7cabf9d 100644
--- a/src/solvers/dgmulti/dg.jl
+++ b/src/solvers/dgmulti/dg.jl
@@ -3,17 +3,21 @@
# we need to opt-in explicitly.
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
@muladd begin
-
+#! format: noindent

# out <- A*x
mul_by!(A) = @inline (out, x) -> matmul!(out, A, x)
mul_by!(A::T) where {T <: SimpleKronecker} = @inline (out, x) -> mul!(out, A, x)
mul_by!(A::AbstractSparseMatrix) = @inline (out, x) -> mul!(out, A, x)
-mul_by!(A::LinearAlgebra.AdjOrTrans{T, S}) where {T, S<:AbstractSparseMatrix} = @inline (out, x) -> mul!(out, A, x)
+function mul_by!(A::LinearAlgebra.AdjOrTrans{T, S}) where {T, S <: AbstractSparseMatrix}
+    @inline (out, x) -> mul!(out, A, x)
+end

# out <- out + α * A * x
mul_by_accum!(A, α) = @inline (out, x) -> matmul!(out, A, x, α, One())
-mul_by_accum!(A::AbstractSparseMatrix, α) = @inline (out, x) -> mul!(out, A, x, α, One())
+function mul_by_accum!(A::AbstractSparseMatrix, α)
+    @inline (out, x) -> mul!(out, A, x, α, One())
+end

# out <- out + A * x
mul_by_accum!(A) = mul_by_accum!(A, One())
@@ -25,15 +29,17 @@ mul_by!(A::UniformScaling) = MulByUniformScaling()
mul_by_accum!(A::UniformScaling) = MulByAccumUniformScaling()

# StructArray fallback
-@inline apply_to_each_field(f::F, args::Vararg{Any, N}) where {F, N} = StructArrays.foreachfield(f, args...)
+@inline function apply_to_each_field(f::F, args::Vararg{Any, N}) where {F, N}
+    StructArrays.foreachfield(f, args...)
+end

# specialize for UniformScaling types: works for either StructArray{SVector} or Matrix{SVector}
# solution storage formats.
@inline apply_to_each_field(f::MulByUniformScaling, out, x, args...) = copy!(out, x)

@inline function apply_to_each_field(f::MulByAccumUniformScaling, out, x, args...)
-  @threaded for i in eachindex(x)
-    out[i] = out[i] + x[i]
-  end
+    @threaded for i in eachindex(x)
+        out[i] = out[i] + x[i]
+    end
end

"""
    eachdim(mesh)
@@ -46,7 +52,9 @@ In particular, not the dimensions themselves are returned.
"""
@inline eachdim(mesh) = Base.OneTo(ndims(mesh))

# iteration over all elements in a mesh
-@inline ndofs(mesh::DGMultiMesh, dg::DGMulti, other_args...) = dg.basis.Np * mesh.md.num_elements
+@inline function ndofs(mesh::DGMultiMesh, dg::DGMulti, other_args...)
+    dg.basis.Np * mesh.md.num_elements
+end

"""
    eachelement(mesh::DGMultiMesh, dg::DGMulti, other_args...)
@@ -54,7 +62,9 @@ Return an iterator over the indices that specify the location in relevant data s for the elements in `mesh`. In particular, not the elements themselves are returned. """ -@inline eachelement(mesh::DGMultiMesh, dg::DGMulti, other_args...) = Base.OneTo(mesh.md.num_elements) +@inline function eachelement(mesh::DGMultiMesh, dg::DGMulti, other_args...) + Base.OneTo(mesh.md.num_elements) +end # iteration over quantities in a single element @inline nnodes(basis::RefElemData) = basis.Np @@ -66,7 +76,9 @@ Return an iterator over the indices that specify the location in relevant data s for the face nodes in `dg`. In particular, not the face_nodes themselves are returned. """ -@inline each_face_node(mesh::DGMultiMesh, dg::DGMulti, other_args...) = Base.OneTo(dg.basis.Nfq) +@inline function each_face_node(mesh::DGMultiMesh, dg::DGMulti, other_args...) + Base.OneTo(dg.basis.Nfq) +end """ each_quad_node(mesh::DGMultiMesh, dg::DGMulti, other_args...) @@ -75,7 +87,9 @@ Return an iterator over the indices that specify the location in relevant data s for the quadrature nodes in `dg`. In particular, not the quadrature nodes themselves are returned. """ -@inline each_quad_node(mesh::DGMultiMesh, dg::DGMulti, other_args...) = Base.OneTo(dg.basis.Nq) +@inline function each_quad_node(mesh::DGMultiMesh, dg::DGMulti, other_args...) + Base.OneTo(dg.basis.Nq) +end # iteration over quantities over the entire mesh (dofs, quad nodes, face nodes). """ @@ -85,8 +99,9 @@ Return an iterator over the indices that specify the location in relevant data s for the degrees of freedom (DOF) in `dg`. In particular, not the DOFs themselves are returned. """ -@inline each_dof_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) = Base.OneTo(ndofs(mesh, dg, other_args...)) - +@inline function each_dof_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) + Base.OneTo(ndofs(mesh, dg, other_args...)) +end """ each_quad_node_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) @@ -95,7 +110,9 @@ Return an iterator over the indices that specify the location in relevant data s for the global quadrature nodes in `mesh`. In particular, not the quadrature nodes themselves are returned. """ -@inline each_quad_node_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) = Base.OneTo(dg.basis.Nq * mesh.md.num_elements) +@inline function each_quad_node_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) + Base.OneTo(dg.basis.Nq * mesh.md.num_elements) +end """ each_face_node_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) @@ -104,156 +121,166 @@ Return an iterator over the indices that specify the location in relevant data s for the face nodes in `mesh`. In particular, not the face nodes themselves are returned. """ -@inline each_face_node_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) = Base.OneTo(dg.basis.Nfq * mesh.md.num_elements) +@inline function each_face_node_global(mesh::DGMultiMesh, dg::DGMulti, other_args...) 
+ Base.OneTo(dg.basis.Nfq * mesh.md.num_elements) +end # interface with semidiscretization_hyperbolic wrap_array(u_ode, mesh::DGMultiMesh, equations, dg::DGMulti, cache) = u_ode wrap_array_native(u_ode, mesh::DGMultiMesh, equations, dg::DGMulti, cache) = u_ode -function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys, ValueTypes}, mesh::DGMultiMesh, - dg::DGMulti, cache) where {Keys, ValueTypes<:NTuple{N, Any}} where {N} - return boundary_conditions +function digest_boundary_conditions(boundary_conditions::NamedTuple{Keys, ValueTypes}, + mesh::DGMultiMesh, + dg::DGMulti, + cache) where {Keys, ValueTypes <: NTuple{N, Any} + } where {N} + return boundary_conditions end # Allocate nested array type for DGMulti solution storage. function allocate_nested_array(uEltype, nvars, array_dimensions, dg) - # store components as separate arrays, combine via StructArrays - return StructArray{SVector{nvars, uEltype}}(ntuple(_->zeros(uEltype, array_dimensions...), nvars)) + # store components as separate arrays, combine via StructArrays + return StructArray{SVector{nvars, uEltype}}(ntuple(_ -> zeros(uEltype, + array_dimensions...), + nvars)) end function reset_du!(du, dg::DGMulti, other_args...) - @threaded for i in eachindex(du) - du[i] = zero(eltype(du)) - end + @threaded for i in eachindex(du) + du[i] = zero(eltype(du)) + end - return du + return du end # Constructs cache variables for both affine and non-affine (curved) DGMultiMeshes -function create_cache(mesh::DGMultiMesh{NDIMS}, equations, dg::DGMultiWeakForm, RealT, uEltype) where {NDIMS} - rd = dg.basis - md = mesh.md - - # volume quadrature weights, volume interpolation matrix, mass matrix, differentiation matrices - @unpack wq, Vq, M, Drst = rd - - # ∫f(u) * dv/dx_i = ∑_j (Vq*Drst[i])'*diagm(wq)*(rstxyzJ[i,j].*f(Vq*u)) - weak_differentiation_matrices = map(D -> -M \ ((Vq * D)' * Diagonal(wq)), Drst) - - nvars = nvariables(equations) - - # storage for volume quadrature values, face quadrature values, flux values - u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg) - u_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) - flux_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) - if typeof(rd.approximation_type) <: Union{SBP, AbstractNonperiodicDerivativeOperator} - lift_scalings = rd.wf ./ rd.wq[rd.Fmask] # lift scalings for diag-norm SBP operators - else - lift_scalings = nothing - end +function create_cache(mesh::DGMultiMesh{NDIMS}, equations, dg::DGMultiWeakForm, RealT, + uEltype) where {NDIMS} + rd = dg.basis + md = mesh.md + + # volume quadrature weights, volume interpolation matrix, mass matrix, differentiation matrices + @unpack wq, Vq, M, Drst = rd + + # ∫f(u) * dv/dx_i = ∑_j (Vq*Drst[i])'*diagm(wq)*(rstxyzJ[i,j].*f(Vq*u)) + weak_differentiation_matrices = map(D -> -M \ ((Vq * D)' * Diagonal(wq)), Drst) + + nvars = nvariables(equations) + + # storage for volume quadrature values, face quadrature values, flux values + u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg) + u_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) + flux_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) + if typeof(rd.approximation_type) <: + Union{SBP, AbstractNonperiodicDerivativeOperator} + lift_scalings = rd.wf ./ rd.wq[rd.Fmask] # lift scalings for diag-norm SBP operators + else + lift_scalings = nothing + end - # local storage for volume integral and source computations - local_values_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), 
dg) for _ in 1:Threads.nthreads()] + # local storage for volume integral and source computations + local_values_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) + for _ in 1:Threads.nthreads()] - # For curved meshes, we interpolate geometric terms from nodal points to quadrature points. - # For affine meshes, we just access one element of this interpolated data. - dxidxhatj = map(x -> rd.Vq * x, md.rstxyzJ) + # For curved meshes, we interpolate geometric terms from nodal points to quadrature points. + # For affine meshes, we just access one element of this interpolated data. + dxidxhatj = map(x -> rd.Vq * x, md.rstxyzJ) - # interpolate J to quadrature points for weight-adjusted DG (WADG) - invJ = inv.(rd.Vq * md.J) + # interpolate J to quadrature points for weight-adjusted DG (WADG) + invJ = inv.(rd.Vq * md.J) - # for scaling by curved geometric terms (not used by affine DGMultiMesh) - flux_threaded = - [[allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) for _ in 1:NDIMS] for _ in 1:Threads.nthreads()] - rotated_flux_threaded = - [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) for _ in 1:Threads.nthreads()] + # for scaling by curved geometric terms (not used by affine DGMultiMesh) + flux_threaded = [[allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) + for _ in 1:NDIMS] for _ in 1:Threads.nthreads()] + rotated_flux_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) + for _ in 1:Threads.nthreads()] - return (; md, weak_differentiation_matrices, lift_scalings, invJ, dxidxhatj, + return (; md, weak_differentiation_matrices, lift_scalings, invJ, dxidxhatj, u_values, u_face_values, flux_face_values, local_values_threaded, flux_threaded, rotated_flux_threaded) end function allocate_coefficients(mesh::DGMultiMesh, equations, dg::DGMulti, cache) - return allocate_nested_array(real(dg), nvariables(equations), size(mesh.md.x), dg) + return allocate_nested_array(real(dg), nvariables(equations), size(mesh.md.x), dg) end function compute_coefficients!(u, initial_condition, t, mesh::DGMultiMesh, equations, dg::DGMulti, cache) - md = mesh.md - rd = dg.basis - @unpack u_values = cache - - # evaluate the initial condition at quadrature points - @threaded for i in each_quad_node_global(mesh, dg, cache) - u_values[i] = initial_condition(SVector(getindex.(md.xyzq, i)), - t, equations) - end + md = mesh.md + rd = dg.basis + @unpack u_values = cache + + # evaluate the initial condition at quadrature points + @threaded for i in each_quad_node_global(mesh, dg, cache) + u_values[i] = initial_condition(SVector(getindex.(md.xyzq, i)), + t, equations) + end - # multiplying by Pq computes the L2 projection - apply_to_each_field(mul_by!(rd.Pq), u, u_values) + # multiplying by Pq computes the L2 projection + apply_to_each_field(mul_by!(rd.Pq), u, u_values) end # estimates the timestep based on polynomial degree and mesh. Does not account for physics (e.g., # computes an estimate of `dt` based on the advection equation with constant unit advection speed). 
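# Illustrative sketch (not part of the patch): the cache entries built above
# via `allocate_nested_array` are `StructArray`s of `SVector`s, so `u[i, e]`
# yields the full state at one node while each solution variable stays
# contiguous in memory. Assumes the registered packages StructArrays and
# StaticArrays:
using StructArrays, StaticArrays

nvars, n_nodes, n_elements = 3, 4, 2
u = StructArray{SVector{nvars, Float64}}(ntuple(_ -> zeros(n_nodes, n_elements),
                                                nvars))
u[1, 1] = SVector(1.0, 2.0, 3.0)  # set all variables at node 1 of element 1
StructArrays.components(u)[2]     # contiguous array holding the second variable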
function estimate_dt(mesh::DGMultiMesh, dg::DGMulti) - rd = dg.basis # RefElemData - return StartUpDG.estimate_h(rd, mesh.md) / StartUpDG.inverse_trace_constant(rd) + rd = dg.basis # RefElemData + return StartUpDG.estimate_h(rd, mesh.md) / StartUpDG.inverse_trace_constant(rd) end # for the stepsize callback function max_dt(u, t, mesh::DGMultiMesh, - constant_speed::False, equations, dg::DGMulti{NDIMS}, cache) where {NDIMS} - - @unpack md = mesh - rd = dg.basis - - dt_min = Inf - for e in eachelement(mesh, dg, cache) - h_e = StartUpDG.estimate_h(e, rd, md) - max_speeds = ntuple(_->nextfloat(zero(t)), NDIMS) - for i in Base.OneTo(rd.Np) # loop over nodes - lambda_i = max_abs_speeds(u[i, e], equations) - max_speeds = max.(max_speeds, lambda_i) + constant_speed::False, equations, dg::DGMulti{NDIMS}, + cache) where {NDIMS} + @unpack md = mesh + rd = dg.basis + + dt_min = Inf + for e in eachelement(mesh, dg, cache) + h_e = StartUpDG.estimate_h(e, rd, md) + max_speeds = ntuple(_ -> nextfloat(zero(t)), NDIMS) + for i in Base.OneTo(rd.Np) # loop over nodes + lambda_i = max_abs_speeds(u[i, e], equations) + max_speeds = max.(max_speeds, lambda_i) + end + dt_min = min(dt_min, h_e / sum(max_speeds)) end - dt_min = min(dt_min, h_e / sum(max_speeds)) - end - # This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by - # `polydeg+1`. This is because `nnodes(dg)` returns the total number of - # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns - # the number of 1D nodes for `DGSEM` solvers. - polydeg = rd.N - return 2 * dt_min / (polydeg + 1) + # This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by + # `polydeg+1`. This is because `nnodes(dg)` returns the total number of + # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns + # the number of 1D nodes for `DGSEM` solvers. + polydeg = rd.N + return 2 * dt_min / (polydeg + 1) end function max_dt(u, t, mesh::DGMultiMesh, - constant_speed::True, equations, dg::DGMulti{NDIMS}, cache) where {NDIMS} - - @unpack md = mesh - rd = dg.basis - - dt_min = Inf - for e in eachelement(mesh, dg, cache) - h_e = StartUpDG.estimate_h(e, rd, md) - max_speeds = ntuple(_->nextfloat(zero(t)), NDIMS) - for i in Base.OneTo(rd.Np) # loop over nodes - max_speeds = max.(max_abs_speeds(equations), max_speeds) + constant_speed::True, equations, dg::DGMulti{NDIMS}, + cache) where {NDIMS} + @unpack md = mesh + rd = dg.basis + + dt_min = Inf + for e in eachelement(mesh, dg, cache) + h_e = StartUpDG.estimate_h(e, rd, md) + max_speeds = ntuple(_ -> nextfloat(zero(t)), NDIMS) + for i in Base.OneTo(rd.Np) # loop over nodes + max_speeds = max.(max_abs_speeds(equations), max_speeds) + end + dt_min = min(dt_min, h_e / sum(max_speeds)) end - dt_min = min(dt_min, h_e / sum(max_speeds)) - end - # This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by - # `polydeg+1`. This is because `nnodes(dg)` returns the total number of - # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns - # the number of 1D nodes for `DGSEM` solvers. - polydeg = rd.N - return 2 * dt_min / (polydeg + 1) + # This mimics `max_dt` for `TreeMesh`, except that `nnodes(dg)` is replaced by + # `polydeg+1`. This is because `nnodes(dg)` returns the total number of + # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns + # the number of 1D nodes for `DGSEM` solvers. 
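# Worked example of the estimate returned below (added for illustration):
# with element size h_e = 0.1, maximum wave speed 2.0, and polynomial degree
# polydeg = 3, one obtains dt_min = 0.1 / 2.0 = 0.05 and thus
# dt = 2 * dt_min / (polydeg + 1) = 2 * 0.05 / 4 = 0.025.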
+ polydeg = rd.N + return 2 * dt_min / (polydeg + 1) end # interpolates from solution coefficients to face quadrature points # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache, u, mesh::DGMultiMesh, equations, surface_integral, dg::DGMulti) - rd = dg.basis - @unpack u_face_values = cache - apply_to_each_field(mul_by!(rd.Vf), u_face_values, u) + rd = dg.basis + @unpack u_face_values = cache + apply_to_each_field(mul_by!(rd.Vf), u_face_values, u) end # version for affine meshes @@ -261,26 +288,25 @@ function calc_volume_integral!(du, u, mesh::DGMultiMesh, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralWeakForm, dg::DGMulti, cache) - - rd = dg.basis - md = mesh.md - @unpack weak_differentiation_matrices, dxidxhatj, u_values, local_values_threaded = cache - @unpack rstxyzJ = md # geometric terms - - # interpolate to quadrature points - apply_to_each_field(mul_by!(rd.Vq), u_values, u) - - @threaded for e in eachelement(mesh, dg, cache) - - flux_values = local_values_threaded[Threads.threadid()] - for i in eachdim(mesh) - flux_values .= flux.(view(u_values, :, e), i, equations) - for j in eachdim(mesh) - apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[j], dxidxhatj[i, j][1, e]), - view(du, :, e), flux_values) - end + rd = dg.basis + md = mesh.md + @unpack weak_differentiation_matrices, dxidxhatj, u_values, local_values_threaded = cache + @unpack rstxyzJ = md # geometric terms + + # interpolate to quadrature points + apply_to_each_field(mul_by!(rd.Vq), u_values, u) + + @threaded for e in eachelement(mesh, dg, cache) + flux_values = local_values_threaded[Threads.threadid()] + for i in eachdim(mesh) + flux_values .= flux.(view(u_values, :, e), i, equations) + for j in eachdim(mesh) + apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[j], + dxidxhatj[i, j][1, e]), + view(du, :, e), flux_values) + end + end end - end end # version for curved meshes @@ -288,103 +314,101 @@ function calc_volume_integral!(du, u, mesh::DGMultiMesh{NDIMS, <:NonAffine}, have_nonconservative_terms::False, equations, volume_integral::VolumeIntegralWeakForm, dg::DGMulti, cache) where {NDIMS} + rd = dg.basis + (; weak_differentiation_matrices, u_values) = cache + (; dxidxhatj) = cache - rd = dg.basis - (; weak_differentiation_matrices, u_values) = cache - (; dxidxhatj) = cache - - # interpolate to quadrature points - apply_to_each_field(mul_by!(rd.Vq), u_values, u) - - @threaded for e in eachelement(mesh, dg, cache) - - flux_values = cache.flux_threaded[Threads.threadid()] - for i in eachdim(mesh) - flux_values[i] .= flux.(view(u_values, :, e), i, equations) - end - - # rotate flux with df_i/dx_i = sum_j d(x_i)/d(x̂_j) * d(f_i)/d(x̂_j). - # Example: df_x/dx + df_y/dy = dr/dx * df_x/dr + ds/dx * df_x/ds - # + dr/dy * df_y/dr + ds/dy * df_y/ds - # = Dr * (dr/dx * fx + dr/dy * fy) + Ds * (...) 
- # = Dr * (f_r) + Ds * (f_s) - - rotated_flux_values = cache.rotated_flux_threaded[Threads.threadid()] - for j in eachdim(mesh) - - fill!(rotated_flux_values, zero(eltype(rotated_flux_values))) + # interpolate to quadrature points + apply_to_each_field(mul_by!(rd.Vq), u_values, u) - # compute rotated fluxes - for i in eachdim(mesh) - for ii in eachindex(rotated_flux_values) - flux_i_node = flux_values[i][ii] - dxidxhatj_node = dxidxhatj[i, j][ii, e] - rotated_flux_values[ii] = rotated_flux_values[ii] + dxidxhatj_node * flux_i_node + @threaded for e in eachelement(mesh, dg, cache) + flux_values = cache.flux_threaded[Threads.threadid()] + for i in eachdim(mesh) + flux_values[i] .= flux.(view(u_values, :, e), i, equations) end - end - # apply weak differentiation matrices to rotated fluxes - apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[j]), - view(du, :, e), rotated_flux_values) + # rotate flux with df_i/dx_i = sum_j d(x_i)/d(x̂_j) * d(f_i)/d(x̂_j). + # Example: df_x/dx + df_y/dy = dr/dx * df_x/dr + ds/dx * df_x/ds + # + dr/dy * df_y/dr + ds/dy * df_y/ds + # = Dr * (dr/dx * fx + dr/dy * fy) + Ds * (...) + # = Dr * (f_r) + Ds * (f_s) + + rotated_flux_values = cache.rotated_flux_threaded[Threads.threadid()] + for j in eachdim(mesh) + fill!(rotated_flux_values, zero(eltype(rotated_flux_values))) + + # compute rotated fluxes + for i in eachdim(mesh) + for ii in eachindex(rotated_flux_values) + flux_i_node = flux_values[i][ii] + dxidxhatj_node = dxidxhatj[i, j][ii, e] + rotated_flux_values[ii] = rotated_flux_values[ii] + + dxidxhatj_node * flux_i_node + end + end + + # apply weak differentiation matrices to rotated fluxes + apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[j]), + view(du, :, e), rotated_flux_values) + end end - end end function calc_interface_flux!(cache, surface_integral::SurfaceIntegralWeakForm, mesh::DGMultiMesh, have_nonconservative_terms::False, equations, dg::DGMulti{NDIMS}) where {NDIMS} - - @unpack surface_flux = surface_integral - md = mesh.md - @unpack mapM, mapP, nxyzJ, Jf = md - @unpack u_face_values, flux_face_values = cache - - @threaded for face_node_index in each_face_node_global(mesh, dg, cache) - - # inner (idM -> minus) and outer (idP -> plus) indices - idM, idP = mapM[face_node_index], mapP[face_node_index] - uM = u_face_values[idM] - uP = u_face_values[idP] - normal = SVector{NDIMS}(getindex.(nxyzJ, idM)) / Jf[idM] - flux_face_values[idM] = surface_flux(uM, uP, normal, equations) * Jf[idM] - end + @unpack surface_flux = surface_integral + md = mesh.md + @unpack mapM, mapP, nxyzJ, Jf = md + @unpack u_face_values, flux_face_values = cache + + @threaded for face_node_index in each_face_node_global(mesh, dg, cache) + + # inner (idM -> minus) and outer (idP -> plus) indices + idM, idP = mapM[face_node_index], mapP[face_node_index] + uM = u_face_values[idM] + uP = u_face_values[idP] + normal = SVector{NDIMS}(getindex.(nxyzJ, idM)) / Jf[idM] + flux_face_values[idM] = surface_flux(uM, uP, normal, equations) * Jf[idM] + end end function calc_interface_flux!(cache, surface_integral::SurfaceIntegralWeakForm, mesh::DGMultiMesh, have_nonconservative_terms::True, equations, dg::DGMulti{NDIMS}) where {NDIMS} - - flux_conservative, flux_nonconservative = surface_integral.surface_flux - md = mesh.md - @unpack mapM, mapP, nxyzJ, Jf = md - @unpack u_face_values, flux_face_values = cache - - @threaded for face_node_index in each_face_node_global(mesh, dg, cache) - - # inner (idM -> minus) and outer (idP -> plus) indices - idM, idP = 
mapM[face_node_index], mapP[face_node_index] - uM = u_face_values[idM] - - # compute flux if node is not a boundary node - if idM != idP - uP = u_face_values[idP] - normal = SVector{NDIMS}(getindex.(nxyzJ, idM)) / Jf[idM] - conservative_part = flux_conservative(uM, uP, normal, equations) - - # Two notes on the use of `flux_nonconservative`: - # 1. In contrast to other mesh types, only one nonconservative part needs to be - # computed since we loop over the elements, not the unique interfaces. - # 2. In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. However, - # both are the same at watertight interfaces, so we pass `normal` twice. - nonconservative_part = flux_nonconservative(uM, uP, normal, normal, equations) - # The factor 0.5 is necessary for the nonconservative fluxes based on the - # interpretation of global SBP operators. - flux_face_values[idM] = (conservative_part + 0.5 * nonconservative_part) * Jf[idM] + flux_conservative, flux_nonconservative = surface_integral.surface_flux + md = mesh.md + @unpack mapM, mapP, nxyzJ, Jf = md + @unpack u_face_values, flux_face_values = cache + + @threaded for face_node_index in each_face_node_global(mesh, dg, cache) + + # inner (idM -> minus) and outer (idP -> plus) indices + idM, idP = mapM[face_node_index], mapP[face_node_index] + uM = u_face_values[idM] + + # compute flux if node is not a boundary node + if idM != idP + uP = u_face_values[idP] + normal = SVector{NDIMS}(getindex.(nxyzJ, idM)) / Jf[idM] + conservative_part = flux_conservative(uM, uP, normal, equations) + + # Two notes on the use of `flux_nonconservative`: + # 1. In contrast to other mesh types, only one nonconservative part needs to be + # computed since we loop over the elements, not the unique interfaces. + # 2. In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. However, + # both are the same at watertight interfaces, so we pass `normal` twice. + nonconservative_part = flux_nonconservative(uM, uP, normal, normal, + equations) + # The factor 0.5 is necessary for the nonconservative fluxes based on the + # interpretation of global SBP operators. + flux_face_values[idM] = (conservative_part + 0.5 * nonconservative_part) * + Jf[idM] + end end - end end # assumes cache.flux_face_values is computed and filled with @@ -392,22 +416,22 @@ end function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGMulti, cache) - rd = dg.basis - apply_to_each_field(mul_by_accum!(rd.LIFT), du, cache.flux_face_values) + rd = dg.basis + apply_to_each_field(mul_by_accum!(rd.LIFT), du, cache.flux_face_values) end # Specialize for nodal SBP discretizations. Uses that Vf*u = u[Fmask,:] # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache, u, mesh::DGMultiMesh, equations, surface_integral, dg::DGMultiSBP) - rd = dg.basis - @unpack Fmask = rd - @unpack u_face_values = cache - @threaded for e in eachelement(mesh, dg, cache) - for (i,fid) in enumerate(Fmask) - u_face_values[i, e] = u[fid, e] + rd = dg.basis + @unpack Fmask = rd + @unpack u_face_values = cache + @threaded for e in eachelement(mesh, dg, cache) + for (i, fid) in enumerate(Fmask) + u_face_values[i, e] = u[fid, e] + end end - end end # Specialize for nodal SBP discretizations. 
Uses that du = LIFT*u is equivalent to @@ -415,141 +439,152 @@ end function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGMultiSBP, cache) - rd = dg.basis - @unpack flux_face_values, lift_scalings = cache + rd = dg.basis + @unpack flux_face_values, lift_scalings = cache - @threaded for e in eachelement(mesh, dg, cache) - for i in each_face_node(mesh, dg, cache) - fid = rd.Fmask[i] - du[fid, e] = du[fid, e] + flux_face_values[i,e] * lift_scalings[i] + @threaded for e in eachelement(mesh, dg, cache) + for i in each_face_node(mesh, dg, cache) + fid = rd.Fmask[i] + du[fid, e] = du[fid, e] + flux_face_values[i, e] * lift_scalings[i] + end end - end end # do nothing for periodic (default) boundary conditions -calc_boundary_flux!(cache, t, boundary_conditions::BoundaryConditionPeriodic, - mesh, have_nonconservative_terms, equations, dg::DGMulti) = nothing +function calc_boundary_flux!(cache, t, boundary_conditions::BoundaryConditionPeriodic, + mesh, have_nonconservative_terms, equations, dg::DGMulti) + nothing +end # "lispy tuple programming" instead of for loop for type stability function calc_boundary_flux!(cache, t, boundary_conditions, mesh, have_nonconservative_terms, equations, dg::DGMulti) - # peel off first boundary condition - calc_single_boundary_flux!(cache, t, first(boundary_conditions), first(keys(boundary_conditions)), - mesh, have_nonconservative_terms, equations, dg) + # peel off first boundary condition + calc_single_boundary_flux!(cache, t, first(boundary_conditions), + first(keys(boundary_conditions)), + mesh, have_nonconservative_terms, equations, dg) - # recurse on the remainder of the boundary conditions - calc_boundary_flux!(cache, t, Base.tail(boundary_conditions), - mesh, have_nonconservative_terms, equations, dg) + # recurse on the remainder of the boundary conditions + calc_boundary_flux!(cache, t, Base.tail(boundary_conditions), + mesh, have_nonconservative_terms, equations, dg) end # terminate recursion -calc_boundary_flux!(cache, t, boundary_conditions::NamedTuple{(),Tuple{}}, - mesh, have_nonconservative_terms, equations, dg::DGMulti) = nothing +function calc_boundary_flux!(cache, t, boundary_conditions::NamedTuple{(), Tuple{}}, + mesh, have_nonconservative_terms, equations, dg::DGMulti) + nothing +end function calc_single_boundary_flux!(cache, t, boundary_condition, boundary_key, mesh, - have_nonconservative_terms::False, equations, dg::DGMulti{NDIMS}) where {NDIMS} - - rd = dg.basis - md = mesh.md - @unpack u_face_values, flux_face_values = cache - @unpack xyzf, nxyzJ, Jf = md - @unpack surface_flux = dg.surface_integral - - # reshape face/normal arrays to have size = (num_points_on_face, num_faces_total). - # mesh.boundary_faces indexes into the columns of these face-reshaped arrays. - num_faces = StartUpDG.num_faces(rd.element_type) - num_pts_per_face = rd.Nfq ÷ num_faces - num_faces_total = num_faces * md.num_elements - - # This function was originally defined as - # `reshape_by_face(u) = reshape(view(u, :), num_pts_per_face, num_faces_total)`. - # This results in allocations due to https://github.com/JuliaLang/julia/issues/36313. - # To avoid allocations, we use Tim Holy's suggestion: - # https://github.com/JuliaLang/julia/issues/36313#issuecomment-782336300. 
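# Self-contained illustration (not from the patch) of the allocation-free
# reshape trick referenced above: `Base.ReshapedArray` wraps the parent array
# directly, so no copy is made and writes go through to the original storage.
u = collect(1.0:12.0)
num_pts_per_face, num_faces_total = 3, 4
v = Base.ReshapedArray(u, (num_pts_per_face, num_faces_total), ())
v[1, 2] = 99.0  # column-major indexing, so this writes to u[4]
@assert u[4] == 99.0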
- reshape_by_face(u) = Base.ReshapedArray(u, (num_pts_per_face, num_faces_total), ()) - - u_face_values = reshape_by_face(u_face_values) - flux_face_values = reshape_by_face(flux_face_values) - Jf = reshape_by_face(Jf) - nxyzJ, xyzf = reshape_by_face.(nxyzJ), reshape_by_face.(xyzf) # broadcast over nxyzJ::NTuple{NDIMS,Matrix} - - # loop through boundary faces, which correspond to columns of reshaped u_face_values, ... - for f in mesh.boundary_faces[boundary_key] - for i in Base.OneTo(num_pts_per_face) - face_normal = SVector{NDIMS}(getindex.(nxyzJ, i, f)) / Jf[i,f] - face_coordinates = SVector{NDIMS}(getindex.(xyzf, i, f)) - flux_face_values[i,f] = boundary_condition(u_face_values[i,f], - face_normal, face_coordinates, t, - surface_flux, equations) * Jf[i,f] + have_nonconservative_terms::False, equations, + dg::DGMulti{NDIMS}) where {NDIMS} + rd = dg.basis + md = mesh.md + @unpack u_face_values, flux_face_values = cache + @unpack xyzf, nxyzJ, Jf = md + @unpack surface_flux = dg.surface_integral + + # reshape face/normal arrays to have size = (num_points_on_face, num_faces_total). + # mesh.boundary_faces indexes into the columns of these face-reshaped arrays. + num_faces = StartUpDG.num_faces(rd.element_type) + num_pts_per_face = rd.Nfq ÷ num_faces + num_faces_total = num_faces * md.num_elements + + # This function was originally defined as + # `reshape_by_face(u) = reshape(view(u, :), num_pts_per_face, num_faces_total)`. + # This results in allocations due to https://github.com/JuliaLang/julia/issues/36313. + # To avoid allocations, we use Tim Holy's suggestion: + # https://github.com/JuliaLang/julia/issues/36313#issuecomment-782336300. + reshape_by_face(u) = Base.ReshapedArray(u, (num_pts_per_face, num_faces_total), ()) + + u_face_values = reshape_by_face(u_face_values) + flux_face_values = reshape_by_face(flux_face_values) + Jf = reshape_by_face(Jf) + nxyzJ, xyzf = reshape_by_face.(nxyzJ), reshape_by_face.(xyzf) # broadcast over nxyzJ::NTuple{NDIMS,Matrix} + + # loop through boundary faces, which correspond to columns of reshaped u_face_values, ... + for f in mesh.boundary_faces[boundary_key] + for i in Base.OneTo(num_pts_per_face) + face_normal = SVector{NDIMS}(getindex.(nxyzJ, i, f)) / Jf[i, f] + face_coordinates = SVector{NDIMS}(getindex.(xyzf, i, f)) + flux_face_values[i, f] = boundary_condition(u_face_values[i, f], + face_normal, face_coordinates, + t, + surface_flux, equations) * + Jf[i, f] + end end - end - # Note: modifying the values of the reshaped array modifies the values of cache.flux_face_values. - # However, we don't have to re-reshape, since cache.flux_face_values still retains its original shape. + # Note: modifying the values of the reshaped array modifies the values of cache.flux_face_values. + # However, we don't have to re-reshape, since cache.flux_face_values still retains its original shape. end function calc_single_boundary_flux!(cache, t, boundary_condition, boundary_key, mesh, - have_nonconservative_terms::True, equations, dg::DGMulti{NDIMS}) where {NDIMS} - - rd = dg.basis - md = mesh.md - surface_flux, nonconservative_flux = dg.surface_integral.surface_flux - - # reshape face/normal arrays to have size = (num_points_on_face, num_faces_total). - # mesh.boundary_faces indexes into the columns of these face-reshaped arrays. 
- num_pts_per_face = rd.Nfq ÷ StartUpDG.num_faces(rd.element_type) - num_faces_total = StartUpDG.num_faces(rd.element_type) * md.num_elements - - # This function was originally defined as - # `reshape_by_face(u) = reshape(view(u, :), num_pts_per_face, num_faces_total)`. - # This results in allocations due to https://github.com/JuliaLang/julia/issues/36313. - # To avoid allocations, we use Tim Holy's suggestion: - # https://github.com/JuliaLang/julia/issues/36313#issuecomment-782336300. - reshape_by_face(u) = Base.ReshapedArray(u, (num_pts_per_face, num_faces_total), ()) - - u_face_values = reshape_by_face(cache.u_face_values) - flux_face_values = reshape_by_face(cache.flux_face_values) - Jf = reshape_by_face(md.Jf) - nxyzJ, xyzf = reshape_by_face.(md.nxyzJ), reshape_by_face.(md.xyzf) # broadcast over nxyzJ::NTuple{NDIMS,Matrix} - - # loop through boundary faces, which correspond to columns of reshaped u_face_values, ... - for f in mesh.boundary_faces[boundary_key] - for i in Base.OneTo(num_pts_per_face) - face_normal = SVector{NDIMS}(getindex.(nxyzJ, i, f)) / Jf[i,f] - face_coordinates = SVector{NDIMS}(getindex.(xyzf, i, f)) - - # Compute conservative and non-conservative fluxes separately. - # This imposes boundary conditions on the conservative part of the flux. - cons_flux_at_face_node = boundary_condition(u_face_values[i,f], face_normal, face_coordinates, t, - surface_flux, equations) - - # Compute pointwise nonconservative numerical flux at the boundary. - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # However, there is only one `face_normal` at boundaries, which we pass in twice. - # Note: This does not set any type of boundary condition for the nonconservative term - noncons_flux_at_face_node = nonconservative_flux(u_face_values[i,f], u_face_values[i,f], - face_normal, face_normal, equations) - - flux_face_values[i,f] = (cons_flux_at_face_node + 0.5 * noncons_flux_at_face_node) * Jf[i,f] - + have_nonconservative_terms::True, equations, + dg::DGMulti{NDIMS}) where {NDIMS} + rd = dg.basis + md = mesh.md + surface_flux, nonconservative_flux = dg.surface_integral.surface_flux + + # reshape face/normal arrays to have size = (num_points_on_face, num_faces_total). + # mesh.boundary_faces indexes into the columns of these face-reshaped arrays. + num_pts_per_face = rd.Nfq ÷ StartUpDG.num_faces(rd.element_type) + num_faces_total = StartUpDG.num_faces(rd.element_type) * md.num_elements + + # This function was originally defined as + # `reshape_by_face(u) = reshape(view(u, :), num_pts_per_face, num_faces_total)`. + # This results in allocations due to https://github.com/JuliaLang/julia/issues/36313. + # To avoid allocations, we use Tim Holy's suggestion: + # https://github.com/JuliaLang/julia/issues/36313#issuecomment-782336300. + reshape_by_face(u) = Base.ReshapedArray(u, (num_pts_per_face, num_faces_total), ()) + + u_face_values = reshape_by_face(cache.u_face_values) + flux_face_values = reshape_by_face(cache.flux_face_values) + Jf = reshape_by_face(md.Jf) + nxyzJ, xyzf = reshape_by_face.(md.nxyzJ), reshape_by_face.(md.xyzf) # broadcast over nxyzJ::NTuple{NDIMS,Matrix} + + # loop through boundary faces, which correspond to columns of reshaped u_face_values, ... 
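# Note for readers (not from the patch): after the `reshape_by_face` calls
# above, all quantities belonging to one physical face occupy a single column,
# so the face indices stored in `mesh.boundary_faces[boundary_key]` can be
# used directly as column indices, e.g. `flux_face_values[:, f]`, without any
# per-node index translation.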
+ for f in mesh.boundary_faces[boundary_key] + for i in Base.OneTo(num_pts_per_face) + face_normal = SVector{NDIMS}(getindex.(nxyzJ, i, f)) / Jf[i, f] + face_coordinates = SVector{NDIMS}(getindex.(xyzf, i, f)) + + # Compute conservative and non-conservative fluxes separately. + # This imposes boundary conditions on the conservative part of the flux. + cons_flux_at_face_node = boundary_condition(u_face_values[i, f], + face_normal, face_coordinates, + t, + surface_flux, equations) + + # Compute pointwise nonconservative numerical flux at the boundary. + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # However, there is only one `face_normal` at boundaries, which we pass in twice. + # Note: This does not set any type of boundary condition for the nonconservative term + noncons_flux_at_face_node = nonconservative_flux(u_face_values[i, f], + u_face_values[i, f], + face_normal, face_normal, + equations) + + flux_face_values[i, f] = (cons_flux_at_face_node + + 0.5 * noncons_flux_at_face_node) * Jf[i, f] + end end - end - # Note: modifying the values of the reshaped array modifies the values of cache.flux_face_values. - # However, we don't have to re-reshape, since cache.flux_face_values still retains its original shape. + # Note: modifying the values of the reshaped array modifies the values of cache.flux_face_values. + # However, we don't have to re-reshape, since cache.flux_face_values still retains its original shape. end - # inverts Jacobian and scales by -1.0 -function invert_jacobian!(du, mesh::DGMultiMesh, equations, dg::DGMulti, cache; scaling=-1) - @threaded for e in eachelement(mesh, dg, cache) - invJ = cache.invJ[1, e] - for i in axes(du, 1) - du[i, e] *= scaling * invJ +function invert_jacobian!(du, mesh::DGMultiMesh, equations, dg::DGMulti, cache; + scaling = -1) + @threaded for e in eachelement(mesh, dg, cache) + invJ = cache.invJ[1, e] + for i in axes(du, 1) + du[i, e] *= scaling * invJ + end end - end end # inverts Jacobian using weight-adjusted DG, and scales by -1.0. @@ -557,87 +592,92 @@ end # "Weight-adjusted discontinuous Galerkin methods: curvilinear meshes." 
# https://doi.org/10.1137/16M1089198 function invert_jacobian!(du, mesh::DGMultiMesh{NDIMS, <:NonAffine}, equations, - dg::DGMulti, cache; scaling=-1) where {NDIMS} - # Vq = interpolation matrix to quadrature points, Pq = quadrature-based L2 projection matrix - (; Pq, Vq) = dg.basis - (; local_values_threaded, invJ) = cache + dg::DGMulti, cache; scaling = -1) where {NDIMS} + # Vq = interpolation matrix to quadrature points, Pq = quadrature-based L2 projection matrix + (; Pq, Vq) = dg.basis + (; local_values_threaded, invJ) = cache - @threaded for e in eachelement(mesh, dg, cache) - du_at_quad_points = local_values_threaded[Threads.threadid()] + @threaded for e in eachelement(mesh, dg, cache) + du_at_quad_points = local_values_threaded[Threads.threadid()] - # interpolate solution to quadrature - apply_to_each_field(mul_by!(Vq), du_at_quad_points, view(du, :, e)) + # interpolate solution to quadrature + apply_to_each_field(mul_by!(Vq), du_at_quad_points, view(du, :, e)) - # scale by quadrature points - for i in eachindex(du_at_quad_points) - du_at_quad_points[i] *= scaling * invJ[i, e] - end + # scale by quadrature points + for i in eachindex(du_at_quad_points) + du_at_quad_points[i] *= scaling * invJ[i, e] + end - # project back to polynomials - apply_to_each_field(mul_by!(Pq), view(du, :, e), du_at_quad_points) - end + # project back to polynomials + apply_to_each_field(mul_by!(Pq), view(du, :, e), du_at_quad_points) + end end # Multiple calc_sources! to resolve method ambiguities -calc_sources!(du, u, t, source_terms::Nothing, - mesh, equations, dg::DGMulti, cache) = nothing -calc_sources!(du, u, t, source_terms::Nothing, - mesh, equations, dg::DGMultiFluxDiffSBP, cache) = nothing +function calc_sources!(du, u, t, source_terms::Nothing, + mesh, equations, dg::DGMulti, cache) + nothing +end +function calc_sources!(du, u, t, source_terms::Nothing, + mesh, equations, dg::DGMultiFluxDiffSBP, cache) + nothing +end # uses quadrature + projection to compute source terms. 
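# Illustrative sketch (not from the patch) of the "quadrature + projection"
# pattern announced above, with small dense stand-ins for the interpolation
# matrix `Vq` and the L2 projection matrix `Pq` (both hypothetical here):
Nq, Np = 4, 3
Vq = rand(Nq, Np)                   # interpolation: nodes -> quadrature points
Pq = rand(Np, Nq)                   # projection: quadrature points -> nodes
u_e = rand(Np)
du_e = zeros(Np)
source(u, x, t) = 0.1 * u           # toy source term
u_quad = Vq * u_e                   # 1. interpolate to quadrature points
s_quad = source.(u_quad, 0.0, 0.0)  # 2. evaluate the source pointwise
du_e .+= Pq * s_quad                # 3. accumulate the projection, as `mul_by_accum!(Pq)` does below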
function calc_sources!(du, u, t, source_terms, mesh, equations, dg::DGMulti, cache) - - rd = dg.basis - md = mesh.md - @unpack Pq = rd - @unpack u_values, local_values_threaded = cache - @threaded for e in eachelement(mesh, dg, cache) - - source_values = local_values_threaded[Threads.threadid()] - - u_e = view(u_values, :, e) # u_values should already be computed from volume integral - - for i in each_quad_node(mesh, dg, cache) - source_values[i] = source_terms(u_e[i], SVector(getindex.(md.xyzq, i, e)), - t, equations) + rd = dg.basis + md = mesh.md + @unpack Pq = rd + @unpack u_values, local_values_threaded = cache + @threaded for e in eachelement(mesh, dg, cache) + source_values = local_values_threaded[Threads.threadid()] + + u_e = view(u_values, :, e) # u_values should already be computed from volume integral + + for i in each_quad_node(mesh, dg, cache) + source_values[i] = source_terms(u_e[i], SVector(getindex.(md.xyzq, i, e)), + t, equations) + end + apply_to_each_field(mul_by_accum!(Pq), view(du, :, e), source_values) end - apply_to_each_field(mul_by_accum!(Pq), view(du, :, e), source_values) - end end function rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions::BC, source_terms::Source, dg::DGMulti, cache) where {BC, Source} + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache, u, mesh, equations, dg.surface_integral, dg) + end - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache, dg.surface_integral, mesh, - have_nonconservative_terms(equations), equations, dg) + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache, dg.surface_integral, mesh, + have_nonconservative_terms(equations), equations, dg) + end - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, - have_nonconservative_terms(equations), equations, dg) + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, + have_nonconservative_terms(equations), equations, dg) + end - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, dg.surface_integral, dg, cache) + end - @trixi_timeit timer() "Jacobian" invert_jacobian!( - du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" invert_jacobian!(du, mesh, equations, dg, cache) - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, mesh, equations, dg, cache) + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, mesh, equations, dg, cache) + end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgmulti/dg_parabolic.jl b/src/solvers/dgmulti/dg_parabolic.jl index c4015dbca04..72dbe2c4256 100644 --- a/src/solvers/dgmulti/dg_parabolic.jl +++ 
b/src/solvers/dgmulti/dg_parabolic.jl @@ -4,387 +4,417 @@ function create_cache_parabolic(mesh::DGMultiMesh, equations_parabolic::AbstractEquationsParabolic, dg::DGMulti, parabolic_scheme, RealT, uEltype) - # default to taking derivatives of all hyperbolic variables - # TODO: parabolic; utilize the parabolic variables in `equations_parabolic` to reduce memory usage in the parabolic cache - nvars = nvariables(equations_hyperbolic) - - (; M, Vq, Pq, Drst) = dg.basis - - # gradient operators: map from nodes to quadrature - strong_differentiation_matrices = map(A -> Vq * A, Drst) - gradient_lift_matrix = Vq * dg.basis.LIFT - - # divergence operators: map from quadrature to nodes - weak_differentiation_matrices = map(A -> (M \ (-A' * M * Pq)), Drst) - divergence_lift_matrix = dg.basis.LIFT - projection_face_interpolation_matrix = dg.basis.Vf * dg.basis.Pq - - # evaluate geometric terms at quadrature points in case the mesh is curved - (; md) = mesh - J = dg.basis.Vq * md.J - invJ = inv.(J) - dxidxhatj = map(x -> dg.basis.Vq * x, md.rstxyzJ) - - # u_transformed stores "transformed" variables for computing the gradient - u_transformed = allocate_nested_array(uEltype, nvars, size(md.x), dg) - gradients = SVector{ndims(mesh)}(ntuple(_ -> similar(u_transformed, (dg.basis.Nq, mesh.md.num_elements)), ndims(mesh))) - flux_viscous = similar.(gradients) - - u_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) - scalar_flux_face_values = similar(u_face_values) - gradients_face_values = ntuple(_ -> similar(u_face_values), ndims(mesh)) - - local_u_values_threaded = [similar(u_transformed, dg.basis.Nq) for _ in 1:Threads.nthreads()] - local_flux_viscous_threaded = [SVector{ndims(mesh)}(ntuple(_ -> similar(u_transformed, dg.basis.Nq), ndims(mesh))) for _ in 1:Threads.nthreads()] - local_flux_face_values_threaded = [similar(scalar_flux_face_values[:, 1]) for _ in 1:Threads.nthreads()] - - return (; u_transformed, gradients, flux_viscous, + # default to taking derivatives of all hyperbolic variables + # TODO: parabolic; utilize the parabolic variables in `equations_parabolic` to reduce memory usage in the parabolic cache + nvars = nvariables(equations_hyperbolic) + + (; M, Vq, Pq, Drst) = dg.basis + + # gradient operators: map from nodes to quadrature + strong_differentiation_matrices = map(A -> Vq * A, Drst) + gradient_lift_matrix = Vq * dg.basis.LIFT + + # divergence operators: map from quadrature to nodes + weak_differentiation_matrices = map(A -> (M \ (-A' * M * Pq)), Drst) + divergence_lift_matrix = dg.basis.LIFT + projection_face_interpolation_matrix = dg.basis.Vf * dg.basis.Pq + + # evaluate geometric terms at quadrature points in case the mesh is curved + (; md) = mesh + J = dg.basis.Vq * md.J + invJ = inv.(J) + dxidxhatj = map(x -> dg.basis.Vq * x, md.rstxyzJ) + + # u_transformed stores "transformed" variables for computing the gradient + u_transformed = allocate_nested_array(uEltype, nvars, size(md.x), dg) + gradients = SVector{ndims(mesh)}(ntuple(_ -> similar(u_transformed, + (dg.basis.Nq, + mesh.md.num_elements)), + ndims(mesh))) + flux_viscous = similar.(gradients) + + u_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) + scalar_flux_face_values = similar(u_face_values) + gradients_face_values = ntuple(_ -> similar(u_face_values), ndims(mesh)) + + local_u_values_threaded = [similar(u_transformed, dg.basis.Nq) + for _ in 1:Threads.nthreads()] + local_flux_viscous_threaded = [SVector{ndims(mesh)}(ntuple(_ -> similar(u_transformed, + dg.basis.Nq), + ndims(mesh))) + 
for _ in 1:Threads.nthreads()] + local_flux_face_values_threaded = [similar(scalar_flux_face_values[:, 1]) + for _ in 1:Threads.nthreads()] + + return (; u_transformed, gradients, flux_viscous, weak_differentiation_matrices, strong_differentiation_matrices, - gradient_lift_matrix, projection_face_interpolation_matrix, divergence_lift_matrix, + gradient_lift_matrix, projection_face_interpolation_matrix, + divergence_lift_matrix, dxidxhatj, J, invJ, # geometric terms u_face_values, gradients_face_values, scalar_flux_face_values, - local_u_values_threaded, local_flux_viscous_threaded, local_flux_face_values_threaded) + local_u_values_threaded, local_flux_viscous_threaded, + local_flux_face_values_threaded) end # Transform solution variables prior to taking the gradient # (e.g., conservative to primitive variables). Defaults to doing nothing. # TODO: can we avoid copying data? -function transform_variables!(u_transformed, u, mesh, equations_parabolic::AbstractEquationsParabolic, +function transform_variables!(u_transformed, u, mesh, + equations_parabolic::AbstractEquationsParabolic, dg::DGMulti, parabolic_scheme, cache, cache_parabolic) - @threaded for i in eachindex(u) - u_transformed[i] = gradient_variable_transformation(equations_parabolic)(u[i], equations_parabolic) - end + @threaded for i in eachindex(u) + u_transformed[i] = gradient_variable_transformation(equations_parabolic)(u[i], + equations_parabolic) + end end # TODO: reuse entropy projection computations for DGMultiFluxDiff{<:Polynomial} (including `GaussSBP` solvers) function calc_gradient_surface_integral!(gradients, u, scalar_flux_face_values, mesh, equations::AbstractEquationsParabolic, dg::DGMulti, cache, cache_parabolic) - (; gradient_lift_matrix, local_flux_face_values_threaded) = cache_parabolic - @threaded for e in eachelement(mesh, dg) - local_flux_values = local_flux_face_values_threaded[Threads.threadid()] - for dim in eachdim(mesh) - for i in eachindex(local_flux_values) - # compute flux * (nx, ny, nz) - local_flux_values[i] = scalar_flux_face_values[i, e] * mesh.md.nxyzJ[dim][i, e] - end - apply_to_each_field(mul_by_accum!(gradient_lift_matrix), view(gradients[dim], :, e), local_flux_values) + (; gradient_lift_matrix, local_flux_face_values_threaded) = cache_parabolic + @threaded for e in eachelement(mesh, dg) + local_flux_values = local_flux_face_values_threaded[Threads.threadid()] + for dim in eachdim(mesh) + for i in eachindex(local_flux_values) + # compute flux * (nx, ny, nz) + local_flux_values[i] = scalar_flux_face_values[i, e] * + mesh.md.nxyzJ[dim][i, e] + end + apply_to_each_field(mul_by_accum!(gradient_lift_matrix), + view(gradients[dim], :, e), local_flux_values) + end end - end end function calc_gradient_volume_integral!(gradients, u, mesh::DGMultiMesh, equations::AbstractEquationsParabolic, dg::DGMulti, cache, cache_parabolic) + (; strong_differentiation_matrices) = cache_parabolic - (; strong_differentiation_matrices) = cache_parabolic - - # compute volume contributions to gradients - @threaded for e in eachelement(mesh, dg) - for i in eachdim(mesh), j in eachdim(mesh) + # compute volume contributions to gradients + @threaded for e in eachelement(mesh, dg) + for i in eachdim(mesh), j in eachdim(mesh) - # We assume each element is affine (e.g., constant geometric terms) here. - dxidxhatj = mesh.md.rstxyzJ[i, j][1, e] + # We assume each element is affine (e.g., constant geometric terms) here. 
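# Why the `[1, e]` index below suffices (explanatory note, not from the
# patch): on an affine element the geometric term is constant, so it can be
# read once per element and folded into the accumulation as a scalar factor,
# schematically `du_e .+= dxidxhatj_const .* (D_j * u_e)`, instead of being
# stored and applied at every node as the curved-mesh version must do.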
+ dxidxhatj = mesh.md.rstxyzJ[i, j][1, e] - apply_to_each_field(mul_by_accum!(strong_differentiation_matrices[j], dxidxhatj), - view(gradients[i], :, e), view(u, :, e)) + apply_to_each_field(mul_by_accum!(strong_differentiation_matrices[j], + dxidxhatj), + view(gradients[i], :, e), view(u, :, e)) + end end - end end function calc_gradient_volume_integral!(gradients, u, mesh::DGMultiMesh{NDIMS, <:NonAffine}, equations::AbstractEquationsParabolic, dg::DGMulti, cache, cache_parabolic) where {NDIMS} + (; strong_differentiation_matrices, dxidxhatj, local_flux_viscous_threaded) = cache_parabolic - (; strong_differentiation_matrices, dxidxhatj, local_flux_viscous_threaded) = cache_parabolic + # compute volume contributions to gradients + @threaded for e in eachelement(mesh, dg) - # compute volume contributions to gradients - @threaded for e in eachelement(mesh, dg) - - # compute gradients with respect to reference coordinates - local_reference_gradients = local_flux_viscous_threaded[Threads.threadid()] - for i in eachdim(mesh) - apply_to_each_field(mul_by!(strong_differentiation_matrices[i]), - local_reference_gradients[i], view(u, :, e)) - end + # compute gradients with respect to reference coordinates + local_reference_gradients = local_flux_viscous_threaded[Threads.threadid()] + for i in eachdim(mesh) + apply_to_each_field(mul_by!(strong_differentiation_matrices[i]), + local_reference_gradients[i], view(u, :, e)) + end - # rotate to physical frame on each element - for i in eachdim(mesh), j in eachdim(mesh) - for node in eachindex(local_reference_gradients[j]) - gradients[i][node, e] = gradients[i][node, e] + dxidxhatj[i, j][node, e] * local_reference_gradients[j][node] - end + # rotate to physical frame on each element + for i in eachdim(mesh), j in eachdim(mesh) + for node in eachindex(local_reference_gradients[j]) + gradients[i][node, e] = gradients[i][node, e] + + dxidxhatj[i, j][node, e] * + local_reference_gradients[j][node] + end + end end - end end function calc_gradient!(gradients, u::StructArray, t, mesh::DGMultiMesh, equations::AbstractEquationsParabolic, boundary_conditions, dg::DGMulti, cache, cache_parabolic) + for dim in eachindex(gradients) + reset_du!(gradients[dim], dg) + end - for dim in eachindex(gradients) - reset_du!(gradients[dim], dg) - end - - calc_gradient_volume_integral!(gradients, u, mesh, equations, dg, cache, cache_parabolic) - - (; u_face_values) = cache_parabolic - apply_to_each_field(mul_by!(dg.basis.Vf), u_face_values, u) - - # compute fluxes at interfaces - (; scalar_flux_face_values) = cache_parabolic - (; mapM, mapP) = mesh.md - @threaded for face_node_index in each_face_node_global(mesh, dg) - idM, idP = mapM[face_node_index], mapP[face_node_index] - uM = u_face_values[idM] - uP = u_face_values[idP] - # Here, we use the "strong" formulation to compute the gradient. This guarantees that the parabolic - # formulation is symmetric and stable on curved meshes with variable geometric terms. 
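# Explanatory note (not from the patch): the assignment below is the
# interface correction of the strong ("jump") form, where each element sees
# half of the solution jump, schematically
#     gradient ≈ D * u + LIFT * (0.5 * (u⁺ - u⁻) * nJ).
# The divergence computed later instead uses the weak form with the average
# 0.5 * (f⁺ + f⁻) ⋅ nJ, giving a symmetric BR1-type pairing.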
- scalar_flux_face_values[idM] = 0.5 * (uP - uM) - end - - calc_boundary_flux!(scalar_flux_face_values, u_face_values, t, Gradient(), boundary_conditions, - mesh, equations, dg, cache, cache_parabolic) + calc_gradient_volume_integral!(gradients, u, mesh, equations, dg, cache, + cache_parabolic) + + (; u_face_values) = cache_parabolic + apply_to_each_field(mul_by!(dg.basis.Vf), u_face_values, u) + + # compute fluxes at interfaces + (; scalar_flux_face_values) = cache_parabolic + (; mapM, mapP) = mesh.md + @threaded for face_node_index in each_face_node_global(mesh, dg) + idM, idP = mapM[face_node_index], mapP[face_node_index] + uM = u_face_values[idM] + uP = u_face_values[idP] + # Here, we use the "strong" formulation to compute the gradient. This guarantees that the parabolic + # formulation is symmetric and stable on curved meshes with variable geometric terms. + scalar_flux_face_values[idM] = 0.5 * (uP - uM) + end - # compute surface contributions - calc_gradient_surface_integral!(gradients, u, scalar_flux_face_values, - mesh, equations, dg, cache, cache_parabolic) + calc_boundary_flux!(scalar_flux_face_values, u_face_values, t, Gradient(), + boundary_conditions, + mesh, equations, dg, cache, cache_parabolic) - invert_jacobian_gradient!(gradients, mesh, equations, dg, cache, cache_parabolic) + # compute surface contributions + calc_gradient_surface_integral!(gradients, u, scalar_flux_face_values, + mesh, equations, dg, cache, cache_parabolic) + invert_jacobian_gradient!(gradients, mesh, equations, dg, cache, cache_parabolic) end # affine mesh - constant Jacobian version function invert_jacobian_gradient!(gradients, mesh::DGMultiMesh, equations, dg::DGMulti, cache, cache_parabolic) - @threaded for e in eachelement(mesh, dg) + @threaded for e in eachelement(mesh, dg) - # Here, we exploit the fact that J is constant on affine elements, - # so we only have to access invJ once per element. - invJ = cache_parabolic.invJ[1, e] + # Here, we exploit the fact that J is constant on affine elements, + # so we only have to access invJ once per element. 
+ invJ = cache_parabolic.invJ[1, e] - for dim in eachdim(mesh) - for i in axes(gradients[dim], 1) - gradients[dim][i, e] = gradients[dim][i, e] * invJ - end + for dim in eachdim(mesh) + for i in axes(gradients[dim], 1) + gradients[dim][i, e] = gradients[dim][i, e] * invJ + end + end end - end end # non-affine mesh - variable Jacobian version -function invert_jacobian_gradient!(gradients, mesh::DGMultiMesh{NDIMS, <:NonAffine}, equations, +function invert_jacobian_gradient!(gradients, mesh::DGMultiMesh{NDIMS, <:NonAffine}, + equations, dg::DGMulti, cache, cache_parabolic) where {NDIMS} - (; invJ) = cache_parabolic - @threaded for e in eachelement(mesh, dg) - for dim in eachdim(mesh) - for i in axes(gradients[dim], 1) - gradients[dim][i, e] = gradients[dim][i, e] * invJ[i, e] - end + (; invJ) = cache_parabolic + @threaded for e in eachelement(mesh, dg) + for dim in eachdim(mesh) + for i in axes(gradients[dim], 1) + gradients[dim][i, e] = gradients[dim][i, e] * invJ[i, e] + end + end end - end end # do nothing for periodic domains function calc_boundary_flux!(flux, u, t, operator_type, ::BoundaryConditionPeriodic, mesh, equations::AbstractEquationsParabolic, dg::DGMulti, cache, cache_parabolic) - return nothing + return nothing end # "lispy tuple programming" instead of for loop for type stability function calc_boundary_flux!(flux, u, t, operator_type, boundary_conditions, mesh, equations, dg::DGMulti, cache, cache_parabolic) - # peel off first boundary condition - calc_single_boundary_flux!(flux, u, t, operator_type, first(boundary_conditions), first(keys(boundary_conditions)), - mesh, equations, dg, cache, cache_parabolic) + # peel off first boundary condition + calc_single_boundary_flux!(flux, u, t, operator_type, first(boundary_conditions), + first(keys(boundary_conditions)), + mesh, equations, dg, cache, cache_parabolic) - # recurse on the remainder of the boundary conditions - calc_boundary_flux!(flux, u, t, operator_type, Base.tail(boundary_conditions), - mesh, equations, dg, cache, cache_parabolic) + # recurse on the remainder of the boundary conditions + calc_boundary_flux!(flux, u, t, operator_type, Base.tail(boundary_conditions), + mesh, equations, dg, cache, cache_parabolic) end # terminate recursion -calc_boundary_flux!(flux, u, t, operator_type, boundary_conditions::NamedTuple{(),Tuple{}}, - mesh, equations, dg::DGMulti, cache, cache_parabolic) = nothing +function calc_boundary_flux!(flux, u, t, operator_type, + boundary_conditions::NamedTuple{(), Tuple{}}, + mesh, equations, dg::DGMulti, cache, cache_parabolic) + nothing +end function calc_single_boundary_flux!(flux_face_values, u_face_values, t, operator_type, boundary_condition, boundary_key, - mesh, equations, dg::DGMulti{NDIMS}, cache, cache_parabolic) where {NDIMS} - rd = dg.basis - md = mesh.md - - num_faces = StartUpDG.num_faces(rd.element_type) - num_pts_per_face = rd.Nfq ÷ num_faces - (; xyzf, nxyz) = md - for f in mesh.boundary_faces[boundary_key] - for i in Base.OneTo(num_pts_per_face) - - # reverse engineer element + face node indices (avoids reshaping arrays) - e = ((f-1) ÷ num_faces) + 1 - fid = i + ((f-1) % num_faces) * num_pts_per_face - - face_normal = SVector{NDIMS}(getindex.(nxyz, fid, e)) - face_coordinates = SVector{NDIMS}(getindex.(xyzf, fid, e)) - - # for both the gradient and the divergence, the boundary flux is scalar valued. - # for the gradient, it is the solution; for divergence, it is the normal flux. 
- flux_face_values[fid,e] = boundary_condition(flux_face_values[fid,e], u_face_values[fid,e], - face_normal, face_coordinates, t, - operator_type, equations) - - # Here, we use the "strong form" for the Gradient (and the "weak form" for Divergence). - # `flux_face_values` should contain the boundary values for `u`, and we - # subtract off `u_face_values[fid, e]` because we are using the strong formulation to - # compute the gradient. - if operator_type isa Gradient - flux_face_values[fid, e] = flux_face_values[fid, e] - u_face_values[fid, e] - end - + mesh, equations, dg::DGMulti{NDIMS}, cache, + cache_parabolic) where {NDIMS} + rd = dg.basis + md = mesh.md + + num_faces = StartUpDG.num_faces(rd.element_type) + num_pts_per_face = rd.Nfq ÷ num_faces + (; xyzf, nxyz) = md + for f in mesh.boundary_faces[boundary_key] + for i in Base.OneTo(num_pts_per_face) + + # reverse engineer element + face node indices (avoids reshaping arrays) + e = ((f - 1) ÷ num_faces) + 1 + fid = i + ((f - 1) % num_faces) * num_pts_per_face + + face_normal = SVector{NDIMS}(getindex.(nxyz, fid, e)) + face_coordinates = SVector{NDIMS}(getindex.(xyzf, fid, e)) + + # for both the gradient and the divergence, the boundary flux is scalar valued. + # for the gradient, it is the solution; for divergence, it is the normal flux. + flux_face_values[fid, e] = boundary_condition(flux_face_values[fid, e], + u_face_values[fid, e], + face_normal, face_coordinates, t, + operator_type, equations) + + # Here, we use the "strong form" for the Gradient (and the "weak form" for Divergence). + # `flux_face_values` should contain the boundary values for `u`, and we + # subtract off `u_face_values[fid, e]` because we are using the strong formulation to + # compute the gradient. + if operator_type isa Gradient + flux_face_values[fid, e] = flux_face_values[fid, e] - u_face_values[fid, e] + end + end end - end - return nothing + return nothing end function calc_viscous_fluxes!(flux_viscous, u, gradients, mesh::DGMultiMesh, equations::AbstractEquationsParabolic, dg::DGMulti, cache, cache_parabolic) + for dim in eachdim(mesh) + reset_du!(flux_viscous[dim], dg) + end - for dim in eachdim(mesh) - reset_du!(flux_viscous[dim], dg) - end - - (; local_u_values_threaded) = cache_parabolic + (; local_u_values_threaded) = cache_parabolic - @threaded for e in eachelement(mesh, dg) + @threaded for e in eachelement(mesh, dg) - # reset local storage for each element, interpolate u to quadrature points - # TODO: DGMulti. Specialize for nodal collocation methods (SBP, GaussSBP)? - local_u_values = local_u_values_threaded[Threads.threadid()] - fill!(local_u_values, zero(eltype(local_u_values))) - apply_to_each_field(mul_by!(dg.basis.Vq), local_u_values, view(u, :, e)) + # reset local storage for each element, interpolate u to quadrature points + # TODO: DGMulti. Specialize for nodal collocation methods (SBP, GaussSBP)? 
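# Quick numeric check (added for illustration) of the index arithmetic used
# in `calc_single_boundary_flux!` above: global faces are numbered
# element-major, f = (e - 1) * num_faces + local_face, so both indices can be
# recovered without reshaping any arrays.
num_faces, num_pts_per_face = 4, 3
f, i = 6, 2                          # global face 6 = element 2, local face 2
e = ((f - 1) ÷ num_faces) + 1        # element index: 2
fid = i + ((f - 1) % num_faces) * num_pts_per_face  # face-node index: 2 + 1 * 3 = 5
@assert (e, fid) == (2, 5)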
+ local_u_values = local_u_values_threaded[Threads.threadid()] + fill!(local_u_values, zero(eltype(local_u_values))) + apply_to_each_field(mul_by!(dg.basis.Vq), local_u_values, view(u, :, e)) - # compute viscous flux at quad points - for i in eachindex(local_u_values) - u_i = local_u_values[i] - gradients_i = getindex.(gradients, i, e) - for dim in eachdim(mesh) - flux_viscous_i = flux(u_i, gradients_i, dim, equations) - setindex!(flux_viscous[dim], flux_viscous_i, i, e) - end + # compute viscous flux at quad points + for i in eachindex(local_u_values) + u_i = local_u_values[i] + gradients_i = getindex.(gradients, i, e) + for dim in eachdim(mesh) + flux_viscous_i = flux(u_i, gradients_i, dim, equations) + setindex!(flux_viscous[dim], flux_viscous_i, i, e) + end + end end - - end end # no penalization for a BR1 parabolic solver -function calc_viscous_penalty!(scalar_flux_face_values, u_face_values, t, boundary_conditions, +function calc_viscous_penalty!(scalar_flux_face_values, u_face_values, t, + boundary_conditions, mesh, equations::AbstractEquationsParabolic, dg::DGMulti, - parabolic_scheme::ViscousFormulationBassiRebay1, cache, cache_parabolic) - return nothing + parabolic_scheme::ViscousFormulationBassiRebay1, cache, + cache_parabolic) + return nothing end -function calc_viscous_penalty!(scalar_flux_face_values, u_face_values, t, boundary_conditions, +function calc_viscous_penalty!(scalar_flux_face_values, u_face_values, t, + boundary_conditions, mesh, equations::AbstractEquationsParabolic, dg::DGMulti, parabolic_scheme, cache, cache_parabolic) - # compute fluxes at interfaces - (; scalar_flux_face_values) = cache_parabolic - (; mapM, mapP) = mesh.md - @threaded for face_node_index in each_face_node_global(mesh, dg) - idM, idP = mapM[face_node_index], mapP[face_node_index] - uM, uP = u_face_values[idM], u_face_values[idP] - scalar_flux_face_values[idM] = scalar_flux_face_values[idM] + penalty(uP, uM, equations, parabolic_scheme) - end - return nothing + # compute fluxes at interfaces + (; scalar_flux_face_values) = cache_parabolic + (; mapM, mapP) = mesh.md + @threaded for face_node_index in each_face_node_global(mesh, dg) + idM, idP = mapM[face_node_index], mapP[face_node_index] + uM, uP = u_face_values[idM], u_face_values[idP] + scalar_flux_face_values[idM] = scalar_flux_face_values[idM] + + penalty(uP, uM, equations, parabolic_scheme) + end + return nothing end function calc_divergence_volume_integral!(du, u, flux_viscous, mesh::DGMultiMesh, equations::AbstractEquationsParabolic, dg::DGMulti, cache, cache_parabolic) - (; weak_differentiation_matrices) = cache_parabolic - - # compute volume contributions to divergence - @threaded for e in eachelement(mesh, dg) - for i in eachdim(mesh), j in eachdim(mesh) - dxidxhatj = mesh.md.rstxyzJ[i, j][1, e] # assumes mesh is affine - apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[j], dxidxhatj), - view(du, :, e), view(flux_viscous[i], :, e)) + (; weak_differentiation_matrices) = cache_parabolic + + # compute volume contributions to divergence + @threaded for e in eachelement(mesh, dg) + for i in eachdim(mesh), j in eachdim(mesh) + dxidxhatj = mesh.md.rstxyzJ[i, j][1, e] # assumes mesh is affine + apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[j], dxidxhatj), + view(du, :, e), view(flux_viscous[i], :, e)) + end end - end end -function calc_divergence_volume_integral!(du, u, flux_viscous, mesh::DGMultiMesh{NDIMS, <:NonAffine}, +function calc_divergence_volume_integral!(du, u, flux_viscous, + 
mesh::DGMultiMesh{NDIMS, <:NonAffine}, equations::AbstractEquationsParabolic, dg::DGMulti, cache, cache_parabolic) where {NDIMS} - (; weak_differentiation_matrices, dxidxhatj, local_flux_viscous_threaded) = cache_parabolic - - # compute volume contributions to divergence - @threaded for e in eachelement(mesh, dg) - - local_viscous_flux = local_flux_viscous_threaded[Threads.threadid()][1] - for i in eachdim(mesh) - # rotate flux to reference coordinates - fill!(local_viscous_flux, zero(eltype(local_viscous_flux))) - for j in eachdim(mesh) - for node in eachindex(local_viscous_flux) - local_viscous_flux[node] = local_viscous_flux[node] + dxidxhatj[j, i][node, e] * flux_viscous[j][node, e] + (; weak_differentiation_matrices, dxidxhatj, local_flux_viscous_threaded) = cache_parabolic + + # compute volume contributions to divergence + @threaded for e in eachelement(mesh, dg) + local_viscous_flux = local_flux_viscous_threaded[Threads.threadid()][1] + for i in eachdim(mesh) + # rotate flux to reference coordinates + fill!(local_viscous_flux, zero(eltype(local_viscous_flux))) + for j in eachdim(mesh) + for node in eachindex(local_viscous_flux) + local_viscous_flux[node] = local_viscous_flux[node] + + dxidxhatj[j, i][node, e] * + flux_viscous[j][node, e] + end + end + + # differentiate with respect to reference coordinates + apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[i]), + view(du, :, e), local_viscous_flux) end - end - - # differentiate with respect to reference coordinates - apply_to_each_field(mul_by_accum!(weak_differentiation_matrices[i]), - view(du, :, e), local_viscous_flux) end - end end function calc_divergence!(du, u::StructArray, t, flux_viscous, mesh::DGMultiMesh, equations::AbstractEquationsParabolic, - boundary_conditions, dg::DGMulti, parabolic_scheme, cache, cache_parabolic) + boundary_conditions, dg::DGMulti, parabolic_scheme, cache, + cache_parabolic) + reset_du!(du, dg) - reset_du!(du, dg) + calc_divergence_volume_integral!(du, u, flux_viscous, mesh, equations, dg, cache, + cache_parabolic) - calc_divergence_volume_integral!(du, u, flux_viscous, mesh, equations, dg, cache, cache_parabolic) - - # interpolates from solution coefficients to face quadrature points - (; projection_face_interpolation_matrix) = cache_parabolic - flux_viscous_face_values = cache_parabolic.gradients_face_values # reuse storage - for dim in eachdim(mesh) - apply_to_each_field(mul_by!(projection_face_interpolation_matrix), flux_viscous_face_values[dim], flux_viscous[dim]) - end - - # compute fluxes at interfaces - (; scalar_flux_face_values) = cache_parabolic - (; mapM, mapP, nxyzJ) = mesh.md - - @threaded for face_node_index in each_face_node_global(mesh, dg, cache, cache_parabolic) - idM, idP = mapM[face_node_index], mapP[face_node_index] - - # compute f(u, ∇u) ⋅ n - flux_face_value = zero(eltype(scalar_flux_face_values)) + # interpolates from solution coefficients to face quadrature points + (; projection_face_interpolation_matrix) = cache_parabolic + flux_viscous_face_values = cache_parabolic.gradients_face_values # reuse storage for dim in eachdim(mesh) - fM = flux_viscous_face_values[dim][idM] - fP = flux_viscous_face_values[dim][idP] - # Here, we use the "weak" formulation to compute the divergence (to ensure stability on curved meshes). 
- flux_face_value = flux_face_value + 0.5 * (fP + fM) * nxyzJ[dim][face_node_index] + apply_to_each_field(mul_by!(projection_face_interpolation_matrix), + flux_viscous_face_values[dim], flux_viscous[dim]) + end + + # compute fluxes at interfaces + (; scalar_flux_face_values) = cache_parabolic + (; mapM, mapP, nxyzJ) = mesh.md + + @threaded for face_node_index in each_face_node_global(mesh, dg, cache, cache_parabolic) + idM, idP = mapM[face_node_index], mapP[face_node_index] + + # compute f(u, ∇u) ⋅ n + flux_face_value = zero(eltype(scalar_flux_face_values)) + for dim in eachdim(mesh) + fM = flux_viscous_face_values[dim][idM] + fP = flux_viscous_face_values[dim][idP] + # Here, we use the "weak" formulation to compute the divergence (to ensure stability on curved meshes). + flux_face_value = flux_face_value + + 0.5 * (fP + fM) * nxyzJ[dim][face_node_index] + end + scalar_flux_face_values[idM] = flux_face_value end - scalar_flux_face_values[idM] = flux_face_value - end - calc_boundary_flux!(scalar_flux_face_values, cache_parabolic.u_face_values, t, Divergence(), - boundary_conditions, mesh, equations, dg, cache, cache_parabolic) + calc_boundary_flux!(scalar_flux_face_values, cache_parabolic.u_face_values, t, + Divergence(), + boundary_conditions, mesh, equations, dg, cache, cache_parabolic) - calc_viscous_penalty!(scalar_flux_face_values, cache_parabolic.u_face_values, t, - boundary_conditions, mesh, equations, dg, parabolic_scheme, - cache, cache_parabolic) + calc_viscous_penalty!(scalar_flux_face_values, cache_parabolic.u_face_values, t, + boundary_conditions, mesh, equations, dg, parabolic_scheme, + cache, cache_parabolic) - # surface contributions - apply_to_each_field(mul_by_accum!(cache_parabolic.divergence_lift_matrix), du, scalar_flux_face_values) + # surface contributions + apply_to_each_field(mul_by_accum!(cache_parabolic.divergence_lift_matrix), du, + scalar_flux_face_values) - # Note: we do not flip the sign of the geometric Jacobian here. - # This is because the parabolic fluxes are assumed to be of the form - # `du/dt + df/dx = dg/dx + source(x,t)`, - # where f(u) is the inviscid flux and g(u) is the viscous flux. - invert_jacobian!(du, mesh, equations, dg, cache; scaling=1.0) + # Note: we do not flip the sign of the geometric Jacobian here. + # This is because the parabolic fluxes are assumed to be of the form + # `du/dt + df/dx = dg/dx + source(x,t)`, + # where f(u) is the inviscid flux and g(u) is the viscous flux. + invert_jacobian!(du, mesh, equations, dg, cache; scaling = 1.0) end # assumptions: parabolic terms are of the form div(f(u, grad(u))) and @@ -393,32 +423,31 @@ end # 2. compute f(u, grad(u)) # 3. compute div(u) # boundary conditions will be applied to both grad(u) and div(u). 
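For intuition, the three steps listed above reduce, in a periodic 1D finite-difference analogue (purely illustrative, not DG), to:

# toy analogue of u_t = (nu * u_x)_x: gradient, viscous flux, divergence
nu, n, h = 0.1, 64, 1.0 / 64
u = rand(n)
grad   = (circshift(u, -1) .- circshift(u, 1)) ./ (2h)           # 1. compute grad(u)
f_visc = nu .* grad                                              # 2. compute f(u, grad(u))
du     = (circshift(f_visc, -1) .- circshift(f_visc, 1)) ./ (2h) # 3. compute div(f)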
-function rhs_parabolic!(du, u, t, mesh::DGMultiMesh, equations_parabolic::AbstractEquationsParabolic, +function rhs_parabolic!(du, u, t, mesh::DGMultiMesh, + equations_parabolic::AbstractEquationsParabolic, initial_condition, boundary_conditions, source_terms, dg::DGMulti, parabolic_scheme, cache, cache_parabolic) + reset_du!(du, dg) - reset_du!(du, dg) - - @trixi_timeit timer() "transform variables" begin - (; u_transformed, gradients, flux_viscous) = cache_parabolic - transform_variables!(u_transformed, u, mesh, equations_parabolic, - dg, parabolic_scheme, cache, cache_parabolic) - end - - @trixi_timeit timer() "calc gradient" begin - calc_gradient!(gradients, u_transformed, t, mesh, equations_parabolic, - boundary_conditions, dg, cache, cache_parabolic) - end + @trixi_timeit timer() "transform variables" begin + (; u_transformed, gradients, flux_viscous) = cache_parabolic + transform_variables!(u_transformed, u, mesh, equations_parabolic, + dg, parabolic_scheme, cache, cache_parabolic) + end - @trixi_timeit timer() "calc viscous fluxes" begin - calc_viscous_fluxes!(flux_viscous, u_transformed, gradients, - mesh, equations_parabolic, dg, cache, cache_parabolic) - end + @trixi_timeit timer() "calc gradient" begin + calc_gradient!(gradients, u_transformed, t, mesh, equations_parabolic, + boundary_conditions, dg, cache, cache_parabolic) + end - @trixi_timeit timer() "calc divergence" begin - calc_divergence!(du, u_transformed, t, flux_viscous, mesh, equations_parabolic, - boundary_conditions, dg, parabolic_scheme, cache, cache_parabolic) - end - return nothing + @trixi_timeit timer() "calc viscous fluxes" begin + calc_viscous_fluxes!(flux_viscous, u_transformed, gradients, + mesh, equations_parabolic, dg, cache, cache_parabolic) + end + @trixi_timeit timer() "calc divergence" begin + calc_divergence!(du, u_transformed, t, flux_viscous, mesh, equations_parabolic, + boundary_conditions, dg, parabolic_scheme, cache, cache_parabolic) + end + return nothing end diff --git a/src/solvers/dgmulti/flux_differencing.jl b/src/solvers/dgmulti/flux_differencing.jl index 1031c837efa..884a8fac43b 100644 --- a/src/solvers/dgmulti/flux_differencing.jl +++ b/src/solvers/dgmulti/flux_differencing.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # hadamard_sum!(du, A, # flux_is_symmetric, volume_flux, @@ -24,158 +25,168 @@ @inline function hadamard_sum!(du, A, flux_is_symmetric::True, volume_flux, orientation_or_normal_direction, u, equations) - row_ids, col_ids = axes(A) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for j in col_ids - # This routine computes only the upper-triangular part of the hadamard sum (A .* F). - # We avoid computing the lower-triangular part, and instead accumulate those contributions - # while computing the upper-triangular part (using the fact that A is skew-symmetric and F - # is symmetric). - if j > i - u_j = u[j] - AF_ij = 2 * A[i,j] * volume_flux(u_i, u_j, orientation_or_normal_direction, equations) - du_i = du_i + AF_ij - du[j] = du[j] - AF_ij - end + row_ids, col_ids = axes(A) + + for i in row_ids + u_i = u[i] + du_i = du[i] + for j in col_ids + # This routine computes only the upper-triangular part of the hadamard sum (A .* F). + # We avoid computing the lower-triangular part, and instead accumulate those contributions + # while computing the upper-triangular part (using the fact that A is skew-symmetric and F + # is symmetric). 
+ if j > i + u_j = u[j] + AF_ij = 2 * A[i, j] * + volume_flux(u_i, u_j, orientation_or_normal_direction, + equations) + du_i = du_i + AF_ij + du[j] = du[j] - AF_ij + end + end + du[i] = du_i end - du[i] = du_i - end end # Version for dense operators and non-symmetric fluxes @inline function hadamard_sum!(du, A, flux_is_symmetric::False, volume_flux, orientation::Integer, u, equations) - row_ids, col_ids = axes(A) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for j in col_ids - u_j = u[j] - f_ij = volume_flux(u_i, u_j, orientation, equations) - du_i = du_i + 2 * A[i,j] * f_ij + row_ids, col_ids = axes(A) + + for i in row_ids + u_i = u[i] + du_i = du[i] + for j in col_ids + u_j = u[j] + f_ij = volume_flux(u_i, u_j, orientation, equations) + du_i = du_i + 2 * A[i, j] * f_ij + end + du[i] = du_i end - du[i] = du_i - end end @inline function hadamard_sum!(du, A, flux_is_symmetric::False, volume_flux, normal_direction::AbstractVector, u, equations) - row_ids, col_ids = axes(A) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for j in col_ids - u_j = u[j] - # The `normal_direction::AbstractVector` has to be passed in twice. - # This is because on curved meshes, nonconservative fluxes are - # evaluated using both the normal and its average at interfaces. - f_ij = volume_flux(u_i, u_j, normal_direction, normal_direction, equations) - du_i = du_i + 2 * A[i,j] * f_ij + row_ids, col_ids = axes(A) + + for i in row_ids + u_i = u[i] + du_i = du[i] + for j in col_ids + u_j = u[j] + # The `normal_direction::AbstractVector` has to be passed in twice. + # This is because on curved meshes, nonconservative fluxes are + # evaluated using both the normal and its average at interfaces. + f_ij = volume_flux(u_i, u_j, normal_direction, normal_direction, equations) + du_i = du_i + 2 * A[i, j] * f_ij + end + du[i] = du_i end - du[i] = du_i - end end # Version for sparse operators and symmetric fluxes -@inline function hadamard_sum!(du, A::LinearAlgebra.Adjoint{<:Any, <:AbstractSparseMatrixCSC}, +@inline function hadamard_sum!(du, + A::LinearAlgebra.Adjoint{<:Any, <:AbstractSparseMatrixCSC + }, flux_is_symmetric::True, volume_flux, orientation_or_normal_direction, u, equations) - A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids = axes(A, 2) - rows = rowvals(A_base) - vals = nonzeros(A_base) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for id in nzrange(A_base, i) - j = rows[id] - # This routine computes only the upper-triangular part of the hadamard sum (A .* F). - # We avoid computing the lower-triangular part, and instead accumulate those contributions - # while computing the upper-triangular part (using the fact that A is skew-symmetric and F - # is symmetric). - if j > i - u_j = u[j] - A_ij = vals[id] - AF_ij = 2 * A_ij * volume_flux(u_i, u_j, orientation_or_normal_direction, equations) - du_i = du_i + AF_ij - du[j] = du[j] - AF_ij - end + A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR + row_ids = axes(A, 2) + rows = rowvals(A_base) + vals = nonzeros(A_base) + + for i in row_ids + u_i = u[i] + du_i = du[i] + for id in nzrange(A_base, i) + j = rows[id] + # This routine computes only the upper-triangular part of the hadamard sum (A .* F). + # We avoid computing the lower-triangular part, and instead accumulate those contributions + # while computing the upper-triangular part (using the fact that A is skew-symmetric and F + # is symmetric). 
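The upper-triangular trick used in the `hadamard_sum!` variants above can be verified with a scalar toy problem: for skew-symmetric A and a symmetric two-point flux, accumulating only the j > i contributions reproduces the full row sums of 2 * A .* F. A minimal check (hypothetical flux function, not a Trixi API):

f(u, v) = 0.5 * (u + v)               # symmetric two-point "flux" (scalar toy)
n = 4
B = randn(n, n); A = 0.5 * (B - B')   # skew-symmetric operator
u = randn(n)

du_ref = [sum(2 * A[i, j] * f(u[i], u[j]) for j in 1:n) for i in 1:n]

du = zeros(n)
for i in 1:n, j in (i + 1):n          # upper triangle only
    AF_ij = 2 * A[i, j] * f(u[i], u[j])
    du[i] += AF_ij                    # row i contribution
    du[j] -= AF_ij                    # row j contribution, since A[j, i] = -A[i, j]
end
du ≈ du_ref                           # true; the diagonal vanishes because A[i, i] = 0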
+ if j > i + u_j = u[j] + A_ij = vals[id] + AF_ij = 2 * A_ij * + volume_flux(u_i, u_j, orientation_or_normal_direction, + equations) + du_i = du_i + AF_ij + du[j] = du[j] - AF_ij + end + end + du[i] = du_i end - du[i] = du_i - end end # Version for sparse operators and symmetric fluxes with curved meshes -@inline function hadamard_sum!(du, A::LinearAlgebra.Adjoint{<:Any, <:AbstractSparseMatrixCSC}, +@inline function hadamard_sum!(du, + A::LinearAlgebra.Adjoint{<:Any, <:AbstractSparseMatrixCSC + }, flux_is_symmetric::True, volume_flux, normal_directions::AbstractVector{<:AbstractVector}, u, equations) - A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids = axes(A, 2) - rows = rowvals(A_base) - vals = nonzeros(A_base) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for id in nzrange(A_base, i) - j = rows[id] - # This routine computes only the upper-triangular part of the hadamard sum (A .* F). - # We avoid computing the lower-triangular part, and instead accumulate those contributions - # while computing the upper-triangular part (using the fact that A is skew-symmetric and F - # is symmetric). - if j > i - u_j = u[j] - A_ij = vals[id] - - # provably entropy stable de-aliasing of geometric terms - normal_direction = 0.5 * (getindex.(normal_directions, i) + getindex.(normal_directions, j)) - - AF_ij = 2 * A_ij * volume_flux(u_i, u_j, normal_direction, equations) - du_i = du_i + AF_ij - du[j] = du[j] - AF_ij - end + A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR + row_ids = axes(A, 2) + rows = rowvals(A_base) + vals = nonzeros(A_base) + + for i in row_ids + u_i = u[i] + du_i = du[i] + for id in nzrange(A_base, i) + j = rows[id] + # This routine computes only the upper-triangular part of the hadamard sum (A .* F). + # We avoid computing the lower-triangular part, and instead accumulate those contributions + # while computing the upper-triangular part (using the fact that A is skew-symmetric and F + # is symmetric). + if j > i + u_j = u[j] + A_ij = vals[id] + + # provably entropy stable de-aliasing of geometric terms + normal_direction = 0.5 * (getindex.(normal_directions, i) + + getindex.(normal_directions, j)) + + AF_ij = 2 * A_ij * volume_flux(u_i, u_j, normal_direction, equations) + du_i = du_i + AF_ij + du[j] = du[j] - AF_ij + end + end + du[i] = du_i end - du[i] = du_i - end end # TODO: DGMulti. Fix for curved meshes. # Version for sparse operators and non-symmetric fluxes -@inline function hadamard_sum!(du, A::LinearAlgebra.Adjoint{<:Any, <:AbstractSparseMatrixCSC}, +@inline function hadamard_sum!(du, + A::LinearAlgebra.Adjoint{<:Any, <:AbstractSparseMatrixCSC + }, flux_is_symmetric::False, volume_flux, normal_direction::AbstractVector, u, equations) - A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids = axes(A, 2) - rows = rowvals(A_base) - vals = nonzeros(A_base) - - for i in row_ids - u_i = u[i] - du_i = du[i] - for id in nzrange(A_base, i) - A_ij = vals[id] - j = rows[id] - # The `normal_direction::AbstractVector` has to be passed in twice. - # This is because on curved meshes, nonconservative fluxes are - # evaluated using both the normal and its average at interfaces. 
- u_j = u[j] - f_ij = volume_flux(u_i, u_j, normal_direction, normal_direction, equations) - du_i = du_i + 2 * A_ij * f_ij + A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR + row_ids = axes(A, 2) + rows = rowvals(A_base) + vals = nonzeros(A_base) + + for i in row_ids + u_i = u[i] + du_i = du[i] + for id in nzrange(A_base, i) + A_ij = vals[id] + j = rows[id] + # The `normal_direction::AbstractVector` has to be passed in twice. + # This is because on curved meshes, nonconservative fluxes are + # evaluated using both the normal and its average at interfaces. + u_j = u[j] + f_ij = volume_flux(u_i, u_j, normal_direction, normal_direction, equations) + du_i = du_i + 2 * A_ij * f_ij + end + du[i] = du_i end - du[i] = du_i - end end - # For DGMulti implementations, we construct "physical" differentiation operators by taking linear # combinations of reference differentiation operators scaled by geometric change of variables terms. # We use a lazy evaluation of physical differentiation operators, so that we can compute linear @@ -183,36 +194,49 @@ end @inline function build_lazy_physical_derivative(element, orientation, mesh::DGMultiMesh{1}, dg, cache, operator_scaling = 1.0) - @unpack Qrst_skew = cache - @unpack rxJ = mesh.md - # ignore orientation - return LazyMatrixLinearCombo(Qrst_skew, operator_scaling .* (rxJ[1,element],)) + @unpack Qrst_skew = cache + @unpack rxJ = mesh.md + # ignore orientation + return LazyMatrixLinearCombo(Qrst_skew, operator_scaling .* (rxJ[1, element],)) end @inline function build_lazy_physical_derivative(element, orientation, mesh::DGMultiMesh{2}, dg, cache, operator_scaling = 1.0) - @unpack Qrst_skew = cache - @unpack rxJ, sxJ, ryJ, syJ = mesh.md - if orientation == 1 - return LazyMatrixLinearCombo(Qrst_skew, operator_scaling .* (rxJ[1,element], sxJ[1,element])) - else # if orientation == 2 - return LazyMatrixLinearCombo(Qrst_skew, operator_scaling .* (ryJ[1,element], syJ[1,element])) - end + @unpack Qrst_skew = cache + @unpack rxJ, sxJ, ryJ, syJ = mesh.md + if orientation == 1 + return LazyMatrixLinearCombo(Qrst_skew, + operator_scaling .* + (rxJ[1, element], sxJ[1, element])) + else # if orientation == 2 + return LazyMatrixLinearCombo(Qrst_skew, + operator_scaling .* + (ryJ[1, element], syJ[1, element])) + end end @inline function build_lazy_physical_derivative(element, orientation, mesh::DGMultiMesh{3}, dg, cache, operator_scaling = 1.0) - @unpack Qrst_skew = cache - @unpack rxJ, sxJ, txJ, ryJ, syJ, tyJ, rzJ, szJ, tzJ = mesh.md - if orientation == 1 - return LazyMatrixLinearCombo(Qrst_skew, operator_scaling .* (rxJ[1, element], sxJ[1, element], txJ[1, element])) - elseif orientation == 2 - return LazyMatrixLinearCombo(Qrst_skew, operator_scaling .* (ryJ[1, element], syJ[1, element], tyJ[1, element])) - else # if orientation == 3 - return LazyMatrixLinearCombo(Qrst_skew, operator_scaling .* (rzJ[1, element], szJ[1, element], tzJ[1, element])) - end + @unpack Qrst_skew = cache + @unpack rxJ, sxJ, txJ, ryJ, syJ, tyJ, rzJ, szJ, tzJ = mesh.md + if orientation == 1 + return LazyMatrixLinearCombo(Qrst_skew, + operator_scaling .* + (rxJ[1, element], sxJ[1, element], + txJ[1, element])) + elseif orientation == 2 + return LazyMatrixLinearCombo(Qrst_skew, + operator_scaling .* + (ryJ[1, element], syJ[1, element], + tyJ[1, element])) + else # if orientation == 3 + return LazyMatrixLinearCombo(Qrst_skew, + operator_scaling .* + (rzJ[1, element], szJ[1, element], + tzJ[1, element])) + end end # Return the contravariant basis vector 
corresponding to the Cartesian @@ -222,79 +246,84 @@ end # and jth reference coordinate, respectively. These are geometric terms which # appear when using the chain rule to compute physical derivatives as a linear # combination of reference derivatives. -@inline function get_contravariant_vector(element, orientation, mesh::DGMultiMesh{NDIMS}, cache) where {NDIMS} - # note that rstxyzJ = [rxJ, sxJ, txJ; ryJ syJ tyJ; rzJ szJ tzJ], so that this will return - # SVector{2}(rxJ[1, element], ryJ[1, element]) in 2D. - - # assumes geometric terms are constant on each element - dxidxhatj = mesh.md.rstxyzJ - return SVector{NDIMS}(getindex.(dxidxhatj[:, orientation], 1, element)) +@inline function get_contravariant_vector(element, orientation, + mesh::DGMultiMesh{NDIMS}, cache) where {NDIMS} + # note that rstxyzJ = [rxJ, sxJ, txJ; ryJ syJ tyJ; rzJ szJ tzJ], so that this will return + # SVector{2}(rxJ[1, element], ryJ[1, element]) in 2D. + + # assumes geometric terms are constant on each element + dxidxhatj = mesh.md.rstxyzJ + return SVector{NDIMS}(getindex.(dxidxhatj[:, orientation], 1, element)) end -@inline function get_contravariant_vector(element, orientation, mesh::DGMultiMesh{NDIMS, NonAffine}, cache) where {NDIMS} - # note that rstxyzJ = [rxJ, sxJ, txJ; ryJ syJ tyJ; rzJ szJ tzJ] +@inline function get_contravariant_vector(element, orientation, + mesh::DGMultiMesh{NDIMS, NonAffine}, + cache) where {NDIMS} + # note that rstxyzJ = [rxJ, sxJ, txJ; ryJ syJ tyJ; rzJ szJ tzJ] - # assumes geometric terms vary spatially over each element - (; dxidxhatj) = cache - return SVector{NDIMS}(view.(dxidxhatj[:, orientation], :, element)) + # assumes geometric terms vary spatially over each element + (; dxidxhatj) = cache + return SVector{NDIMS}(view.(dxidxhatj[:, orientation], :, element)) end # use hybridized SBP operators for general flux differencing schemes. function compute_flux_differencing_SBP_matrices(dg::DGMulti) - compute_flux_differencing_SBP_matrices(dg, has_sparse_operators(dg)) + compute_flux_differencing_SBP_matrices(dg, has_sparse_operators(dg)) end function compute_flux_differencing_SBP_matrices(dg::DGMulti, sparse_operators) - rd = dg.basis - Qrst_hybridized, VhP, Ph = StartUpDG.hybridized_SBP_operators(rd) - Qrst_skew = map(A -> 0.5 * (A - A'), Qrst_hybridized) - if sparse_operators == true - Qrst_skew = map(Qi -> droptol!(sparse(Qi'), 100 * eps(eltype(Qi)))', Qrst_skew) - end - return Qrst_skew, VhP, Ph + rd = dg.basis + Qrst_hybridized, VhP, Ph = StartUpDG.hybridized_SBP_operators(rd) + Qrst_skew = map(A -> 0.5 * (A - A'), Qrst_hybridized) + if sparse_operators == true + Qrst_skew = map(Qi -> droptol!(sparse(Qi'), 100 * eps(eltype(Qi)))', Qrst_skew) + end + return Qrst_skew, VhP, Ph end # use traditional multidimensional SBP operators for SBP approximation types. 
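Both the hybridized construction above and the traditional SBP construction that follows extract the skew-symmetric part 0.5 * (Q - Q') of an operator Q = M * D. Since the SBP property states Q + Q' = B (the boundary matrix), the splitting Q = 0.5 * (Q - Q') + 0.5 * B separates volume terms from boundary terms. A minimal 1D illustration with hypothetical second-order operators:

M = [0.5 0.0 0.0; 0.0 1.0 0.0; 0.0 0.0 0.5]       # diagonal norm (mass) matrix
D = [-1.0 1.0 0.0; -0.5 0.0 0.5; 0.0 -1.0 1.0]    # first-derivative operator
Q = M * D
Q_skew = 0.5 * (Q - Q')                           # as in compute_flux_differencing_SBP_matrices
B = Q + Q'                                        # equals diag(-1, 0, 1): boundary terms only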
-function compute_flux_differencing_SBP_matrices(dg::DGMultiFluxDiffSBP, sparse_operators) - rd = dg.basis - @unpack M, Drst, Pq = rd - Qrst = map(D -> M * D, Drst) - Qrst_skew = map(A -> 0.5 * (A - A'), Qrst) - if sparse_operators == true - Qrst_skew = map(Qi -> droptol!(sparse(Qi'), 100 * eps(eltype(Qi)))', Qrst_skew) - end - return Qrst_skew +function compute_flux_differencing_SBP_matrices(dg::DGMultiFluxDiffSBP, + sparse_operators) + rd = dg.basis + @unpack M, Drst, Pq = rd + Qrst = map(D -> M * D, Drst) + Qrst_skew = map(A -> 0.5 * (A - A'), Qrst) + if sparse_operators == true + Qrst_skew = map(Qi -> droptol!(sparse(Qi'), 100 * eps(eltype(Qi)))', Qrst_skew) + end + return Qrst_skew end - # For flux differencing SBP-type approximations, store solutions in Matrix{SVector{nvars}}. # This results in a slight speedup for `calc_volume_integral!`. function allocate_nested_array(uEltype, nvars, array_dimensions, dg::DGMultiFluxDiffSBP) - return zeros(SVector{nvars, uEltype}, array_dimensions...) + return zeros(SVector{nvars, uEltype}, array_dimensions...) end -function create_cache(mesh::DGMultiMesh, equations, dg::DGMultiFluxDiffSBP, RealT, uEltype) - - rd = dg.basis - md = mesh.md +function create_cache(mesh::DGMultiMesh, equations, dg::DGMultiFluxDiffSBP, RealT, + uEltype) + rd = dg.basis + md = mesh.md - # for use with flux differencing schemes - Qrst_skew = compute_flux_differencing_SBP_matrices(dg) + # for use with flux differencing schemes + Qrst_skew = compute_flux_differencing_SBP_matrices(dg) - # Todo: DGMulti. Factor common storage into a struct (MeshDataCache?) for reuse across solvers? - # storage for volume quadrature values, face quadrature values, flux values - nvars = nvariables(equations) - u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg) - u_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) - flux_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) - lift_scalings = rd.wf ./ rd.wq[rd.Fmask] # lift scalings for diag-norm SBP operators + # Todo: DGMulti. Factor common storage into a struct (MeshDataCache?) for reuse across solvers? 
+ # storage for volume quadrature values, face quadrature values, flux values + nvars = nvariables(equations) + u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg) + u_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) + flux_face_values = allocate_nested_array(uEltype, nvars, size(md.xf), dg) + lift_scalings = rd.wf ./ rd.wq[rd.Fmask] # lift scalings for diag-norm SBP operators - local_values_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) for _ in 1:Threads.nthreads()] + local_values_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) + for _ in 1:Threads.nthreads()] - # Use an array of SVectors (chunks of `nvars` are contiguous in memory) to speed up flux differencing - fluxdiff_local_threaded = [zeros(SVector{nvars, uEltype}, rd.Nq) for _ in 1:Threads.nthreads()] + # Use an array of SVectors (chunks of `nvars` are contiguous in memory) to speed up flux differencing + fluxdiff_local_threaded = [zeros(SVector{nvars, uEltype}, rd.Nq) + for _ in 1:Threads.nthreads()] - return (; md, Qrst_skew, dxidxhatj = md.rstxyzJ, + return (; md, Qrst_skew, dxidxhatj = md.rstxyzJ, invJ = inv.(md.J), lift_scalings, inv_wq = inv.(rd.wq), u_values, u_face_values, flux_face_values, local_values_threaded, fluxdiff_local_threaded) @@ -302,93 +331,101 @@ end # most general create_cache: works for `DGMultiFluxDiff{<:Polynomial}` function create_cache(mesh::DGMultiMesh, equations, dg::DGMultiFluxDiff, RealT, uEltype) - - rd = dg.basis - @unpack md = mesh - - Qrst_skew, VhP, Ph = compute_flux_differencing_SBP_matrices(dg) - - # temp storage for entropy variables at volume quad points - nvars = nvariables(equations) - entropy_var_values = allocate_nested_array(uEltype, nvars, (rd.Nq, md.num_elements), dg) - - # storage for all quadrature points (concatenated volume / face quadrature points) - num_quad_points_total = rd.Nq + rd.Nfq - entropy_projected_u_values = allocate_nested_array(uEltype, nvars, (num_quad_points_total, md.num_elements), dg) - projected_entropy_var_values = allocate_nested_array(uEltype, nvars, (num_quad_points_total, md.num_elements), dg) - - # For this specific solver, `prolong2interfaces` will not be used anymore. - # Instead, this step is also performed in `entropy_projection!`. Thus, we set - # `u_face_values` as a `view` into `entropy_projected_u_values`. We do not do - # the same for `u_values` since we will use that with LoopVectorization, which - # cannot handle such views as of v0.12.66, the latest version at the time of writing. - u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg) - u_face_values = view(entropy_projected_u_values, rd.Nq+1:num_quad_points_total, :) - flux_face_values = similar(u_face_values) - - # local storage for interface fluxes, rhs, and source - local_values_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) for _ in 1:Threads.nthreads()] - - # Use an array of SVectors (chunks of `nvars` are contiguous in memory) to speed up flux differencing - # The result is then transferred to rhs_local_threaded::StructArray{<:SVector} before - # projecting it and storing it into `du`. 
- fluxdiff_local_threaded = [zeros(SVector{nvars, uEltype}, num_quad_points_total) for _ in 1:Threads.nthreads()] - rhs_local_threaded = [allocate_nested_array(uEltype, nvars, (num_quad_points_total,), dg) for _ in 1:Threads.nthreads()] - - # interpolate geometric terms to both quadrature and face values for curved meshes - (; Vq, Vf) = dg.basis - interpolated_geometric_terms = map(x -> [Vq; Vf] * x, mesh.md.rstxyzJ) - J = rd.Vq * md.J - - return (; md, Qrst_skew, VhP, Ph, + rd = dg.basis + @unpack md = mesh + + Qrst_skew, VhP, Ph = compute_flux_differencing_SBP_matrices(dg) + + # temp storage for entropy variables at volume quad points + nvars = nvariables(equations) + entropy_var_values = allocate_nested_array(uEltype, nvars, (rd.Nq, md.num_elements), + dg) + + # storage for all quadrature points (concatenated volume / face quadrature points) + num_quad_points_total = rd.Nq + rd.Nfq + entropy_projected_u_values = allocate_nested_array(uEltype, nvars, + (num_quad_points_total, + md.num_elements), dg) + projected_entropy_var_values = allocate_nested_array(uEltype, nvars, + (num_quad_points_total, + md.num_elements), dg) + + # For this specific solver, `prolong2interfaces` will not be used anymore. + # Instead, this step is also performed in `entropy_projection!`. Thus, we set + # `u_face_values` as a `view` into `entropy_projected_u_values`. We do not do + # the same for `u_values` since we will use that with LoopVectorization, which + # cannot handle such views as of v0.12.66, the latest version at the time of writing. + u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg) + u_face_values = view(entropy_projected_u_values, (rd.Nq + 1):num_quad_points_total, + :) + flux_face_values = similar(u_face_values) + + # local storage for interface fluxes, rhs, and source + local_values_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) + for _ in 1:Threads.nthreads()] + + # Use an array of SVectors (chunks of `nvars` are contiguous in memory) to speed up flux differencing + # The result is then transferred to rhs_local_threaded::StructArray{<:SVector} before + # projecting it and storing it into `du`. + fluxdiff_local_threaded = [zeros(SVector{nvars, uEltype}, num_quad_points_total) + for _ in 1:Threads.nthreads()] + rhs_local_threaded = [allocate_nested_array(uEltype, nvars, + (num_quad_points_total,), dg) + for _ in 1:Threads.nthreads()] + + # interpolate geometric terms to both quadrature and face values for curved meshes + (; Vq, Vf) = dg.basis + interpolated_geometric_terms = map(x -> [Vq; Vf] * x, mesh.md.rstxyzJ) + J = rd.Vq * md.J + + return (; md, Qrst_skew, VhP, Ph, invJ = inv.(J), dxidxhatj = interpolated_geometric_terms, - entropy_var_values, projected_entropy_var_values, entropy_projected_u_values, + entropy_var_values, projected_entropy_var_values, + entropy_projected_u_values, u_values, u_face_values, flux_face_values, local_values_threaded, fluxdiff_local_threaded, rhs_local_threaded) end - # TODO: DGMulti. Address hard-coding of `entropy2cons!` and `cons2entropy!` for this function. 
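The entropy projection performed by the function below consists of three matrix-vector steps: interpolate to quadrature points, transform to entropy variables, project and evaluate at all volume and face points (the fused `VhP`), then transform back. A minimal scalar sketch with a toy entropy pair and hypothetical operator sizes:

# toy 1D "equations": entropy variables v(u) = log(u), inverse u(v) = exp(v)
to_entropy(u) = log(u)
to_cons(v)    = exp(v)

Np, Nq, Nh = 3, 4, 6          # modal coefficients, volume quad points, volume + face points
Vq  = rand(Nq, Np)            # interpolation to volume quadrature (hypothetical)
VhP = rand(Nh, Nq)            # fused projection + interpolation (hypothetical)
u_coeffs = rand(Np) .+ 1.0

v_q = to_entropy.(Vq * u_coeffs)   # entropy variables at volume quadrature points
v_h = VhP * v_q                    # projected entropy variables at all points
u_h = to_cons.(v_h)                # entropy-projected conservative variables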
function entropy_projection!(cache, u, mesh::DGMultiMesh, equations, dg::DGMulti) + rd = dg.basis + @unpack Vq = rd + @unpack VhP, entropy_var_values, u_values = cache + @unpack projected_entropy_var_values, entropy_projected_u_values = cache - rd = dg.basis - @unpack Vq = rd - @unpack VhP, entropy_var_values, u_values = cache - @unpack projected_entropy_var_values, entropy_projected_u_values = cache - - apply_to_each_field(mul_by!(Vq), u_values, u) + apply_to_each_field(mul_by!(Vq), u_values, u) - cons2entropy!(entropy_var_values, u_values, equations) + cons2entropy!(entropy_var_values, u_values, equations) - # "VhP" fuses the projection "P" with interpolation to volume and face quadrature "Vh" - apply_to_each_field(mul_by!(VhP), projected_entropy_var_values, entropy_var_values) + # "VhP" fuses the projection "P" with interpolation to volume and face quadrature "Vh" + apply_to_each_field(mul_by!(VhP), projected_entropy_var_values, entropy_var_values) - entropy2cons!(entropy_projected_u_values, projected_entropy_var_values, equations) - return nothing + entropy2cons!(entropy_projected_u_values, projected_entropy_var_values, equations) + return nothing end @inline function cons2entropy!(entropy_var_values::StructArray, - u_values ::StructArray, + u_values::StructArray, equations) - @threaded for i in eachindex(u_values) - entropy_var_values[i] = cons2entropy(u_values[i], equations) - end + @threaded for i in eachindex(u_values) + entropy_var_values[i] = cons2entropy(u_values[i], equations) + end end -@inline function entropy2cons!(entropy_projected_u_values ::StructArray, +@inline function entropy2cons!(entropy_projected_u_values::StructArray, projected_entropy_var_values::StructArray, equations) - @threaded for i in eachindex(projected_entropy_var_values) - entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations) - end + @threaded for i in eachindex(projected_entropy_var_values) + entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], + equations) + end end - # Trait-like system to dispatch based on whether or not the SBP operators are sparse. # Designed to be extendable to include specialized `approximation_types` too. @inline function has_sparse_operators(dg::DGMultiFluxDiff) - rd = dg.basis - return has_sparse_operators(rd.element_type, rd.approximation_type) + rd = dg.basis + return has_sparse_operators(rd.element_type, rd.approximation_type) end # General fallback for DGMulti solvers: @@ -400,15 +437,24 @@ end # For traditional SBP operators on triangles, the operators are fully dense. We avoid using # sum factorization here, which is slower for fully dense matrices. -@inline has_sparse_operators(::Union{Tri, Tet}, approx_type::AT) where {AT <: SBP} = False() +@inline function has_sparse_operators(::Union{Tri, Tet}, + approx_type::AT) where {AT <: SBP} + False() +end # SBP/GaussSBP operators on quads/hexes use tensor-product operators. Thus, sum factorization is # more efficient and we use the sparsity structure. 
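The `has_sparse_operators` trait follows the standard static-dispatch pattern: singleton types analogous to the `True()`/`False()` values used here select a kernel method at compile time rather than branching at runtime. A minimal sketch of the idiom (hypothetical names):

struct MyTrue end
struct MyFalse end

kernel(::MyTrue, x)  = "sparse path: sum factorization"
kernel(::MyFalse, x) = "dense path: full operator"

is_sparse(::Val{:Quad}) = MyTrue()
is_sparse(::Val{:Tri})  = MyFalse()

kernel(is_sparse(Val(:Quad)), nothing)   # method chosen by the type, no runtime branch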
-@inline has_sparse_operators(::Union{Quad, Hex}, approx_type::AT) where {AT <: SBP} = True() +@inline function has_sparse_operators(::Union{Quad, Hex}, + approx_type::AT) where {AT <: SBP} + True() +end @inline has_sparse_operators(::Union{Quad, Hex}, approx_type::GaussSBP) = True() # FD SBP methods have sparse operators -@inline has_sparse_operators(::Union{Line, Quad, Hex}, approx_type::AbstractDerivativeOperator) = True() +@inline function has_sparse_operators(::Union{Line, Quad, Hex}, + approx_type::AbstractDerivativeOperator) + True() +end # Computes flux differencing contribution from each Cartesian direction over a single element. # For dense operators, we do not use sum factorization. @@ -416,35 +462,35 @@ end has_nonconservative_terms::False, volume_flux, has_sparse_operators::False, mesh, equations, dg, cache) - - for dim in eachdim(mesh) - Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, cache) - # True() indicates the volume flux is symmetric - hadamard_sum!(fluxdiff_local, Qi_skew, - True(), volume_flux, - dim, u_local, equations) - end + for dim in eachdim(mesh) + Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, cache) + # True() indicates the volume flux is symmetric + hadamard_sum!(fluxdiff_local, Qi_skew, + True(), volume_flux, + dim, u_local, equations) + end end @inline function local_flux_differencing!(fluxdiff_local, u_local, element_index, has_nonconservative_terms::True, volume_flux, has_sparse_operators::False, mesh, equations, dg, cache) - flux_conservative, flux_nonconservative = volume_flux - for dim in eachdim(mesh) - Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, cache) - # True() indicates the flux is symmetric. - hadamard_sum!(fluxdiff_local, Qi_skew, - True(), flux_conservative, - dim, u_local, equations) - - # The final argument .5 scales the operator by 1/2 for the nonconservative terms. - half_Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, cache, 0.5) - # False() indicates the flux is non-symmetric. - hadamard_sum!(fluxdiff_local, half_Qi_skew, - False(), flux_nonconservative, - dim, u_local, equations) - end + flux_conservative, flux_nonconservative = volume_flux + for dim in eachdim(mesh) + Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, cache) + # True() indicates the flux is symmetric. + hadamard_sum!(fluxdiff_local, Qi_skew, + True(), flux_conservative, + dim, u_local, equations) + + # The final argument .5 scales the operator by 1/2 for the nonconservative terms. + half_Qi_skew = build_lazy_physical_derivative(element_index, dim, mesh, dg, + cache, 0.5) + # False() indicates the flux is non-symmetric. + hadamard_sum!(fluxdiff_local, half_Qi_skew, + False(), flux_nonconservative, + dim, u_local, equations) + end end # When the operators are sparse, we use the sum-factorization approach to @@ -453,54 +499,54 @@ end has_nonconservative_terms::False, volume_flux, has_sparse_operators::True, mesh, equations, dg, cache) - @unpack Qrst_skew = cache - for dim in eachdim(mesh) - # There are two ways to write this flux differencing discretization on affine meshes. - # - # 1. Use numerical fluxes in Cartesian directions and sum up the discrete derivative - # operators per coordinate direction accordingly. - # 2. Use discrete derivative operators per coordinate direction and corresponding - # numerical fluxes in arbitrary (non-Cartesian) space directions. 
- # - # The first option makes it necessary to sum up the individual sparsity - # patterns of each reference coordinate direction. On tensor-product - # elements such as `Quad()` or `Hex()` elements, this increases the number of - # potentially expensive numerical flux evaluations by a factor of `ndims(mesh)`. - # Thus, we use the second option below (which basically corresponds to the - # well-known sum factorization on tensor product elements). - # Note that there is basically no difference for dense derivative operators. - normal_direction = get_contravariant_vector(element_index, dim, mesh, cache) - Q_skew = Qrst_skew[dim] - - # True() indicates the flux is symmetric - hadamard_sum!(fluxdiff_local, Q_skew, - True(), volume_flux, - normal_direction, u_local, equations) - end + @unpack Qrst_skew = cache + for dim in eachdim(mesh) + # There are two ways to write this flux differencing discretization on affine meshes. + # + # 1. Use numerical fluxes in Cartesian directions and sum up the discrete derivative + # operators per coordinate direction accordingly. + # 2. Use discrete derivative operators per coordinate direction and corresponding + # numerical fluxes in arbitrary (non-Cartesian) space directions. + # + # The first option makes it necessary to sum up the individual sparsity + # patterns of each reference coordinate direction. On tensor-product + # elements such as `Quad()` or `Hex()` elements, this increases the number of + # potentially expensive numerical flux evaluations by a factor of `ndims(mesh)`. + # Thus, we use the second option below (which basically corresponds to the + # well-known sum factorization on tensor product elements). + # Note that there is basically no difference for dense derivative operators. + normal_direction = get_contravariant_vector(element_index, dim, mesh, cache) + Q_skew = Qrst_skew[dim] + + # True() indicates the flux is symmetric + hadamard_sum!(fluxdiff_local, Q_skew, + True(), volume_flux, + normal_direction, u_local, equations) + end end @inline function local_flux_differencing!(fluxdiff_local, u_local, element_index, has_nonconservative_terms::True, volume_flux, has_sparse_operators::True, mesh, equations, dg, cache) - @unpack Qrst_skew = cache - flux_conservative, flux_nonconservative = volume_flux - for dim in eachdim(mesh) - normal_direction = get_contravariant_vector(element_index, dim, mesh, cache) - Q_skew = Qrst_skew[dim] - - # True() indicates the flux is symmetric - hadamard_sum!(fluxdiff_local, Q_skew, - True(), flux_conservative, - normal_direction, u_local, equations) - - # We scale the operator by 1/2 for the nonconservative terms. - half_Q_skew = LazyMatrixLinearCombo((Q_skew, ), (0.5, )) - # False() indicates the flux is non-symmetric - hadamard_sum!(fluxdiff_local, half_Q_skew, - False(), flux_nonconservative, - normal_direction, u_local, equations) - end + @unpack Qrst_skew = cache + flux_conservative, flux_nonconservative = volume_flux + for dim in eachdim(mesh) + normal_direction = get_contravariant_vector(element_index, dim, mesh, cache) + Q_skew = Qrst_skew[dim] + + # True() indicates the flux is symmetric + hadamard_sum!(fluxdiff_local, Q_skew, + True(), flux_conservative, + normal_direction, u_local, equations) + + # We scale the operator by 1/2 for the nonconservative terms. 
+ half_Q_skew = LazyMatrixLinearCombo((Q_skew,), (0.5,)) + # False() indicates the flux is non-symmetric + hadamard_sum!(fluxdiff_local, half_Q_skew, + False(), flux_nonconservative, + normal_direction, u_local, equations) + end end # calculates volume integral for <:Polynomial approximation types. We @@ -510,101 +556,109 @@ function calc_volume_integral!(du, u, mesh::DGMultiMesh, have_nonconservative_terms, equations, volume_integral, dg::DGMultiFluxDiff, cache) - - @unpack entropy_projected_u_values, Ph = cache - @unpack fluxdiff_local_threaded, rhs_local_threaded = cache - - @threaded for e in eachelement(mesh, dg, cache) - fluxdiff_local = fluxdiff_local_threaded[Threads.threadid()] - fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) - u_local = view(entropy_projected_u_values, :, e) - - local_flux_differencing!(fluxdiff_local, u_local, e, - have_nonconservative_terms, volume_integral.volume_flux, - has_sparse_operators(dg), - mesh, equations, dg, cache) - - # convert fluxdiff_local::Vector{<:SVector} to StructArray{<:SVector} for faster - # apply_to_each_field performance. - rhs_local = rhs_local_threaded[Threads.threadid()] - for i in Base.OneTo(length(fluxdiff_local)) - rhs_local[i] = fluxdiff_local[i] + @unpack entropy_projected_u_values, Ph = cache + @unpack fluxdiff_local_threaded, rhs_local_threaded = cache + + @threaded for e in eachelement(mesh, dg, cache) + fluxdiff_local = fluxdiff_local_threaded[Threads.threadid()] + fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) + u_local = view(entropy_projected_u_values, :, e) + + local_flux_differencing!(fluxdiff_local, u_local, e, + have_nonconservative_terms, + volume_integral.volume_flux, + has_sparse_operators(dg), + mesh, equations, dg, cache) + + # convert fluxdiff_local::Vector{<:SVector} to StructArray{<:SVector} for faster + # apply_to_each_field performance. + rhs_local = rhs_local_threaded[Threads.threadid()] + for i in Base.OneTo(length(fluxdiff_local)) + rhs_local[i] = fluxdiff_local[i] + end + apply_to_each_field(mul_by_accum!(Ph), view(du, :, e), rhs_local) end - apply_to_each_field(mul_by_accum!(Ph), view(du, :, e), rhs_local) - end end function calc_volume_integral!(du, u, mesh::DGMultiMesh, have_nonconservative_terms, equations, volume_integral, dg::DGMultiFluxDiffSBP, cache) - - @unpack fluxdiff_local_threaded, inv_wq = cache - - @threaded for e in eachelement(mesh, dg, cache) - fluxdiff_local = fluxdiff_local_threaded[Threads.threadid()] - fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) - u_local = view(u, :, e) - - local_flux_differencing!(fluxdiff_local, u_local, e, - have_nonconservative_terms, volume_integral.volume_flux, - has_sparse_operators(dg), - mesh, equations, dg, cache) - - for i in each_quad_node(mesh, dg, cache) - du[i, e] = du[i, e] + fluxdiff_local[i] * inv_wq[i] + @unpack fluxdiff_local_threaded, inv_wq = cache + + @threaded for e in eachelement(mesh, dg, cache) + fluxdiff_local = fluxdiff_local_threaded[Threads.threadid()] + fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) + u_local = view(u, :, e) + + local_flux_differencing!(fluxdiff_local, u_local, e, + have_nonconservative_terms, + volume_integral.volume_flux, + has_sparse_operators(dg), + mesh, equations, dg, cache) + + for i in each_quad_node(mesh, dg, cache) + du[i, e] = du[i, e] + fluxdiff_local[i] * inv_wq[i] + end end - end end - # Specialize since `u_values` isn't computed for DGMultiFluxDiffSBP solvers. 
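In the SBP volume integral above, applying the inverse mass matrix amounts to a pointwise scaling, since diagonal-norm SBP operators have a diagonal mass matrix. Sketch (hypothetical data):

wq = [0.5, 1.0, 1.0, 0.5]           # diagonal quadrature weights of the norm matrix
inv_wq = inv.(wq)
fluxdiff_local = randn(4)
du_e = zeros(4)
du_e .+= fluxdiff_local .* inv_wq   # the action of M^-1 is elementwise, no linear solve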
function calc_sources!(du, u, t, source_terms, mesh, equations, dg::DGMultiFluxDiffSBP, cache) - md = mesh.md + md = mesh.md - @threaded for e in eachelement(mesh, dg, cache) - for i in each_quad_node(mesh, dg, cache) - du[i, e] += source_terms(u[i, e], SVector(getindex.(md.xyzq, i, e)), t, equations) + @threaded for e in eachelement(mesh, dg, cache) + for i in each_quad_node(mesh, dg, cache) + du[i, e] += source_terms(u[i, e], SVector(getindex.(md.xyzq, i, e)), t, + equations) + end end - end end - # Specializes on Polynomial (e.g., modal) DG methods with a flux differencing volume integral, e.g., # an entropy conservative/stable discretization. For modal DG schemes, an extra `entropy_projection!` # is required (see https://doi.org/10.1016/j.jcp.2018.02.033, Section 4.3). # Also called by DGMultiFluxDiff{<:GaussSBP} solvers. function rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiff, cache) where {Source, BC} + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # this function evaluates the solution at volume and face quadrature points (which was previously - # done in `prolong2interfaces` and `calc_volume_integral`) - @trixi_timeit timer() "entropy_projection!" entropy_projection!(cache, u, mesh, equations, dg) + # this function evaluates the solution at volume and face quadrature points (which was previously + # done in `prolong2interfaces` and `calc_volume_integral`) + @trixi_timeit timer() "entropy_projection!" begin + entropy_projection!(cache, u, mesh, equations, dg) + end - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, have_nonconservative_terms(equations), + equations, + dg.volume_integral, dg, cache) + end - # the following functions are the same as in VolumeIntegralWeakForm, and can be reused from dg.jl - @trixi_timeit timer() "interface flux" calc_interface_flux!(cache, dg.surface_integral, mesh, - have_nonconservative_terms(equations), - equations, dg) + # the following functions are the same as in VolumeIntegralWeakForm, and can be reused from dg.jl + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache, dg.surface_integral, mesh, + have_nonconservative_terms(equations), equations, dg) + end - @trixi_timeit timer() "boundary flux" calc_boundary_flux!(cache, t, boundary_conditions, mesh, - have_nonconservative_terms(equations), equations, dg) + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, + have_nonconservative_terms(equations), equations, dg) + end - @trixi_timeit timer() "surface integral" calc_surface_integral!(du, u, mesh, equations, - dg.surface_integral, dg, cache) + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) + end - @trixi_timeit timer() "Jacobian" invert_jacobian!(du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" invert_jacobian!(du, mesh, equations, dg, cache) - @trixi_timeit timer() "source terms" calc_sources!(du, u, t, source_terms, - mesh, equations, dg, cache) + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, mesh, equations, dg, cache) + end - return nothing + return nothing end # Specializes on SBP (e.g., 
nodal/collocation) DG methods with a flux differencing volume @@ -614,36 +668,40 @@ end function rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions::BC, source_terms::Source, dg::DGMultiFluxDiffSBP, cache) where {BC, Source} + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + @trixi_timeit timer() "volume integral" calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), + equations, + dg.volume_integral, + dg, cache) + + @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!(cache, u, mesh, + equations, + dg.surface_integral, + dg) + + @trixi_timeit timer() "interface flux" calc_interface_flux!(cache, + dg.surface_integral, + mesh, + have_nonconservative_terms(equations), + equations, dg) + + @trixi_timeit timer() "boundary flux" calc_boundary_flux!(cache, t, + boundary_conditions, mesh, + have_nonconservative_terms(equations), + equations, dg) - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache, dg.surface_integral, mesh, - have_nonconservative_terms(equations), equations, dg) - - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, - have_nonconservative_terms(equations), equations, dg) - - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) + @trixi_timeit timer() "surface integral" calc_surface_integral!(du, u, mesh, + equations, + dg.surface_integral, + dg, cache) - @trixi_timeit timer() "Jacobian" invert_jacobian!( - du, mesh, equations, dg, cache) + @trixi_timeit timer() "Jacobian" invert_jacobian!(du, mesh, equations, dg, cache) - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, mesh, equations, dg, cache) + @trixi_timeit timer() "source terms" calc_sources!(du, u, t, source_terms, mesh, + equations, dg, cache) - return nothing + return nothing end - - - end # @muladd diff --git a/src/solvers/dgmulti/flux_differencing_compressible_euler.jl b/src/solvers/dgmulti/flux_differencing_compressible_euler.jl index 530c2b23230..70a29bc73f2 100644 --- a/src/solvers/dgmulti/flux_differencing_compressible_euler.jl +++ b/src/solvers/dgmulti/flux_differencing_compressible_euler.jl @@ -3,8 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - - +#! format: noindent # TODO: Upstream, LoopVectorization # At the time of writing, LoopVectorization.jl cannot handle this kind of @@ -12,171 +11,171 @@ # `entropy2cons`. Thus, we need to insert the physics directly here to # get a significant runtime performance improvement. function cons2entropy!(entropy_var_values::StructArray, - u_values ::StructArray, + u_values::StructArray, equations::CompressibleEulerEquations2D) - # The following is semantically equivalent to - # @threaded for i in eachindex(u_values) - # entropy_var_values[i] = cons2entropy(u_values[i], equations) - # end - # but much more efficient due to explicit optimization via `@turbo` from - # LoopVectorization.jl. 
- @unpack gamma, inv_gamma_minus_one = equations - - rho_values, rho_v1_values, rho_v2_values, rho_e_values = StructArrays.components(u_values) - w1_values, w2_values, w3_values, w4_values = StructArrays.components(entropy_var_values) - - @turbo thread=true for i in eachindex( - rho_values, rho_v1_values, rho_v2_values, rho_e_values, - w1_values, w2_values, w3_values, w4_values) - rho = rho_values[i] - rho_v1 = rho_v1_values[i] - rho_v2 = rho_v2_values[i] - rho_e = rho_e_values[i] - - # The following is basically the same code as in `cons2entropy` - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v_square = v1^2 + v2^2 - p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) - s = log(p) - gamma * log(rho) - rho_p = rho / p - - w1_values[i] = (gamma - s) * inv_gamma_minus_one - 0.5 * rho_p * v_square - w2_values[i] = rho_p * v1 - w3_values[i] = rho_p * v2 - w4_values[i] = -rho_p - end + # The following is semantically equivalent to + # @threaded for i in eachindex(u_values) + # entropy_var_values[i] = cons2entropy(u_values[i], equations) + # end + # but much more efficient due to explicit optimization via `@turbo` from + # LoopVectorization.jl. + @unpack gamma, inv_gamma_minus_one = equations + + rho_values, rho_v1_values, rho_v2_values, rho_e_values = StructArrays.components(u_values) + w1_values, w2_values, w3_values, w4_values = StructArrays.components(entropy_var_values) + + @turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values, + rho_e_values, + w1_values, w2_values, w3_values, w4_values) + rho = rho_values[i] + rho_v1 = rho_v1_values[i] + rho_v2 = rho_v2_values[i] + rho_e = rho_e_values[i] + + # The following is basically the same code as in `cons2entropy` + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v_square = v1^2 + v2^2 + p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) + s = log(p) - gamma * log(rho) + rho_p = rho / p + + w1_values[i] = (gamma - s) * inv_gamma_minus_one - 0.5 * rho_p * v_square + w2_values[i] = rho_p * v1 + w3_values[i] = rho_p * v2 + w4_values[i] = -rho_p + end end -function entropy2cons!(entropy_projected_u_values ::StructArray, +function entropy2cons!(entropy_projected_u_values::StructArray, projected_entropy_var_values::StructArray, equations::CompressibleEulerEquations2D) - # The following is semantically equivalent to - # @threaded for i in eachindex(projected_entropy_var_values) - # entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations) - # end - # but much more efficient due to explicit optimization via `@turbo` from - # LoopVectorization.jl. - @unpack gamma, inv_gamma_minus_one = equations - gamma_minus_one = gamma - 1 - - rho_values, rho_v1_values, rho_v2_values, rho_e_values = StructArrays.components(entropy_projected_u_values) - w1_values, w2_values, w3_values, w4_values = StructArrays.components(projected_entropy_var_values) - - @turbo thread=true for i in eachindex( - rho_values, rho_v1_values, rho_v2_values, rho_e_values, - w1_values, w2_values, w3_values, w4_values) - - # The following is basically the same code as in `entropy2cons` - # Convert to entropy `-rho * s` used by - # - See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD - # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) - # instead of `-rho * s / (gamma - 1)` - w1 = gamma_minus_one * w1_values[i] - w2 = gamma_minus_one * w2_values[i] - w3 = gamma_minus_one * w3_values[i] - w4 = gamma_minus_one * w4_values[i] - - # s = specific entropy, eq. 
(53) - s = gamma - w1 + (w2^2 + w3^2) / (2 * w4) - - # eq. (52) - rho_iota = (gamma_minus_one / (-w4)^gamma)^(inv_gamma_minus_one) * exp(-s * inv_gamma_minus_one) - - # eq. (51) - rho_values[i] = -rho_iota * w4 - rho_v1_values[i] = rho_iota * w2 - rho_v2_values[i] = rho_iota * w3 - rho_e_values[i] = rho_iota * (1 - (w2^2 + w3^2) / (2 * w4)) - end + # The following is semantically equivalent to + # @threaded for i in eachindex(projected_entropy_var_values) + # entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations) + # end + # but much more efficient due to explicit optimization via `@turbo` from + # LoopVectorization.jl. + @unpack gamma, inv_gamma_minus_one = equations + gamma_minus_one = gamma - 1 + + rho_values, rho_v1_values, rho_v2_values, rho_e_values = StructArrays.components(entropy_projected_u_values) + w1_values, w2_values, w3_values, w4_values = StructArrays.components(projected_entropy_var_values) + + @turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values, + rho_e_values, + w1_values, w2_values, w3_values, w4_values) + + # The following is basically the same code as in `entropy2cons` + # Convert to entropy `-rho * s` used by + # - See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD + # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) + # instead of `-rho * s / (gamma - 1)` + w1 = gamma_minus_one * w1_values[i] + w2 = gamma_minus_one * w2_values[i] + w3 = gamma_minus_one * w3_values[i] + w4 = gamma_minus_one * w4_values[i] + + # s = specific entropy, eq. (53) + s = gamma - w1 + (w2^2 + w3^2) / (2 * w4) + + # eq. (52) + rho_iota = (gamma_minus_one / (-w4)^gamma)^(inv_gamma_minus_one) * + exp(-s * inv_gamma_minus_one) + + # eq. (51) + rho_values[i] = -rho_iota * w4 + rho_v1_values[i] = rho_iota * w2 + rho_v2_values[i] = rho_iota * w3 + rho_e_values[i] = rho_iota * (1 - (w2^2 + w3^2) / (2 * w4)) + end end - function cons2entropy!(entropy_var_values::StructArray, - u_values ::StructArray, + u_values::StructArray, equations::CompressibleEulerEquations3D) - # The following is semantically equivalent to - # @threaded for i in eachindex(u_values) - # entropy_var_values[i] = cons2entropy(u_values[i], equations) - # end - # but much more efficient due to explicit optimization via `@turbo` from - # LoopVectorization.jl. 
- @unpack gamma, inv_gamma_minus_one = equations - - rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_values = StructArrays.components(u_values) - w1_values, w2_values, w3_values, w4_values, w5_values = StructArrays.components(entropy_var_values) - - @turbo thread=true for i in eachindex( - rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_values, - w1_values, w2_values, w3_values, w4_values, w5_values) - rho = rho_values[i] - rho_v1 = rho_v1_values[i] - rho_v2 = rho_v2_values[i] - rho_v3 = rho_v3_values[i] - rho_e = rho_e_values[i] - - # The following is basically the same code as in `cons2entropy` - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - v_square = v1^2 + v2^2 + v3^2 - p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) - s = log(p) - gamma * log(rho) - rho_p = rho / p - - w1_values[i] = (gamma - s) * inv_gamma_minus_one - 0.5 * rho_p * v_square - w2_values[i] = rho_p * v1 - w3_values[i] = rho_p * v2 - w4_values[i] = rho_p * v3 - w5_values[i] = -rho_p - end + # The following is semantically equivalent to + # @threaded for i in eachindex(u_values) + # entropy_var_values[i] = cons2entropy(u_values[i], equations) + # end + # but much more efficient due to explicit optimization via `@turbo` from + # LoopVectorization.jl. + @unpack gamma, inv_gamma_minus_one = equations + + rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_values = StructArrays.components(u_values) + w1_values, w2_values, w3_values, w4_values, w5_values = StructArrays.components(entropy_var_values) + + @turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values, + rho_v3_values, rho_e_values, + w1_values, w2_values, w3_values, w4_values, + w5_values) + rho = rho_values[i] + rho_v1 = rho_v1_values[i] + rho_v2 = rho_v2_values[i] + rho_v3 = rho_v3_values[i] + rho_e = rho_e_values[i] + + # The following is basically the same code as in `cons2entropy` + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + v_square = v1^2 + v2^2 + v3^2 + p = (gamma - 1) * (rho_e - 0.5 * rho * v_square) + s = log(p) - gamma * log(rho) + rho_p = rho / p + + w1_values[i] = (gamma - s) * inv_gamma_minus_one - 0.5 * rho_p * v_square + w2_values[i] = rho_p * v1 + w3_values[i] = rho_p * v2 + w4_values[i] = rho_p * v3 + w5_values[i] = -rho_p + end end -function entropy2cons!(entropy_projected_u_values ::StructArray, +function entropy2cons!(entropy_projected_u_values::StructArray, projected_entropy_var_values::StructArray, equations::CompressibleEulerEquations3D) - # The following is semantically equivalent to - # @threaded for i in eachindex(projected_entropy_var_values) - # entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations) - # end - # but much more efficient due to explicit optimization via `@turbo` from - # LoopVectorization.jl. 
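
The reason all of these kernels unpack `StructArrays.components` first is that `@turbo` needs flat arrays of numbers rather than arrays of `SVector`s. A minimal sketch of the pattern, assuming StructArrays.jl, StaticArrays.jl, and LoopVectorization.jl are available (the two-component transformation is made up for illustration):

    using StructArrays, StaticArrays, LoopVectorization

    function scale_components!(w, u)
        u1, u2 = StructArrays.components(u)
        w1, w2 = StructArrays.components(w)
        # one fused, vectorizable loop over the flat component arrays
        @turbo thread=true for i in eachindex(u1, u2, w1, w2)
            w1[i] = 2 * u1[i]
            w2[i] = -u2[i]
        end
        return w
    end

    u = StructArray([SVector(rand(), rand()) for _ in 1:16])
    w = scale_components!(similar(u), u)
    @assert w == StructArray([SVector(2 * x[1], -x[2]) for x in u])
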
- @unpack gamma, inv_gamma_minus_one = equations - gamma_minus_one = gamma - 1 - - rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_values = StructArrays.components(entropy_projected_u_values) - w1_values, w2_values, w3_values, w4_values, w5_values = StructArrays.components(projected_entropy_var_values) - - @turbo thread=true for i in eachindex( - rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_values, - w1_values, w2_values, w3_values, w4_values, w5_values) - - # The following is basically the same code as in `entropy2cons` - # Convert to entropy `-rho * s` used by - # - See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD - # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) - # instead of `-rho * s / (gamma - 1)` - w1 = gamma_minus_one * w1_values[i] - w2 = gamma_minus_one * w2_values[i] - w3 = gamma_minus_one * w3_values[i] - w4 = gamma_minus_one * w4_values[i] - w5 = gamma_minus_one * w5_values[i] - - # s = specific entropy, eq. (53) - s = gamma - w1 + (w2^2 + w3^2 + w4^2) / (2 * w5) - - # eq. (52) - rho_iota = (gamma_minus_one / (-w5)^gamma)^(inv_gamma_minus_one) * exp(-s * inv_gamma_minus_one) - - # eq. (51) - rho_values[i] = -rho_iota * w5 - rho_v1_values[i] = rho_iota * w2 - rho_v2_values[i] = rho_iota * w3 - rho_v3_values[i] = rho_iota * w4 - rho_e_values[i] = rho_iota * (1 - (w2^2 + w3^2 + w4^2) / (2 * w5)) - end + # The following is semantically equivalent to + # @threaded for i in eachindex(projected_entropy_var_values) + # entropy_projected_u_values[i] = entropy2cons(projected_entropy_var_values[i], equations) + # end + # but much more efficient due to explicit optimization via `@turbo` from + # LoopVectorization.jl. + @unpack gamma, inv_gamma_minus_one = equations + gamma_minus_one = gamma - 1 + + rho_values, rho_v1_values, rho_v2_values, rho_v3_values, rho_e_values = StructArrays.components(entropy_projected_u_values) + w1_values, w2_values, w3_values, w4_values, w5_values = StructArrays.components(projected_entropy_var_values) + + @turbo thread=true for i in eachindex(rho_values, rho_v1_values, rho_v2_values, + rho_v3_values, rho_e_values, + w1_values, w2_values, w3_values, w4_values, + w5_values) + + # The following is basically the same code as in `entropy2cons` + # Convert to entropy `-rho * s` used by + # - See Hughes, Franca, Mallet (1986) A new finite element formulation for CFD + # [DOI: 10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) + # instead of `-rho * s / (gamma - 1)` + w1 = gamma_minus_one * w1_values[i] + w2 = gamma_minus_one * w2_values[i] + w3 = gamma_minus_one * w3_values[i] + w4 = gamma_minus_one * w4_values[i] + w5 = gamma_minus_one * w5_values[i] + + # s = specific entropy, eq. (53) + s = gamma - w1 + (w2^2 + w3^2 + w4^2) / (2 * w5) + + # eq. (52) + rho_iota = (gamma_minus_one / (-w5)^gamma)^(inv_gamma_minus_one) * + exp(-s * inv_gamma_minus_one) + + # eq. 
(51) + rho_values[i] = -rho_iota * w5 + rho_v1_values[i] = rho_iota * w2 + rho_v2_values[i] = rho_iota * w3 + rho_v3_values[i] = rho_iota * w4 + rho_e_values[i] = rho_iota * (1 - (w2^2 + w3^2 + w4^2) / (2 * w5)) + end end - - - end # @muladd diff --git a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl index 95a471fa71b..2c5505cc4e9 100644 --- a/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl +++ b/src/solvers/dgmulti/flux_differencing_gauss_sbp.jl @@ -8,21 +8,21 @@ const GaussSBP = Polynomial{Gauss} function tensor_product_quadrature(element_type::Line, r1D, w1D) - return r1D, w1D + return r1D, w1D end function tensor_product_quadrature(element_type::Quad, r1D, w1D) - sq, rq = vec.(StartUpDG.NodesAndModes.meshgrid(r1D)) - ws, wr = vec.(StartUpDG.NodesAndModes.meshgrid(w1D)) - wq = wr .* ws - return rq, sq, wq + sq, rq = vec.(StartUpDG.NodesAndModes.meshgrid(r1D)) + ws, wr = vec.(StartUpDG.NodesAndModes.meshgrid(w1D)) + wq = wr .* ws + return rq, sq, wq end function tensor_product_quadrature(element_type::Hex, r1D, w1D) - rq, sq, tq = vec.(StartUpDG.NodesAndModes.meshgrid(r1D, r1D, r1D)) - wr, ws, wt = vec.(StartUpDG.NodesAndModes.meshgrid(w1D, w1D, w1D)) - wq = wr .* ws .* wt - return rq, sq, tq, wq + rq, sq, tq = vec.(StartUpDG.NodesAndModes.meshgrid(r1D, r1D, r1D)) + wr, ws, wt = vec.(StartUpDG.NodesAndModes.meshgrid(w1D, w1D, w1D)) + wq = wr .* ws .* wt + return rq, sq, tq, wq end # type parameters for `TensorProductFaceOperator`. @@ -32,7 +32,7 @@ struct Interpolation <: AbstractGaussOperator end # which is used in `VolumeIntegralFluxDifferencing`. # - `Projection{ScaleByFaceWeights=Static.True()}` corresponds to the quadrature-based lifting # operator `LIFT = M \ (Vf' * diagm(rd.wf))`, which is used in `SurfaceIntegralWeakForm` -struct Projection{ScaleByFaceWeights} <: AbstractGaussOperator end +struct Projection{ScaleByFaceWeights} <: AbstractGaussOperator end # used to dispatch for different Gauss interpolation operators abstract type AbstractTensorProductGaussOperator end @@ -41,87 +41,89 @@ abstract type AbstractTensorProductGaussOperator end # # Data for performing tensor product interpolation from volume nodes to face nodes. 
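
As an aside, the `tensor_product_quadrature` helpers above just take outer products of a 1D rule. A hedged sketch of the same construction with a hand-coded 2-point Gauss-Legendre rule standing in for `StartUpDG.gauss_quad`:

    r1D = [-1 / sqrt(3), 1 / sqrt(3)]
    w1D = [1.0, 1.0]
    rq = vec([r for s in r1D, r in r1D])
    sq = vec([s for s in r1D, r in r1D])
    wq = vec([ws * wr for ws in w1D, wr in w1D])
    # the product rule is exact for x^2 * y^2 on [-1, 1]^2: (2/3) * (2/3) = 4/9
    @assert sum(wq .* rq .^ 2 .* sq .^ 2) ≈ 4 / 9
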
struct TensorProductGaussFaceOperator{NDIMS, OperatorType <: AbstractGaussOperator, - Tmat, Tweights, Tfweights, Tindices} <: AbstractTensorProductGaussOperator - interp_matrix_gauss_to_face_1d::Tmat - inv_volume_weights_1d::Tweights - face_weights::Tfweights - face_indices_tensor_product::Tindices - nnodes_1d::Int - nfaces::Int + Tmat, Tweights, Tfweights, Tindices} <: + AbstractTensorProductGaussOperator + interp_matrix_gauss_to_face_1d::Tmat + inv_volume_weights_1d::Tweights + face_weights::Tfweights + face_indices_tensor_product::Tindices + nnodes_1d::Int + nfaces::Int end # constructor for a 2D operator function TensorProductGaussFaceOperator(operator::AbstractGaussOperator, dg::DGMulti{2, Quad, GaussSBP}) - rd = dg.basis - - rq1D, wq1D = StartUpDG.gauss_quad(0, 0, polydeg(dg)) - interp_matrix_gauss_to_face_1d = polynomial_interpolation_matrix(rq1D, [-1; 1]) - - nnodes_1d = length(rq1D) - - # Permutation of indices in a tensor product form - num_faces = StartUpDG.num_faces(rd.element_type) - indices = reshape(1:length(rd.rf), nnodes_1d, num_faces) - face_indices_tensor_product = zeros(Int, 2, nnodes_1d, ndims(rd.element_type)) - for i in 1:nnodes_1d # loop over nodes in one face - face_indices_tensor_product[:, i, 1] .= indices[i, 1:2] - face_indices_tensor_product[:, i, 2] .= indices[i, 3:4] - end - - T_op = typeof(operator) - Tm = typeof(interp_matrix_gauss_to_face_1d) - Tw = typeof(inv.(wq1D)) - Tf = typeof(rd.wf) - Ti = typeof(face_indices_tensor_product) - return TensorProductGaussFaceOperator{2, T_op, Tm, Tw, Tf, Ti}(interp_matrix_gauss_to_face_1d, - inv.(wq1D), rd.wf, - face_indices_tensor_product, - nnodes_1d, num_faces) + rd = dg.basis + + rq1D, wq1D = StartUpDG.gauss_quad(0, 0, polydeg(dg)) + interp_matrix_gauss_to_face_1d = polynomial_interpolation_matrix(rq1D, [-1; 1]) + + nnodes_1d = length(rq1D) + + # Permutation of indices in a tensor product form + num_faces = StartUpDG.num_faces(rd.element_type) + indices = reshape(1:length(rd.rf), nnodes_1d, num_faces) + face_indices_tensor_product = zeros(Int, 2, nnodes_1d, ndims(rd.element_type)) + for i in 1:nnodes_1d # loop over nodes in one face + face_indices_tensor_product[:, i, 1] .= indices[i, 1:2] + face_indices_tensor_product[:, i, 2] .= indices[i, 3:4] + end + + T_op = typeof(operator) + Tm = typeof(interp_matrix_gauss_to_face_1d) + Tw = typeof(inv.(wq1D)) + Tf = typeof(rd.wf) + Ti = typeof(face_indices_tensor_product) + return TensorProductGaussFaceOperator{2, T_op, Tm, Tw, Tf, Ti}(interp_matrix_gauss_to_face_1d, + inv.(wq1D), rd.wf, + face_indices_tensor_product, + nnodes_1d, num_faces) end # constructor for a 3D operator function TensorProductGaussFaceOperator(operator::AbstractGaussOperator, dg::DGMulti{3, Hex, GaussSBP}) - rd = dg.basis - - rq1D, wq1D = StartUpDG.gauss_quad(0, 0, polydeg(dg)) - interp_matrix_gauss_to_face_1d = polynomial_interpolation_matrix(rq1D, [-1; 1]) - - nnodes_1d = length(rq1D) - - # Permutation of indices in a tensor product form - num_faces = StartUpDG.num_faces(rd.element_type) - indices = reshape(1:length(rd.rf), nnodes_1d, nnodes_1d, num_faces) - face_indices_tensor_product = zeros(Int, 2, nnodes_1d, nnodes_1d, ndims(rd.element_type)) - for j in 1:nnodes_1d, i in 1:nnodes_1d # loop over nodes in one face - face_indices_tensor_product[:, i, j, 1] .= indices[i, j, 1:2] - face_indices_tensor_product[:, i, j, 2] .= indices[i, j, 3:4] - face_indices_tensor_product[:, i, j, 3] .= indices[i, j, 5:6] - end - - T_op = typeof(operator) - Tm = typeof(interp_matrix_gauss_to_face_1d) - Tw = 
typeof(inv.(wq1D)) - Tf = typeof(rd.wf) - Ti = typeof(face_indices_tensor_product) - return TensorProductGaussFaceOperator{3, T_op, Tm, Tw, Tf, Ti}(interp_matrix_gauss_to_face_1d, - inv.(wq1D), rd.wf, - face_indices_tensor_product, - nnodes_1d, num_faces) + rd = dg.basis + + rq1D, wq1D = StartUpDG.gauss_quad(0, 0, polydeg(dg)) + interp_matrix_gauss_to_face_1d = polynomial_interpolation_matrix(rq1D, [-1; 1]) + + nnodes_1d = length(rq1D) + + # Permutation of indices in a tensor product form + num_faces = StartUpDG.num_faces(rd.element_type) + indices = reshape(1:length(rd.rf), nnodes_1d, nnodes_1d, num_faces) + face_indices_tensor_product = zeros(Int, 2, nnodes_1d, nnodes_1d, + ndims(rd.element_type)) + for j in 1:nnodes_1d, i in 1:nnodes_1d # loop over nodes in one face + face_indices_tensor_product[:, i, j, 1] .= indices[i, j, 1:2] + face_indices_tensor_product[:, i, j, 2] .= indices[i, j, 3:4] + face_indices_tensor_product[:, i, j, 3] .= indices[i, j, 5:6] + end + + T_op = typeof(operator) + Tm = typeof(interp_matrix_gauss_to_face_1d) + Tw = typeof(inv.(wq1D)) + Tf = typeof(rd.wf) + Ti = typeof(face_indices_tensor_product) + return TensorProductGaussFaceOperator{3, T_op, Tm, Tw, Tf, Ti}(interp_matrix_gauss_to_face_1d, + inv.(wq1D), rd.wf, + face_indices_tensor_product, + nnodes_1d, num_faces) end # specialize behavior of `mul_by!(A)` where `A isa TensorProductGaussFaceOperator)` @inline function mul_by!(A::AbstractTensorProductGaussOperator) - return (out, x) -> tensor_product_gauss_face_operator!(out, A, x) + return (out, x) -> tensor_product_gauss_face_operator!(out, A, x) end @inline function tensor_product_gauss_face_operator!(out::AbstractMatrix, A::AbstractTensorProductGaussOperator, x::AbstractMatrix) - @threaded for col in Base.OneTo(size(out, 2)) - tensor_product_gauss_face_operator!(view(out, :, col), A, view(x, :, col)) - end + @threaded for col in Base.OneTo(size(out, 2)) + tensor_product_gauss_face_operator!(view(out, :, col), A, view(x, :, col)) + end end # By default, Julia/LLVM does not use fused multiply-add operations (FMAs). @@ -129,276 +131,317 @@ end # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent +#! format: off # Interpolates values from volume Gauss nodes to face nodes on one element. @inline function tensor_product_gauss_face_operator!(out::AbstractVector, A::TensorProductGaussFaceOperator{2, Interpolation}, x_in::AbstractVector) - - (; interp_matrix_gauss_to_face_1d, face_indices_tensor_product) = A - (; nnodes_1d) = A - - fill!(out, zero(eltype(out))) - - # for 2D GaussSBP nodes, the indexing is first in x, then in y - x = reshape(x_in, nnodes_1d, nnodes_1d) - - # interpolation in the x-direction - @turbo for i in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, 1] - index_right = face_indices_tensor_product[2, i, 1] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - out[index_left] = out[index_left] + interp_matrix_gauss_to_face_1d[1, jj] * x[jj, i] - out[index_right] = out[index_right] + interp_matrix_gauss_to_face_1d[2, jj] * x[jj, i] +#! 
format: on + (; interp_matrix_gauss_to_face_1d, face_indices_tensor_product) = A + (; nnodes_1d) = A + + fill!(out, zero(eltype(out))) + + # for 2D GaussSBP nodes, the indexing is first in x, then in y + x = reshape(x_in, nnodes_1d, nnodes_1d) + + # interpolation in the x-direction + @turbo for i in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, 1] + index_right = face_indices_tensor_product[2, i, 1] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + out[index_left] = out[index_left] + + interp_matrix_gauss_to_face_1d[1, jj] * x[jj, i] + out[index_right] = out[index_right] + + interp_matrix_gauss_to_face_1d[2, jj] * x[jj, i] + end end - end - - # interpolation in the y-direction - @turbo for i in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, 2] - index_right = face_indices_tensor_product[2, i, 2] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - out[index_left] = out[index_left] + interp_matrix_gauss_to_face_1d[1, jj] * x[i, jj] - out[index_right] = out[index_right] + interp_matrix_gauss_to_face_1d[2, jj] * x[i, jj] + + # interpolation in the y-direction + @turbo for i in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, 2] + index_right = face_indices_tensor_product[2, i, 2] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + out[index_left] = out[index_left] + + interp_matrix_gauss_to_face_1d[1, jj] * x[i, jj] + out[index_right] = out[index_right] + + interp_matrix_gauss_to_face_1d[2, jj] * x[i, jj] + end end - end end # Interpolates values from volume Gauss nodes to face nodes on one element. +#! format: off @inline function tensor_product_gauss_face_operator!(out::AbstractVector, A::TensorProductGaussFaceOperator{3, Interpolation}, x::AbstractVector) - - (; interp_matrix_gauss_to_face_1d, face_indices_tensor_product) = A - (; nnodes_1d) = A - - fill!(out, zero(eltype(out))) - - # for 3D GaussSBP nodes, the indexing is first in y, then x, then z. - x = reshape(x, nnodes_1d, nnodes_1d, nnodes_1d) - - # interpolation in the y-direction - @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, j, 2] - index_right = face_indices_tensor_product[2, i, j, 2] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - out[index_left] = out[index_left] + interp_matrix_gauss_to_face_1d[1, jj] * x[jj, i, j] - out[index_right] = out[index_right] + interp_matrix_gauss_to_face_1d[2, jj] * x[jj, i, j] +#! format: on + (; interp_matrix_gauss_to_face_1d, face_indices_tensor_product) = A + (; nnodes_1d) = A + + fill!(out, zero(eltype(out))) + + # for 3D GaussSBP nodes, the indexing is first in y, then x, then z. 
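
Each `@turbo` loop above applies a 2 x nnodes_1d one-dimensional interpolation matrix along one tensor direction. A small sketch of that 1D building block, with the Lagrange basis written out by hand rather than via `polynomial_interpolation_matrix`:

    r = [-1 / sqrt(3), 1 / sqrt(3)]  # 2-point Gauss nodes, as an example
    xf = [-1.0, 1.0]                 # the two face points of a 1D element
    lagrange(x, j) = prod((x - r[k]) / (r[j] - r[k]) for k in eachindex(r) if k != j)
    interp_to_faces = [lagrange(x, j) for x in xf, j in eachindex(r)]
    f = @. 2 + 3 * r                 # a degree-1 polynomial sampled at the Gauss nodes
    @assert interp_to_faces * f ≈ [-1.0, 5.0]  # exact values at x = -1 and x = +1
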
+ x = reshape(x, nnodes_1d, nnodes_1d, nnodes_1d) + + # interpolation in the y-direction + @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, j, 2] + index_right = face_indices_tensor_product[2, i, j, 2] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + out[index_left] = out[index_left] + + interp_matrix_gauss_to_face_1d[1, jj] * x[jj, i, j] + out[index_right] = out[index_right] + + interp_matrix_gauss_to_face_1d[2, jj] * x[jj, i, j] + end end - end - - # interpolation in the x-direction - @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, j, 1] - index_right = face_indices_tensor_product[2, i, j, 1] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - out[index_left] = out[index_left] + interp_matrix_gauss_to_face_1d[1, jj] * x[i, jj, j] - out[index_right] = out[index_right] + interp_matrix_gauss_to_face_1d[2, jj] * x[i, jj, j] + + # interpolation in the x-direction + @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, j, 1] + index_right = face_indices_tensor_product[2, i, j, 1] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + out[index_left] = out[index_left] + + interp_matrix_gauss_to_face_1d[1, jj] * x[i, jj, j] + out[index_right] = out[index_right] + + interp_matrix_gauss_to_face_1d[2, jj] * x[i, jj, j] + end end - end - - # interpolation in the z-direction - @turbo for i in Base.OneTo(nnodes_1d), j in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, j, 3] - index_right = face_indices_tensor_product[2, i, j, 3] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - # The ordering (i,j) -> (j,i) needs to be reversed for this last face. - # This is due to way we define face nodes for Hex() types in StartUpDG.jl. - out[index_left] = out[index_left] + interp_matrix_gauss_to_face_1d[1, jj] * x[j, i, jj] - out[index_right] = out[index_right] + interp_matrix_gauss_to_face_1d[2, jj] * x[j, i, jj] + + # interpolation in the z-direction + @turbo for i in Base.OneTo(nnodes_1d), j in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, j, 3] + index_right = face_indices_tensor_product[2, i, j, 3] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + # The ordering (i,j) -> (j,i) needs to be reversed for this last face. + # This is due to way we define face nodes for Hex() types in StartUpDG.jl. + out[index_left] = out[index_left] + + interp_matrix_gauss_to_face_1d[1, jj] * x[j, i, jj] + out[index_right] = out[index_right] + + interp_matrix_gauss_to_face_1d[2, jj] * x[j, i, jj] + end end - end end # Projects face node values to volume Gauss nodes on one element. +#! format: off @inline function tensor_product_gauss_face_operator!(out_vec::AbstractVector, A::TensorProductGaussFaceOperator{2, Projection{ApplyFaceWeights}}, x::AbstractVector) where {ApplyFaceWeights} - - (; interp_matrix_gauss_to_face_1d, face_indices_tensor_product) = A - (; inv_volume_weights_1d, nnodes_1d) = A - - fill!(out_vec, zero(eltype(out_vec))) - - # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. - # Thus, Base.ReshapedArray should be used if you are setting values in the array. 
- # `reshape` is fine if you are only accessing values. - # Note that, for 2D GaussSBP nodes, the indexing is first in x, then y - out = Base.ReshapedArray(out_vec, (nnodes_1d, nnodes_1d), ()) - - if ApplyFaceWeights == true - @turbo for i in eachindex(x) - x[i] = x[i] * A.face_weights[i] +#! format: on + (; interp_matrix_gauss_to_face_1d, face_indices_tensor_product) = A + (; inv_volume_weights_1d, nnodes_1d) = A + + fill!(out_vec, zero(eltype(out_vec))) + + # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. + # Thus, Base.ReshapedArray should be used if you are setting values in the array. + # `reshape` is fine if you are only accessing values. + # Note that, for 2D GaussSBP nodes, the indexing is first in x, then y + out = Base.ReshapedArray(out_vec, (nnodes_1d, nnodes_1d), ()) + + if ApplyFaceWeights == true + @turbo for i in eachindex(x) + x[i] = x[i] * A.face_weights[i] + end end - end - - # interpolation in the x-direction - @turbo for i in Base.OneTo(nnodes_1d) # loop over face nodes - index_left = face_indices_tensor_product[1, i, 1] - index_right = face_indices_tensor_product[2, i, 1] - for jj in Base.OneTo(nnodes_1d) # loop over a line of volume nodes - out[jj, i] = out[jj, i] + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] - out[jj, i] = out[jj, i] + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + + # interpolation in the x-direction + @turbo for i in Base.OneTo(nnodes_1d) # loop over face nodes + index_left = face_indices_tensor_product[1, i, 1] + index_right = face_indices_tensor_product[2, i, 1] + for jj in Base.OneTo(nnodes_1d) # loop over a line of volume nodes + out[jj, i] = out[jj, i] + + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] + out[jj, i] = out[jj, i] + + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + end end - end - - # interpolation in the y-direction - @turbo for i in Base.OneTo(nnodes_1d) - index_left = face_indices_tensor_product[1, i, 2] - index_right = face_indices_tensor_product[2, i, 2] - # loop over a line of volume nodes - for jj in Base.OneTo(nnodes_1d) - out[i, jj] = out[i, jj] + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] - out[i, jj] = out[i, jj] + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + + # interpolation in the y-direction + @turbo for i in Base.OneTo(nnodes_1d) + index_left = face_indices_tensor_product[1, i, 2] + index_right = face_indices_tensor_product[2, i, 2] + # loop over a line of volume nodes + for jj in Base.OneTo(nnodes_1d) + out[i, jj] = out[i, jj] + + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] + out[i, jj] = out[i, jj] + + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + end end - end - # apply inv(M) - @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) - out[i, j] = out[i, j] * inv_volume_weights_1d[i] * inv_volume_weights_1d[j] - end + # apply inv(M) + @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) + out[i, j] = out[i, j] * inv_volume_weights_1d[i] * inv_volume_weights_1d[j] + end end # Interpolates values from volume Gauss nodes to face nodes on one element. +#! 
format: off @inline function tensor_product_gauss_face_operator!(out_vec::AbstractVector, A::TensorProductGaussFaceOperator{3, Projection{ApplyFaceWeights}}, x::AbstractVector) where {ApplyFaceWeights} - - @unpack interp_matrix_gauss_to_face_1d, face_indices_tensor_product = A - @unpack inv_volume_weights_1d, nnodes_1d, nfaces = A - - fill!(out_vec, zero(eltype(out_vec))) - - # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. - # Thus, Base.ReshapedArray should be used if you are setting values in the array. - # `reshape` is fine if you are only accessing values. - # Note that, for 3D GaussSBP nodes, the indexing is first in y, then x, then z. - out = Base.ReshapedArray(out_vec, (nnodes_1d, nnodes_1d, nnodes_1d), ()) - - if ApplyFaceWeights == true - @turbo for i in eachindex(x) - x[i] = x[i] * A.face_weights[i] +#! format: on + @unpack interp_matrix_gauss_to_face_1d, face_indices_tensor_product = A + @unpack inv_volume_weights_1d, nnodes_1d, nfaces = A + + fill!(out_vec, zero(eltype(out_vec))) + + # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. + # Thus, Base.ReshapedArray should be used if you are setting values in the array. + # `reshape` is fine if you are only accessing values. + # Note that, for 3D GaussSBP nodes, the indexing is first in y, then x, then z. + out = Base.ReshapedArray(out_vec, (nnodes_1d, nnodes_1d, nnodes_1d), ()) + + if ApplyFaceWeights == true + @turbo for i in eachindex(x) + x[i] = x[i] * A.face_weights[i] + end end - end - - # interpolation in the y-direction - @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, j, 2] - index_right = face_indices_tensor_product[2, i, j, 2] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - out[jj, i, j] = out[jj, i, j] + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] - out[jj, i, j] = out[jj, i, j] + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + + # interpolation in the y-direction + @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, j, 2] + index_right = face_indices_tensor_product[2, i, j, 2] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + out[jj, i, j] = out[jj, i, j] + + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] + out[jj, i, j] = out[jj, i, j] + + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + end end - end - - # interpolation in the x-direction - @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, j, 1] - index_right = face_indices_tensor_product[2, i, j, 1] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - out[i, jj, j] = out[i, jj, j] + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] - out[i, jj, j] = out[i, jj, j] + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + + # interpolation in the x-direction + @turbo for j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, j, 1] + index_right = face_indices_tensor_product[2, i, j, 1] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + out[i, jj, j] = out[i, jj, j] + + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] + out[i, jj, j] = out[i, jj, j] + + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + end end - 
end - - # interpolation in the z-direction - @turbo for i in Base.OneTo(nnodes_1d), j in Base.OneTo(nnodes_1d) # loop over nodes in a face - index_left = face_indices_tensor_product[1, i, j, 3] - index_right = face_indices_tensor_product[2, i, j, 3] - for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes - # The ordering (i,j) -> (j,i) needs to be reversed for this last face. - # This is due to way we define face nodes for Hex() types in StartUpDG.jl. - out[j, i, jj] = out[j, i, jj] + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] - out[j, i, jj] = out[j, i, jj] + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + + # interpolation in the z-direction + @turbo for i in Base.OneTo(nnodes_1d), j in Base.OneTo(nnodes_1d) # loop over nodes in a face + index_left = face_indices_tensor_product[1, i, j, 3] + index_right = face_indices_tensor_product[2, i, j, 3] + for jj in Base.OneTo(nnodes_1d) # loop over "line" of volume nodes + # The ordering (i,j) -> (j,i) needs to be reversed for this last face. + # This is due to way we define face nodes for Hex() types in StartUpDG.jl. + out[j, i, jj] = out[j, i, jj] + + interp_matrix_gauss_to_face_1d[1, jj] * x[index_left] + out[j, i, jj] = out[j, i, jj] + + interp_matrix_gauss_to_face_1d[2, jj] * x[index_right] + end end - end - # apply inv(M) - @turbo for k in Base.OneTo(nnodes_1d), j in Base.OneTo(nnodes_1d), i in Base.OneTo(nnodes_1d) - out[i, j, k] = out[i, j, k] * inv_volume_weights_1d[i] * inv_volume_weights_1d[j] * inv_volume_weights_1d[k] - end + # apply inv(M) + @turbo for k in Base.OneTo(nnodes_1d), j in Base.OneTo(nnodes_1d), + i in Base.OneTo(nnodes_1d) + + out[i, j, k] = out[i, j, k] * inv_volume_weights_1d[i] * + inv_volume_weights_1d[j] * inv_volume_weights_1d[k] + end end # For now, this is mostly the same as `create_cache` for DGMultiFluxDiff{<:Polynomial}. # In the future, we may modify it so that we can specialize additional parts of GaussSBP() solvers. function create_cache(mesh::DGMultiMesh, equations, - dg::DGMultiFluxDiff{<:GaussSBP, <:Union{Quad, Hex}}, RealT, uEltype) - - # call general Polynomial flux differencing constructor - cache = invoke(create_cache, Tuple{typeof(mesh), typeof(equations), - DGMultiFluxDiff, typeof(RealT), typeof(uEltype)}, - mesh, equations, dg, RealT, uEltype) - - rd = dg.basis - @unpack md = mesh - - # for change of basis prior to the volume integral and entropy projection - r1D, _ = StartUpDG.gauss_lobatto_quad(0, 0, polydeg(dg)) - rq1D, _ = StartUpDG.gauss_quad(0, 0, polydeg(dg)) - interp_matrix_lobatto_to_gauss_1D = polynomial_interpolation_matrix(r1D, rq1D) - interp_matrix_gauss_to_lobatto_1D = polynomial_interpolation_matrix(rq1D, r1D) - NDIMS = ndims(rd.element_type) - interp_matrix_lobatto_to_gauss = SimpleKronecker(NDIMS, interp_matrix_lobatto_to_gauss_1D, uEltype) - interp_matrix_gauss_to_lobatto = SimpleKronecker(NDIMS, interp_matrix_gauss_to_lobatto_1D, uEltype) - inv_gauss_weights = inv.(rd.wq) - - # specialized operators to perform tensor product interpolation to faces for Gauss nodes - interp_matrix_gauss_to_face = TensorProductGaussFaceOperator(Interpolation(), dg) - projection_matrix_gauss_to_face = TensorProductGaussFaceOperator(Projection{Static.False()}(), dg) - - # `LIFT` matrix for Gauss nodes - this is equivalent to `projection_matrix_gauss_to_face` scaled by `diagm(rd.wf)`, - # where `rd.wf` are Gauss node face quadrature weights. 
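
The comment below describes `gauss_LIFT` as the projection operator scaled by the face quadrature weights, i.e. `LIFT = M \ (Vf' * diagm(wf))`. A hedged 1D illustration of the key property of this quadrature-based lifting (integrating a lifted face flux against the volume rule reproduces the weighted face sum), reusing the hand-coded 2-point rule from before:

    using LinearAlgebra

    r = [-1 / sqrt(3), 1 / sqrt(3)]
    xf = [-1.0, 1.0]
    wq = [1.0, 1.0]                # 1D Gauss volume weights
    wf = [1.0, 1.0]                # "face" weights: one point per endpoint
    lagrange(x, j) = prod((x - r[k]) / (r[j] - r[k]) for k in eachindex(r) if k != j)
    Vf = [lagrange(x, j) for x in xf, j in eachindex(r)]
    LIFT = Diagonal(wq) \ (Vf' * Diagonal(wf))
    g = [0.7, -0.2]                # arbitrary face flux values
    @assert dot(wq, LIFT * g) ≈ sum(wf .* g)
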
- gauss_LIFT = TensorProductGaussFaceOperator(Projection{Static.True()}(), dg) - - nvars = nvariables(equations) - rhs_volume_local_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) for _ in 1:Threads.nthreads()] - gauss_volume_local_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) for _ in 1:Threads.nthreads()] - - return (; cache..., projection_matrix_gauss_to_face, gauss_LIFT, inv_gauss_weights, - rhs_volume_local_threaded, gauss_volume_local_threaded, - interp_matrix_lobatto_to_gauss, interp_matrix_gauss_to_lobatto, - interp_matrix_gauss_to_face, - create_cache(mesh, equations, dg.volume_integral, dg, RealT, uEltype)...) # add cache specialized on the volume integral + dg::DGMultiFluxDiff{<:GaussSBP, <:Union{Quad, Hex}}, RealT, + uEltype) + + # call general Polynomial flux differencing constructor + cache = invoke(create_cache, + Tuple{typeof(mesh), typeof(equations), + DGMultiFluxDiff, typeof(RealT), typeof(uEltype)}, + mesh, equations, dg, RealT, uEltype) + + rd = dg.basis + @unpack md = mesh + + # for change of basis prior to the volume integral and entropy projection + r1D, _ = StartUpDG.gauss_lobatto_quad(0, 0, polydeg(dg)) + rq1D, _ = StartUpDG.gauss_quad(0, 0, polydeg(dg)) + interp_matrix_lobatto_to_gauss_1D = polynomial_interpolation_matrix(r1D, rq1D) + interp_matrix_gauss_to_lobatto_1D = polynomial_interpolation_matrix(rq1D, r1D) + NDIMS = ndims(rd.element_type) + interp_matrix_lobatto_to_gauss = SimpleKronecker(NDIMS, + interp_matrix_lobatto_to_gauss_1D, + uEltype) + interp_matrix_gauss_to_lobatto = SimpleKronecker(NDIMS, + interp_matrix_gauss_to_lobatto_1D, + uEltype) + inv_gauss_weights = inv.(rd.wq) + + # specialized operators to perform tensor product interpolation to faces for Gauss nodes + interp_matrix_gauss_to_face = TensorProductGaussFaceOperator(Interpolation(), dg) + projection_matrix_gauss_to_face = TensorProductGaussFaceOperator(Projection{ + Static.False() + }(), dg) + + # `LIFT` matrix for Gauss nodes - this is equivalent to `projection_matrix_gauss_to_face` scaled by `diagm(rd.wf)`, + # where `rd.wf` are Gauss node face quadrature weights. + gauss_LIFT = TensorProductGaussFaceOperator(Projection{Static.True()}(), dg) + + nvars = nvariables(equations) + rhs_volume_local_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) + for _ in 1:Threads.nthreads()] + gauss_volume_local_threaded = [allocate_nested_array(uEltype, nvars, (rd.Nq,), dg) + for _ in 1:Threads.nthreads()] + + return (; cache..., projection_matrix_gauss_to_face, gauss_LIFT, inv_gauss_weights, + rhs_volume_local_threaded, gauss_volume_local_threaded, + interp_matrix_lobatto_to_gauss, interp_matrix_gauss_to_lobatto, + interp_matrix_gauss_to_face, + create_cache(mesh, equations, dg.volume_integral, dg, RealT, uEltype)...) # add cache specialized on the volume integral end # by default, return an empty tuple for volume integral caches create_cache(mesh, equations, volume_integral, dg, RealT, uEltype) = NamedTuple() # TODO: DGMulti. Address hard-coding of `entropy2cons!` and `cons2entropy!` for this function. 
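
The `create_cache` specialization above uses `invoke` to reuse the generic `DGMultiFluxDiff` cache constructor before appending GaussSBP-specific operators. A minimal sketch of that dispatch pattern; the type and function here are hypothetical stand-ins:

    struct GaussSolver end                       # hypothetical solver type
    build_cache(solver) = (; generic = true)     # generic fallback method
    function build_cache(solver::GaussSolver)
        cache = invoke(build_cache, Tuple{Any}, solver)  # call the generic method
        return (; cache..., gauss_specific = true)       # then extend its result
    end
    @assert build_cache(GaussSolver()) == (generic = true, gauss_specific = true)
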
-function entropy_projection!(cache, u, mesh::DGMultiMesh, equations, dg::DGMultiFluxDiff{<:GaussSBP}) - - rd = dg.basis - @unpack Vq = rd - @unpack VhP, entropy_var_values, u_values = cache - @unpack projected_entropy_var_values, entropy_projected_u_values = cache - @unpack interp_matrix_lobatto_to_gauss, interp_matrix_gauss_to_face = cache - - @threaded for e in eachelement(mesh, dg, cache) - apply_to_each_field(mul_by!(interp_matrix_lobatto_to_gauss), view(u_values, :, e), view(u, :, e)) - end +function entropy_projection!(cache, u, mesh::DGMultiMesh, equations, + dg::DGMultiFluxDiff{<:GaussSBP}) + rd = dg.basis + @unpack Vq = rd + @unpack VhP, entropy_var_values, u_values = cache + @unpack projected_entropy_var_values, entropy_projected_u_values = cache + @unpack interp_matrix_lobatto_to_gauss, interp_matrix_gauss_to_face = cache + + @threaded for e in eachelement(mesh, dg, cache) + apply_to_each_field(mul_by!(interp_matrix_lobatto_to_gauss), + view(u_values, :, e), view(u, :, e)) + end - # transform quadrature values to entropy variables - cons2entropy!(entropy_var_values, u_values, equations) + # transform quadrature values to entropy variables + cons2entropy!(entropy_var_values, u_values, equations) - volume_indices = Base.OneTo(rd.Nq) - face_indices = (rd.Nq + 1):(rd.Nq + rd.Nfq) + volume_indices = Base.OneTo(rd.Nq) + face_indices = (rd.Nq + 1):(rd.Nq + rd.Nfq) - # Interpolate volume Gauss nodes to Gauss face nodes (note the layout of - # `projected_entropy_var_values = [vol pts; face pts]`). - entropy_var_face_values = view(projected_entropy_var_values, face_indices, :) - apply_to_each_field(mul_by!(interp_matrix_gauss_to_face), entropy_var_face_values, entropy_var_values) + # Interpolate volume Gauss nodes to Gauss face nodes (note the layout of + # `projected_entropy_var_values = [vol pts; face pts]`). + entropy_var_face_values = view(projected_entropy_var_values, face_indices, :) + apply_to_each_field(mul_by!(interp_matrix_gauss_to_face), entropy_var_face_values, + entropy_var_values) - # directly copy over volume values (no entropy projection required) - entropy_projected_volume_values = view(entropy_projected_u_values, volume_indices, :) - @threaded for i in eachindex(u_values) - entropy_projected_volume_values[i] = u_values[i] - end + # directly copy over volume values (no entropy projection required) + entropy_projected_volume_values = view(entropy_projected_u_values, volume_indices, + :) + @threaded for i in eachindex(u_values) + entropy_projected_volume_values[i] = u_values[i] + end - # transform entropy to conservative variables on face values - entropy_projected_face_values = view(entropy_projected_u_values, face_indices, :) - entropy2cons!(entropy_projected_face_values, entropy_var_face_values, equations) + # transform entropy to conservative variables on face values + entropy_projected_face_values = view(entropy_projected_u_values, face_indices, :) + entropy2cons!(entropy_projected_face_values, entropy_var_face_values, equations) - return nothing + return nothing end # Assumes cache.flux_face_values is already computed. 
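
`entropy_projection!` above relies on the `[volume points; face points]` row layout of `projected_entropy_var_values` and `entropy_projected_u_values`, addressing the two blocks through `view`s. A small sketch of that indexing convention (the sizes are made up):

    Nq, Nfq, nelements = 4, 2, 3                 # made-up sizes
    projected = zeros(Nq + Nfq, nelements)
    volume_indices = Base.OneTo(Nq)
    face_indices = (Nq + 1):(Nq + Nfq)
    view(projected, volume_indices, :) .= 1.0    # volume block: plain copy, no projection
    view(projected, face_indices, :) .= 2.0      # face block: projected values
    @assert all(projected[volume_indices, :] .== 1.0)
    @assert all(projected[face_indices, :] .== 2.0)
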
@@ -406,140 +449,146 @@ end function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGMultiFluxDiff{<:GaussSBP}, cache) + (; gauss_LIFT, gauss_volume_local_threaded) = cache - (; gauss_LIFT, gauss_volume_local_threaded) = cache + @threaded for e in eachelement(mesh, dg, cache) - @threaded for e in eachelement(mesh, dg, cache) + # applies LIFT matrix, output is stored at Gauss nodes + gauss_volume_local = gauss_volume_local_threaded[Threads.threadid()] + apply_to_each_field(mul_by!(gauss_LIFT), gauss_volume_local, + view(cache.flux_face_values, :, e)) - # applies LIFT matrix, output is stored at Gauss nodes - gauss_volume_local = gauss_volume_local_threaded[Threads.threadid()] - apply_to_each_field(mul_by!(gauss_LIFT), gauss_volume_local, view(cache.flux_face_values, :, e)) - - for i in eachindex(gauss_volume_local) - du[i, e] = du[i, e] + gauss_volume_local[i] + for i in eachindex(gauss_volume_local) + du[i, e] = du[i, e] + gauss_volume_local[i] + end end - - end end @inline function flux_differencing_kernel!(du, u, element, mesh::DGMultiMesh, have_nonconservative_terms, equations, volume_flux, dg::DGMultiFluxDiff{<:GaussSBP}, - cache, alpha=true) - - fluxdiff_local = cache.fluxdiff_local_threaded[Threads.threadid()] - fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) - u_local = view(cache.entropy_projected_u_values, :, element) - - local_flux_differencing!(fluxdiff_local, u_local, element, - have_nonconservative_terms, - volume_flux, has_sparse_operators(dg), - mesh, equations, dg, cache) - - # convert `fluxdiff_local::Vector{<:SVector}` to `rhs_local::StructArray{<:SVector}` - # for faster performance when using `apply_to_each_field`. - rhs_local = cache.rhs_local_threaded[Threads.threadid()] - for i in Base.OneTo(length(fluxdiff_local)) - rhs_local[i] = fluxdiff_local[i] - end - - project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) + cache, alpha = true) + fluxdiff_local = cache.fluxdiff_local_threaded[Threads.threadid()] + fill!(fluxdiff_local, zero(eltype(fluxdiff_local))) + u_local = view(cache.entropy_projected_u_values, :, element) + + local_flux_differencing!(fluxdiff_local, u_local, element, + have_nonconservative_terms, + volume_flux, has_sparse_operators(dg), + mesh, equations, dg, cache) + + # convert `fluxdiff_local::Vector{<:SVector}` to `rhs_local::StructArray{<:SVector}` + # for faster performance when using `apply_to_each_field`. + rhs_local = cache.rhs_local_threaded[Threads.threadid()] + for i in Base.OneTo(length(fluxdiff_local)) + rhs_local[i] = fluxdiff_local[i] + end + project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) end function project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh::DGMultiMesh, - dg::DGMulti, cache, alpha=true) - - # Here, we exploit that under a Gauss nodal basis the structure of the projection - # matrix `Ph = [diagm(1 ./ wq), projection_matrix_gauss_to_face]` such that - # `Ph * [u; uf] = (u ./ wq) + projection_matrix_gauss_to_face * uf`. 
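
Both `calc_surface_integral!` and `flux_differencing_kernel!` below pull a preallocated scratch array out of a `*_threaded` vector indexed by `Threads.threadid()`, which avoids allocations and data races in the element loop. A hedged sketch of the idea using plain `Threads.@threads :static` (Trixi.jl's own `@threaded` macro is not assumed here):

    buffers = [zeros(8) for _ in 1:Threads.nthreads()]  # one scratch array per thread
    results = zeros(100)
    Threads.@threads :static for e in 1:100
        buf = buffers[Threads.threadid()]  # thread-local scratch, reused across elements
        buf .= e                           # element-local work touches only this buffer
        results[e] = sum(buf)
    end
    @assert results == 8.0 .* (1:100)
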
- volume_indices = Base.OneTo(dg.basis.Nq) - face_indices = (dg.basis.Nq + 1):(dg.basis.Nq + dg.basis.Nfq) - local_volume_flux = view(rhs_local, volume_indices) - local_face_flux = view(rhs_local, face_indices) - - # initialize rhs_volume_local = projection_matrix_gauss_to_face * local_face_flux - rhs_volume_local = cache.rhs_volume_local_threaded[Threads.threadid()] - apply_to_each_field(mul_by!(cache.projection_matrix_gauss_to_face), rhs_volume_local, local_face_flux) - - # accumulate volume contributions at Gauss nodes - for i in eachindex(rhs_volume_local) - du_local = rhs_volume_local[i] + local_volume_flux[i] * cache.inv_gauss_weights[i] - du[i, element] = du[i, element] + alpha * du_local - end + dg::DGMulti, cache, alpha = true) + + # Here, we exploit that under a Gauss nodal basis the structure of the projection + # matrix `Ph = [diagm(1 ./ wq), projection_matrix_gauss_to_face]` such that + # `Ph * [u; uf] = (u ./ wq) + projection_matrix_gauss_to_face * uf`. + volume_indices = Base.OneTo(dg.basis.Nq) + face_indices = (dg.basis.Nq + 1):(dg.basis.Nq + dg.basis.Nfq) + local_volume_flux = view(rhs_local, volume_indices) + local_face_flux = view(rhs_local, face_indices) + + # initialize rhs_volume_local = projection_matrix_gauss_to_face * local_face_flux + rhs_volume_local = cache.rhs_volume_local_threaded[Threads.threadid()] + apply_to_each_field(mul_by!(cache.projection_matrix_gauss_to_face), + rhs_volume_local, local_face_flux) + + # accumulate volume contributions at Gauss nodes + for i in eachindex(rhs_volume_local) + du_local = rhs_volume_local[i] + + local_volume_flux[i] * cache.inv_gauss_weights[i] + du[i, element] = du[i, element] + alpha * du_local + end end function calc_volume_integral!(du, u, mesh::DGMultiMesh, have_nonconservative_terms, equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DGMultiFluxDiff{<:GaussSBP}, cache) - - @threaded for e in eachelement(mesh, dg, cache) - flux_differencing_kernel!(du, u, e, mesh, - have_nonconservative_terms, equations, - volume_integral.volume_flux, dg, cache) - end - + @threaded for e in eachelement(mesh, dg, cache) + flux_differencing_kernel!(du, u, e, mesh, + have_nonconservative_terms, equations, + volume_integral.volume_flux, dg, cache) + end end # interpolate back to Lobatto nodes after applying the inverse Jacobian at Gauss points function invert_jacobian_and_interpolate!(du, mesh::DGMultiMesh, equations, - dg::DGMultiFluxDiff{<:GaussSBP}, cache; scaling=-1) - - (; interp_matrix_gauss_to_lobatto, rhs_volume_local_threaded, invJ) = cache - - @threaded for e in eachelement(mesh, dg, cache) - rhs_volume_local = rhs_volume_local_threaded[Threads.threadid()] - - # At this point, `rhs_volume_local` should still be stored at Gauss points. - # We scale it by the inverse Jacobian before transforming back to Lobatto. - for i in eachindex(rhs_volume_local) - rhs_volume_local[i] = du[i, e] * invJ[i, e] * scaling + dg::DGMultiFluxDiff{<:GaussSBP}, cache; + scaling = -1) + (; interp_matrix_gauss_to_lobatto, rhs_volume_local_threaded, invJ) = cache + + @threaded for e in eachelement(mesh, dg, cache) + rhs_volume_local = rhs_volume_local_threaded[Threads.threadid()] + + # At this point, `rhs_volume_local` should still be stored at Gauss points. + # We scale it by the inverse Jacobian before transforming back to Lobatto. 
+ for i in eachindex(rhs_volume_local) + rhs_volume_local[i] = du[i, e] * invJ[i, e] * scaling + end + + # Interpolate result back to Lobatto nodes for ease of analysis, visualization + apply_to_each_field(mul_by!(interp_matrix_gauss_to_lobatto), + view(du, :, e), rhs_volume_local) end - - # Interpolate result back to Lobatto nodes for ease of analysis, visualization - apply_to_each_field(mul_by!(interp_matrix_gauss_to_lobatto), - view(du, :, e), rhs_volume_local) - end - end # Specialize RHS so that we can call `invert_jacobian_and_interpolate!` instead of just `invert_jacobian!`, # since `invert_jacobian!` is also used in other places (e.g., parabolic terms). function rhs!(du, u, t, mesh, equations, initial_condition, boundary_conditions::BC, - source_terms::Source, dg::DGMultiFluxDiff{<:GaussSBP}, cache) where {Source, BC} - - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # this function evaluates the solution at volume and face quadrature points (which was previously - # done in `prolong2interfaces` and `calc_volume_integral`) - @trixi_timeit timer() "entropy_projection!" entropy_projection!(cache, u, mesh, equations, dg) + source_terms::Source, dg::DGMultiFluxDiff{<:GaussSBP}, + cache) where {Source, BC} + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # this function evaluates the solution at volume and face quadrature points (which was previously + # done in `prolong2interfaces` and `calc_volume_integral`) + @trixi_timeit timer() "entropy_projection!" begin + entropy_projection!(cache, u, mesh, equations, dg) + end - # `du` is stored at Gauss nodes here - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) + # `du` is stored at Gauss nodes here + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end - # the following functions are the same as in VolumeIntegralWeakForm, and can be reused from dg.jl - @trixi_timeit timer() "interface flux" calc_interface_flux!(cache, dg.surface_integral, mesh, - have_nonconservative_terms(equations), - equations, dg) + # the following functions are the same as in VolumeIntegralWeakForm, and can be reused from dg.jl + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache, dg.surface_integral, mesh, + have_nonconservative_terms(equations), equations, dg) + end - @trixi_timeit timer() "boundary flux" calc_boundary_flux!(cache, t, boundary_conditions, mesh, - have_nonconservative_terms(equations), equations, dg) + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, + have_nonconservative_terms(equations), equations, dg) + end - # `du` is stored at Gauss nodes here - @trixi_timeit timer() "surface integral" calc_surface_integral!(du, u, mesh, equations, - dg.surface_integral, dg, cache) + # `du` is stored at Gauss nodes here + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) + end - # invert Jacobian and map `du` from Gauss to Lobatto nodes - @trixi_timeit timer() "Jacobian" invert_jacobian_and_interpolate!(du, mesh, equations, dg, cache) + # invert Jacobian and map `du` from Gauss to Lobatto nodes + @trixi_timeit timer() "Jacobian" begin + invert_jacobian_and_interpolate!(du, mesh, equations, dg, cache) + end - @trixi_timeit timer() "source 
terms" calc_sources!(du, u, t, source_terms, - mesh, equations, dg, cache) + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, mesh, equations, dg, cache) + end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgmulti/sbp.jl b/src/solvers/dgmulti/sbp.jl index 18be52b7ba3..ba02d812041 100644 --- a/src/solvers/dgmulti/sbp.jl +++ b/src/solvers/dgmulti/sbp.jl @@ -18,13 +18,13 @@ and """ function DGMulti(approximation_type::AbstractDerivativeOperator; element_type::AbstractElemShape, - surface_flux=flux_central, - surface_integral=SurfaceIntegralWeakForm(surface_flux), - volume_integral=VolumeIntegralWeakForm(), + surface_flux = flux_central, + surface_integral = SurfaceIntegralWeakForm(surface_flux), + volume_integral = VolumeIntegralWeakForm(), kwargs...) - - rd = RefElemData(element_type, approximation_type; kwargs...) - return DG(rd, nothing #= mortar =#, surface_integral, volume_integral) + rd = RefElemData(element_type, approximation_type; kwargs...) + # `nothing` is passed as `mortar` + return DG(rd, nothing, surface_integral, volume_integral) end function DGMulti(element_type::AbstractElemShape, @@ -32,307 +32,340 @@ function DGMulti(element_type::AbstractElemShape, volume_integral, surface_integral; kwargs...) - - DGMulti(approximation_type, element_type=element_type, - surface_integral=surface_integral, volume_integral=volume_integral) + DGMulti(approximation_type, element_type = element_type, + surface_integral = surface_integral, volume_integral = volume_integral) end +function construct_1d_operators(D::AbstractDerivativeOperator, tol) + nodes_1d = collect(grid(D)) + M = SummationByPartsOperators.mass_matrix(D) + if M isa UniformScaling + weights_1d = M * ones(Bool, length(nodes_1d)) + else + weights_1d = diag(M) + end + + # StartUpDG assumes nodes from -1 to +1. Thus, we need to re-scale everything. + # We can adjust the grid spacing as follows. + xmin = SummationByPartsOperators.xmin(D) + xmax = SummationByPartsOperators.xmax(D) + factor = 2 / (xmax - xmin) + @. nodes_1d = factor * (nodes_1d - xmin) - 1 + @. weights_1d = factor * weights_1d + D_1d = droptol!(inv(factor) * sparse(D), tol) + I_1d = Diagonal(ones(Bool, length(nodes_1d))) -function construct_1d_operators(D::AbstractDerivativeOperator, tol) - nodes_1d = collect(grid(D)) - M = SummationByPartsOperators.mass_matrix(D) - if M isa UniformScaling - weights_1d = M * ones(Bool, length(nodes_1d)) - else - weights_1d = diag(M) - end - - # StartUpDG assumes nodes from -1 to +1. Thus, we need to re-scale everything. - # We can adjust the grid spacing as follows. - xmin = SummationByPartsOperators.xmin(D) - xmax = SummationByPartsOperators.xmax(D) - factor = 2 / (xmax - xmin) - @. nodes_1d = factor * (nodes_1d - xmin) - 1 - @. 
weights_1d = factor * weights_1d - - D_1d = droptol!(inv(factor) * sparse(D), tol) - I_1d = Diagonal(ones(Bool, length(nodes_1d))) - - return nodes_1d, weights_1d, D_1d, I_1d + return nodes_1d, weights_1d, D_1d, I_1d end - function StartUpDG.RefElemData(element_type::Line, D::AbstractDerivativeOperator; - tol = 100*eps()) - - approximation_type = D - N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree - - # 1D operators - nodes_1d, weights_1d, D_1d = construct_1d_operators(D, tol) - - # volume - rq = r = nodes_1d - wq = weights_1d - Dr = D_1d - M = Diagonal(wq) - Pq = LinearAlgebra.I - Vq = LinearAlgebra.I - - VDM = nothing # unused generalized Vandermonde matrix - - rst = (r,) - rstq = (rq,) - Drst = (Dr,) - - # face - face_vertices = StartUpDG.face_vertices(element_type) - face_mask = [1, length(nodes_1d)] - - rf = [-1.0; 1.0] - nrJ = [-1.0; 1.0] - wf = [1.0; 1.0] - if D isa AbstractPeriodicDerivativeOperator - # we do not need any face stuff for periodic operators - Vf = spzeros(length(wf), length(wq)) - else - Vf = sparse([1, 2], [1, length(nodes_1d)], [1.0, 1.0]) - end - LIFT = Diagonal(wq) \ (Vf' * Diagonal(wf)) - - rstf = (rf,) - nrstJ = (nrJ,) - - # low order interpolation nodes - r1 = StartUpDG.nodes(element_type, 1) - V1 = StartUpDG.vandermonde(element_type, 1, r) / StartUpDG.vandermonde(element_type, 1, r1) - - return RefElemData( - element_type, approximation_type, N, - face_vertices, V1, - rst, VDM, face_mask, - rst, LinearAlgebra.I, # plotting - rstq, wq, Vq, # quadrature - rstf, wf, Vf, nrstJ, # faces - M, Pq, Drst, LIFT) + tol = 100 * eps()) + approximation_type = D + N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree + + # 1D operators + nodes_1d, weights_1d, D_1d = construct_1d_operators(D, tol) + + # volume + rq = r = nodes_1d + wq = weights_1d + Dr = D_1d + M = Diagonal(wq) + Pq = LinearAlgebra.I + Vq = LinearAlgebra.I + + VDM = nothing # unused generalized Vandermonde matrix + + rst = (r,) + rstq = (rq,) + Drst = (Dr,) + + # face + face_vertices = StartUpDG.face_vertices(element_type) + face_mask = [1, length(nodes_1d)] + + rf = [-1.0; 1.0] + nrJ = [-1.0; 1.0] + wf = [1.0; 1.0] + if D isa AbstractPeriodicDerivativeOperator + # we do not need any face stuff for periodic operators + Vf = spzeros(length(wf), length(wq)) + else + Vf = sparse([1, 2], [1, length(nodes_1d)], [1.0, 1.0]) + end + LIFT = Diagonal(wq) \ (Vf' * Diagonal(wf)) + + rstf = (rf,) + nrstJ = (nrJ,) + + # low order interpolation nodes + r1 = StartUpDG.nodes(element_type, 1) + V1 = StartUpDG.vandermonde(element_type, 1, r) / + StartUpDG.vandermonde(element_type, 1, r1) + + return RefElemData(element_type, approximation_type, N, + face_vertices, V1, + rst, VDM, face_mask, + rst, LinearAlgebra.I, # plotting + rstq, wq, Vq, # quadrature + rstf, wf, Vf, nrstJ, # faces + M, Pq, Drst, LIFT) end - function StartUpDG.RefElemData(element_type::Quad, D::AbstractDerivativeOperator; - tol = 100*eps()) - - approximation_type = D - N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree - - # 1D operators - nodes_1d, weights_1d, D_1d, I_1d = construct_1d_operators(D, tol) - - # volume - s, r = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d)) # this is to match - # ordering of nrstJ - rq = r; sq = s - wr, ws = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d)) - wq = wr .* ws - Dr = kron(I_1d, D_1d) - Ds = kron(D_1d, I_1d) - M = Diagonal(wq) - Pq = LinearAlgebra.I - Vq = LinearAlgebra.I - - VDM = nothing # unused generalized Vandermonde matrix - - rst = 
(r, s) - rstq = (rq, sq) - Drst = (Dr, Ds) - - # face - face_vertices = StartUpDG.face_vertices(element_type) - face_mask = vcat(StartUpDG.find_face_nodes(element_type, r, s)...) - - rf, sf, wf, nrJ, nsJ = StartUpDG.init_face_data(element_type, - quad_rule_face=(nodes_1d, weights_1d)) - if D isa AbstractPeriodicDerivativeOperator - # we do not need any face stuff for periodic operators - Vf = spzeros(length(wf), length(wq)) - else - Vf = sparse(eachindex(face_mask), face_mask, ones(Bool, length(face_mask))) - end - LIFT = Diagonal(wq) \ (Vf' * Diagonal(wf)) - - rstf = (rf, sf) - nrstJ = (nrJ, nsJ) - - # low order interpolation nodes - r1, s1 = StartUpDG.nodes(element_type, 1) - V1 = StartUpDG.vandermonde(element_type, 1, r, s) / StartUpDG.vandermonde(element_type, 1, r1, s1) - - return RefElemData( - element_type, approximation_type, N, - face_vertices, V1, - rst, VDM, face_mask, - rst, LinearAlgebra.I, # plotting - rstq, wq, Vq, # quadrature - rstf, wf, Vf, nrstJ, # faces - M, Pq, Drst, LIFT) + tol = 100 * eps()) + approximation_type = D + N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree + + # 1D operators + nodes_1d, weights_1d, D_1d, I_1d = construct_1d_operators(D, tol) + + # volume + s, r = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d)) # this is to match + # ordering of nrstJ + rq = r + sq = s + wr, ws = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d)) + wq = wr .* ws + Dr = kron(I_1d, D_1d) + Ds = kron(D_1d, I_1d) + M = Diagonal(wq) + Pq = LinearAlgebra.I + Vq = LinearAlgebra.I + + VDM = nothing # unused generalized Vandermonde matrix + + rst = (r, s) + rstq = (rq, sq) + Drst = (Dr, Ds) + + # face + face_vertices = StartUpDG.face_vertices(element_type) + face_mask = vcat(StartUpDG.find_face_nodes(element_type, r, s)...) 
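
The `Dr = kron(I_1d, D_1d)` and `Ds = kron(D_1d, I_1d)` operators above act along the fast and the slow tensor index, respectively, via the identity `(B ⊗ A) vec(X) = vec(A X Bᵀ)`. A quick numerical check with random stand-in matrices (not actual SBP operators):

    using LinearAlgebra

    n = 3
    D = randn(n, n)                   # stand-in for the 1D derivative operator
    Id = Matrix{Float64}(I, n, n)
    U = randn(n, n)                   # nodal values with the first index fastest
    @assert kron(Id, D) * vec(U) ≈ vec(D * U)             # derivative in the fast index
    @assert kron(D, Id) * vec(U) ≈ vec(U * transpose(D))  # derivative in the slow index
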
+ + rf, sf, wf, nrJ, nsJ = StartUpDG.init_face_data(element_type, + quad_rule_face = (nodes_1d, weights_1d)) + if D isa AbstractPeriodicDerivativeOperator + # we do not need any face stuff for periodic operators + Vf = spzeros(length(wf), length(wq)) + else + Vf = sparse(eachindex(face_mask), face_mask, ones(Bool, length(face_mask))) + end + LIFT = Diagonal(wq) \ (Vf' * Diagonal(wf)) + + rstf = (rf, sf) + nrstJ = (nrJ, nsJ) + + # low order interpolation nodes + r1, s1 = StartUpDG.nodes(element_type, 1) + V1 = StartUpDG.vandermonde(element_type, 1, r, s) / + StartUpDG.vandermonde(element_type, 1, r1, s1) + + return RefElemData(element_type, approximation_type, N, + face_vertices, V1, + rst, VDM, face_mask, + rst, LinearAlgebra.I, # plotting + rstq, wq, Vq, # quadrature + rstf, wf, Vf, nrstJ, # faces + M, Pq, Drst, LIFT) end - function StartUpDG.RefElemData(element_type::Hex, D::AbstractDerivativeOperator; - tol = 100*eps()) - - approximation_type = D - N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree - - # 1D operators - nodes_1d, weights_1d, D_1d, I_1d = construct_1d_operators(D, tol) - - # volume - # to match ordering of nrstJ - s, r, t = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d, nodes_1d, nodes_1d)) - rq = r; sq = s; tq = t - wr, ws, wt = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d, weights_1d, weights_1d)) - wq = wr .* ws .* wt - Dr = kron(I_1d, I_1d, D_1d) - Ds = kron(I_1d, D_1d, I_1d) - Dt = kron(D_1d, I_1d, I_1d) - M = Diagonal(wq) - Pq = LinearAlgebra.I - Vq = LinearAlgebra.I - - VDM = nothing # unused generalized Vandermonde matrix - - rst = (r, s, t) - rstq = (rq, sq, tq) - Drst = (Dr, Ds, Dt) - - # face - face_vertices = StartUpDG.face_vertices(element_type) - face_mask = vcat(StartUpDG.find_face_nodes(element_type, r, s, t)...) 
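
The rescaling inside `construct_1d_operators` (used by all of the `RefElemData` constructors in this file) maps an operator from `[xmin, xmax]` to the reference interval `[-1, 1]`: nodes and weights are scaled by `factor = 2 / (xmax - xmin)` and the derivative matrix by `inv(factor)`. A toy check with a hand-made 3-point rule, not an actual SBP operator:

    xmin, xmax = 0.0, 4.0
    factor = 2 / (xmax - xmin)
    nodes_1d = [0.0, 2.0, 4.0]
    weights_1d = [1.0, 2.0, 1.0]       # toy weights with sum == xmax - xmin
    nodes_ref = @. factor * (nodes_1d - xmin) - 1
    weights_ref = @. factor * weights_1d
    @assert nodes_ref == [-1.0, 0.0, 1.0]
    @assert sum(weights_ref) == 2      # the rescaled rule integrates 1 over [-1, 1]
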
- - rf, sf, tf, wf, nrJ, nsJ, ntJ = let - rf, sf = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d, nodes_1d)) - wr, ws = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d, weights_1d)) - wf = wr .* ws - StartUpDG.init_face_data(element_type, quad_rule_face=(rf, sf, wf)) - end - Vf = sparse(eachindex(face_mask), face_mask, ones(Bool, length(face_mask))) - LIFT = Diagonal(wq) \ (Vf' * Diagonal(wf)) - - rstf = (rf, sf, tf) - nrstJ = (nrJ, nsJ, ntJ) - - # low order interpolation nodes - r1, s1, t1 = StartUpDG.nodes(element_type, 1) - V1 = StartUpDG.vandermonde(element_type, 1, r, s, t) / StartUpDG.vandermonde(element_type, 1, r1, s1, t1) - - return RefElemData( - element_type, approximation_type, N, - face_vertices, V1, - rst, VDM, face_mask, - rst, LinearAlgebra.I, # plotting - rstq, wq, Vq, # quadrature - rstf, wf, Vf, nrstJ, # faces - M, Pq, Drst, LIFT) + tol = 100 * eps()) + approximation_type = D + N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree + + # 1D operators + nodes_1d, weights_1d, D_1d, I_1d = construct_1d_operators(D, tol) + + # volume + # to match ordering of nrstJ + s, r, t = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d, nodes_1d, nodes_1d)) + rq = r + sq = s + tq = t + wr, ws, wt = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d, weights_1d, weights_1d)) + wq = wr .* ws .* wt + Dr = kron(I_1d, I_1d, D_1d) + Ds = kron(I_1d, D_1d, I_1d) + Dt = kron(D_1d, I_1d, I_1d) + M = Diagonal(wq) + Pq = LinearAlgebra.I + Vq = LinearAlgebra.I + + VDM = nothing # unused generalized Vandermonde matrix + + rst = (r, s, t) + rstq = (rq, sq, tq) + Drst = (Dr, Ds, Dt) + + # face + face_vertices = StartUpDG.face_vertices(element_type) + face_mask = vcat(StartUpDG.find_face_nodes(element_type, r, s, t)...) + + rf, sf, tf, wf, nrJ, nsJ, ntJ = let + rf, sf = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d, nodes_1d)) + wr, ws = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d, weights_1d)) + wf = wr .* ws + StartUpDG.init_face_data(element_type, quad_rule_face = (rf, sf, wf)) + end + Vf = sparse(eachindex(face_mask), face_mask, ones(Bool, length(face_mask))) + LIFT = Diagonal(wq) \ (Vf' * Diagonal(wf)) + + rstf = (rf, sf, tf) + nrstJ = (nrJ, nsJ, ntJ) + + # low order interpolation nodes + r1, s1, t1 = StartUpDG.nodes(element_type, 1) + V1 = StartUpDG.vandermonde(element_type, 1, r, s, t) / + StartUpDG.vandermonde(element_type, 1, r1, s1, t1) + + return RefElemData(element_type, approximation_type, N, + face_vertices, V1, + rst, VDM, face_mask, + rst, LinearAlgebra.I, # plotting + rstq, wq, Vq, # quadrature + rstf, wf, Vf, nrstJ, # faces + M, Pq, Drst, LIFT) end # specialized Hex constructor in 3D to reduce memory usage. 
function StartUpDG.RefElemData(element_type::Hex, D::AbstractPeriodicDerivativeOperator; - tol = 100*eps()) - - approximation_type = D - N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree - - # 1D operators - nodes_1d, weights_1d, D_1d, I_1d = construct_1d_operators(D, tol) - - # volume - # to match ordering of nrstJ - s, r, t = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d, nodes_1d, nodes_1d)) - rq = r; sq = s; tq = t - wr, ws, wt = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d, weights_1d, weights_1d)) - wq = wr .* ws .* wt - Dr = kron(I_1d, I_1d, D_1d) - Ds = kron(I_1d, D_1d, I_1d) - Dt = kron(D_1d, I_1d, I_1d) - M = Diagonal(wq) - Pq = LinearAlgebra.I - Vq = LinearAlgebra.I - - VDM = nothing # unused generalized Vandermonde matrix - - rst = (r, s, t) - rstq = (rq, sq, tq) - Drst = (Dr, Ds, Dt) - - # face - # We do not need any face data for periodic operators. Thus, we just - # pass `nothing` to save memory. - face_vertices = ntuple(_ -> nothing, 3) - face_mask = nothing - wf = nothing - rstf = ntuple(_ -> nothing, 3) - nrstJ = ntuple(_ -> nothing, 3) - Vf = nothing - LIFT = nothing - - # low order interpolation nodes - V1 = nothing # do not need to store V1, since we specialize StartUpDG.MeshData to avoid using it. - - return RefElemData( - element_type, approximation_type, N, - face_vertices, V1, - rst, VDM, face_mask, - rst, LinearAlgebra.I, # plotting - rstq, wq, Vq, # quadrature - rstf, wf, Vf, nrstJ, # faces - M, Pq, Drst, LIFT) + tol = 100 * eps()) + approximation_type = D + N = SummationByPartsOperators.accuracy_order(D) # kind of polynomial degree + + # 1D operators + nodes_1d, weights_1d, D_1d, I_1d = construct_1d_operators(D, tol) + + # volume + # to match ordering of nrstJ + s, r, t = vec.(StartUpDG.NodesAndModes.meshgrid(nodes_1d, nodes_1d, nodes_1d)) + rq = r + sq = s + tq = t + wr, ws, wt = vec.(StartUpDG.NodesAndModes.meshgrid(weights_1d, weights_1d, weights_1d)) + wq = wr .* ws .* wt + Dr = kron(I_1d, I_1d, D_1d) + Ds = kron(I_1d, D_1d, I_1d) + Dt = kron(D_1d, I_1d, I_1d) + M = Diagonal(wq) + Pq = LinearAlgebra.I + Vq = LinearAlgebra.I + + VDM = nothing # unused generalized Vandermonde matrix + + rst = (r, s, t) + rstq = (rq, sq, tq) + Drst = (Dr, Ds, Dt) + + # face + # We do not need any face data for periodic operators. Thus, we just + # pass `nothing` to save memory. + face_vertices = ntuple(_ -> nothing, 3) + face_mask = nothing + wf = nothing + rstf = ntuple(_ -> nothing, 3) + nrstJ = ntuple(_ -> nothing, 3) + Vf = nothing + LIFT = nothing + + # low order interpolation nodes + V1 = nothing # do not need to store V1, since we specialize StartUpDG.MeshData to avoid using it. 
+ + return RefElemData(element_type, approximation_type, N, + face_vertices, V1, + rst, VDM, face_mask, + rst, LinearAlgebra.I, # plotting + rstq, wq, Vq, # quadrature + rstf, wf, Vf, nrstJ, # faces + M, Pq, Drst, LIFT) end - -function Base.show(io::IO, mime::MIME"text/plain", rd::RefElemData{NDIMS, ElementType, ApproximationType}) where {NDIMS, ElementType<:StartUpDG.AbstractElemShape, ApproximationType<:AbstractDerivativeOperator} - @nospecialize rd - print(io, "RefElemData for an approximation using an ") - show(IOContext(io, :compact => true), rd.approximation_type) - print(io, " on $(rd.element_type) element") +function Base.show(io::IO, mime::MIME"text/plain", + rd::RefElemData{NDIMS, ElementType, ApproximationType}) where {NDIMS, + ElementType <: + StartUpDG.AbstractElemShape, + ApproximationType <: + AbstractDerivativeOperator + } + @nospecialize rd + print(io, "RefElemData for an approximation using an ") + show(IOContext(io, :compact => true), rd.approximation_type) + print(io, " on $(rd.element_type) element") end -function Base.show(io::IO, rd::RefElemData{NDIMS, ElementType, ApproximationType}) where {NDIMS, ElementType<:StartUpDG.AbstractElemShape, ApproximationType<:AbstractDerivativeOperator} - @nospecialize rd - print(io, "RefElemData{", summary(rd.approximation_type), ", ", rd.element_type, "}") +function Base.show(io::IO, + rd::RefElemData{NDIMS, ElementType, ApproximationType}) where {NDIMS, + ElementType <: + StartUpDG.AbstractElemShape, + ApproximationType <: + AbstractDerivativeOperator + } + @nospecialize rd + print(io, "RefElemData{", summary(rd.approximation_type), ", ", rd.element_type, "}") end -function StartUpDG.inverse_trace_constant(rd::RefElemData{NDIMS, ElementType, ApproximationType}) where {NDIMS, ElementType<:Union{Line, Quad, Hex}, ApproximationType<:AbstractDerivativeOperator} - D = rd.approximation_type - - # the inverse trace constant is the maximum eigenvalue corresponding to - # M_f * v = λ * M * v - # where M_f is the face mass matrix and M is the volume mass matrix. - # Since M is diagonal and since M_f is just the boundary "mask" matrix - # (which extracts the first and last entries of a vector), the maximum - # eigenvalue is the inverse of the first or last mass matrix diagonal. - left_weight = SummationByPartsOperators.left_boundary_weight(D) - right_weight = SummationByPartsOperators.right_boundary_weight(D) - max_eigenvalue = max(inv(left_weight), inv(right_weight)) - - # For tensor product elements, the trace constant for higher dimensional - # elements is the one-dimensional trace constant multiplied by `NDIMS`. See - # "GPU-accelerated discontinuous Galerkin methods on hybrid meshes." - # Chan, Jesse, et al (2016), https://doi.org/10.1016/j.jcp.2016.04.003 - # for more details (specifically, Appendix A.1, Theorem A.4). - return NDIMS * max_eigenvalue +function StartUpDG.inverse_trace_constant(rd::RefElemData{NDIMS, ElementType, + ApproximationType}) where {NDIMS, + ElementType <: + Union{ + Line, + Quad, + Hex + }, + ApproximationType <: + AbstractDerivativeOperator + } + D = rd.approximation_type + + # the inverse trace constant is the maximum eigenvalue corresponding to + # M_f * v = λ * M * v + # where M_f is the face mass matrix and M is the volume mass matrix. + # Since M is diagonal and since M_f is just the boundary "mask" matrix + # (which extracts the first and last entries of a vector), the maximum + # eigenvalue is the inverse of the first or last mass matrix diagonal. 
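+    # For example, if both boundary weights equal some common value `w_b`, the
+    # one-dimensional inverse trace constant is `1 / w_b`, so the value returned
+    # below for a `Hex` (`NDIMS == 3`) is `3 / w_b`.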
+ left_weight = SummationByPartsOperators.left_boundary_weight(D) + right_weight = SummationByPartsOperators.right_boundary_weight(D) + max_eigenvalue = max(inv(left_weight), inv(right_weight)) + + # For tensor product elements, the trace constant for higher dimensional + # elements is the one-dimensional trace constant multiplied by `NDIMS`. See + # "GPU-accelerated discontinuous Galerkin methods on hybrid meshes." + # Chan, Jesse, et al (2016), https://doi.org/10.1016/j.jcp.2016.04.003 + # for more details (specifically, Appendix A.1, Theorem A.4). + return NDIMS * max_eigenvalue end # type alias for specializing on a periodic SBP operator -const DGMultiPeriodicFDSBP{NDIMS, ApproxType, ElemType} = - DGMulti{NDIMS, ElemType, ApproxType, SurfaceIntegral, VolumeIntegral} where {NDIMS, ElemType, ApproxType<:SummationByPartsOperators.AbstractPeriodicDerivativeOperator, SurfaceIntegral, VolumeIntegral} - -const DGMultiFluxDiffPeriodicFDSBP{NDIMS, ApproxType, ElemType} = - DGMulti{NDIMS, ElemType, ApproxType, SurfaceIntegral, VolumeIntegral} where {NDIMS, ElemType, ApproxType<:SummationByPartsOperators.AbstractPeriodicDerivativeOperator, SurfaceIntegral<:SurfaceIntegralWeakForm, VolumeIntegral<:VolumeIntegralFluxDifferencing} +const DGMultiPeriodicFDSBP{NDIMS, ApproxType, ElemType} = DGMulti{NDIMS, ElemType, + ApproxType, + SurfaceIntegral, + VolumeIntegral + } where {NDIMS, ElemType, + ApproxType <: + SummationByPartsOperators.AbstractPeriodicDerivativeOperator, + SurfaceIntegral, + VolumeIntegral} + +const DGMultiFluxDiffPeriodicFDSBP{NDIMS, ApproxType, ElemType} = DGMulti{NDIMS, ElemType, + ApproxType, + SurfaceIntegral, + VolumeIntegral + } where {NDIMS, + ElemType, + ApproxType <: + SummationByPartsOperators.AbstractPeriodicDerivativeOperator, + SurfaceIntegral <: + SurfaceIntegralWeakForm, + VolumeIntegral <: + VolumeIntegralFluxDifferencing + } """ DGMultiMesh(dg::DGMulti) @@ -342,71 +375,72 @@ a DGMulti with `approximation_type` set to a periodic (finite difference) SBP op SummationByPartsOperators.jl. """ function DGMultiMesh(dg::DGMultiPeriodicFDSBP{NDIMS}; - coordinates_min=ntuple(_ -> -one(real(dg)), NDIMS), - coordinates_max=ntuple(_ -> one(real(dg)), NDIMS)) where {NDIMS} - - rd = dg.basis - - e = Ones{eltype(rd.r)}(size(rd.r)) - z = Zeros{eltype(rd.r)}(size(rd.r)) - - VXYZ = ntuple(_ -> [], NDIMS) - EToV = NaN # StartUpDG.jl uses size(EToV, 1) for the number of elements, this lets us reuse that. - FToF = [] - - # We need to scale the domain from `[-1, 1]^NDIMS` (default in StartUpDG.jl) - # to the given `coordinates_min, coordinates_max` - xyz = xyzq = map(copy, rd.rst) - for dim in 1:NDIMS - factor = (coordinates_max[dim] - coordinates_min[dim]) / 2 - @. 
xyz[dim] = factor * (xyz[dim] + 1) + coordinates_min[dim] - end - xyzf = ntuple(_ -> [], NDIMS) - wJq = diag(rd.M) - - # arrays of connectivity indices between face nodes - mapM = mapP = mapB = [] - - # volume geofacs Gij = dx_i/dxhat_j - coord_diffs = coordinates_max .- coordinates_min - - J_scalar = prod(coord_diffs) / 2^NDIMS - J = e * J_scalar - - if NDIMS == 1 - rxJ = J_scalar * 2 / coord_diffs[1] - rstxyzJ = @SMatrix [rxJ * e] - elseif NDIMS == 2 - rxJ = J_scalar * 2 / coord_diffs[1] - syJ = J_scalar * 2 / coord_diffs[2] - rstxyzJ = @SMatrix [rxJ * e z; z syJ * e] - elseif NDIMS == 3 - rxJ = J_scalar * 2 / coord_diffs[1] - syJ = J_scalar * 2 / coord_diffs[2] - tzJ = J_scalar * 2 / coord_diffs[3] - rstxyzJ = @SMatrix [rxJ * e z z; z syJ * e z; z z tzJ * e] - end - - # surface geofacs - nxyzJ = ntuple(_ -> [], NDIMS) - Jf = [] - - periodicity = ntuple(_ -> true, NDIMS) - - if NDIMS == 1 - mesh_type = Line() - elseif NDIMS == 2 - mesh_type = Quad() - elseif NDIMS == 3 - mesh_type = Hex() - end - - md = MeshData(StartUpDG.VertexMappedMesh(mesh_type, VXYZ, EToV), FToF, xyz, xyzf, xyzq, wJq, - mapM, mapP, mapB, rstxyzJ, J, nxyzJ, Jf, - periodicity) - - boundary_faces = [] - return DGMultiMesh{NDIMS, rd.element_type, typeof(md), typeof(boundary_faces)}(md, boundary_faces) + coordinates_min = ntuple(_ -> -one(real(dg)), NDIMS), + coordinates_max = ntuple(_ -> one(real(dg)), NDIMS)) where {NDIMS} + rd = dg.basis + + e = Ones{eltype(rd.r)}(size(rd.r)) + z = Zeros{eltype(rd.r)}(size(rd.r)) + + VXYZ = ntuple(_ -> [], NDIMS) + EToV = NaN # StartUpDG.jl uses size(EToV, 1) for the number of elements, this lets us reuse that. + FToF = [] + + # We need to scale the domain from `[-1, 1]^NDIMS` (default in StartUpDG.jl) + # to the given `coordinates_min, coordinates_max` + xyz = xyzq = map(copy, rd.rst) + for dim in 1:NDIMS + factor = (coordinates_max[dim] - coordinates_min[dim]) / 2 + @. xyz[dim] = factor * (xyz[dim] + 1) + coordinates_min[dim] + end + xyzf = ntuple(_ -> [], NDIMS) + wJq = diag(rd.M) + + # arrays of connectivity indices between face nodes + mapM = mapP = mapB = [] + + # volume geofacs Gij = dx_i/dxhat_j + coord_diffs = coordinates_max .- coordinates_min + + J_scalar = prod(coord_diffs) / 2^NDIMS + J = e * J_scalar + + if NDIMS == 1 + rxJ = J_scalar * 2 / coord_diffs[1] + rstxyzJ = @SMatrix [rxJ * e] + elseif NDIMS == 2 + rxJ = J_scalar * 2 / coord_diffs[1] + syJ = J_scalar * 2 / coord_diffs[2] + rstxyzJ = @SMatrix [rxJ*e z; z syJ*e] + elseif NDIMS == 3 + rxJ = J_scalar * 2 / coord_diffs[1] + syJ = J_scalar * 2 / coord_diffs[2] + tzJ = J_scalar * 2 / coord_diffs[3] + rstxyzJ = @SMatrix [rxJ*e z z; z syJ*e z; z z tzJ*e] + end + + # surface geofacs + nxyzJ = ntuple(_ -> [], NDIMS) + Jf = [] + + periodicity = ntuple(_ -> true, NDIMS) + + if NDIMS == 1 + mesh_type = Line() + elseif NDIMS == 2 + mesh_type = Quad() + elseif NDIMS == 3 + mesh_type = Hex() + end + + md = MeshData(StartUpDG.VertexMappedMesh(mesh_type, VXYZ, EToV), FToF, xyz, xyzf, xyzq, + wJq, + mapM, mapP, mapB, rstxyzJ, J, nxyzJ, Jf, + periodicity) + + boundary_faces = [] + return DGMultiMesh{NDIMS, rd.element_type, typeof(md), typeof(boundary_faces)}(md, + boundary_faces) end # By default, Julia/LLVM does not use fused multiply-add operations (FMAs). @@ -414,64 +448,70 @@ end # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # This is used in `estimate_dt`. `estimate_h` uses that `Jf / J = O(h^{NDIMS-1}) / O(h^{NDIMS}) = O(1/h)`. 
 # However, since we do not initialize `Jf` for periodic FDSBP operators, we specialize `estimate_h`
 # based on the reference grid provided by SummationByPartsOperators.jl and information about the domain size
 # provided by `md::MeshData`.
-function StartUpDG.estimate_h(e, rd::RefElemData{NDIMS, ElementType, ApproximationType}, md::MeshData) where {NDIMS, ElementType<:StartUpDG.AbstractElemShape, ApproximationType<:SummationByPartsOperators.AbstractPeriodicDerivativeOperator}
-  D = rd.approximation_type
-  x = grid(D)
-
-  # we assume all SummationByPartsOperators.jl reference grids are rescaled to [-1, 1]
-  xmin = SummationByPartsOperators.xmin(D)
-  xmax = SummationByPartsOperators.xmax(D)
-  factor = 2 / (xmax - xmin)
-
-  # If the domain has size L^NDIMS, then `minimum(md.J)^(1 / NDIMS) = L`.
-  # WARNING: this is not a good estimate on anisotropic grids.
-  return minimum(diff(x)) * factor * minimum(md.J)^(1 / NDIMS)
+function StartUpDG.estimate_h(e, rd::RefElemData{NDIMS, ElementType, ApproximationType},
+                              md::MeshData) where {NDIMS,
+                                                   ElementType <:
+                                                   StartUpDG.AbstractElemShape,
+                                                   ApproximationType <:
+                                                   SummationByPartsOperators.AbstractPeriodicDerivativeOperator
+                                                   }
+    D = rd.approximation_type
+    x = grid(D)
+
+    # we assume all SummationByPartsOperators.jl reference grids are rescaled to [-1, 1]
+    xmin = SummationByPartsOperators.xmin(D)
+    xmax = SummationByPartsOperators.xmax(D)
+    factor = 2 / (xmax - xmin)
+
+    # If the domain has size L^NDIMS, then `minimum(md.J)^(1 / NDIMS) = L`.
+    # WARNING: this is not a good estimate on anisotropic grids.
+    return minimum(diff(x)) * factor * minimum(md.J)^(1 / NDIMS)
 end
 
 # specialized for DGMultiPeriodicFDSBP since there are no face nodes
 # and thus no inverse trace constant for periodic domains.
 function estimate_dt(mesh::DGMultiMesh, dg::DGMultiPeriodicFDSBP)
-  rd = dg.basis # RefElemData
-  return StartUpDG.estimate_h(rd, mesh.md)
+    rd = dg.basis # RefElemData
+    return StartUpDG.estimate_h(rd, mesh.md)
 end
 
 # do nothing for interface terms if using a periodic operator
 # We pass the `surface_integral` argument solely for dispatch
 function prolong2interfaces!(cache, u, mesh::DGMultiMesh, equations, surface_integral,
                              dg::DGMultiPeriodicFDSBP)
-  @assert nelements(mesh, dg, cache) == 1
-  nothing
+    @assert nelements(mesh, dg, cache) == 1
+    nothing
 end
 
 function calc_interface_flux!(cache, surface_integral::SurfaceIntegralWeakForm,
                               mesh::DGMultiMesh,
                               have_nonconservative_terms::False, equations,
                               dg::DGMultiPeriodicFDSBP)
-  @assert nelements(mesh, dg, cache) == 1
-  nothing
+    @assert nelements(mesh, dg, cache) == 1
+    nothing
 end
 
 function calc_surface_integral!(du, u, mesh::DGMultiMesh, equations,
                                 surface_integral::SurfaceIntegralWeakForm,
                                 dg::DGMultiPeriodicFDSBP, cache)
-  @assert nelements(mesh, dg, cache) == 1
-  nothing
+    @assert nelements(mesh, dg, cache) == 1
+    nothing
 end
 
 function create_cache(mesh::DGMultiMesh, equations,
                       dg::DGMultiFluxDiffPeriodicFDSBP, RealT, uEltype)
+    md = mesh.md
 
-  md = mesh.md
-
-  # storage for volume quadrature values, face quadrature values, flux values
-  nvars = nvariables(equations)
-  u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg)
-  return (; u_values, invJ = inv.(md.J) )
+    # storage for volume quadrature values, face quadrature values, flux values
+    nvars = nvariables(equations)
+    u_values = allocate_nested_array(uEltype, nvars, size(md.xq), dg)
+    return (; u_values, invJ = inv.(md.J))
 end
 
 # Specialize calc_volume_integral for periodic SBP operators (assumes the operator is sparse).
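As a brief aside, the `estimate_h` specialization above boils down to rescaling the reference grid spacing to `[-1, 1]` and converting it into a physical spacing via the Jacobian. The following minimal sketch reproduces that computation for a hand-built uniform periodic grid; the helper name `estimate_h_sketch` and all concrete numbers are illustrative assumptions, not part of Trixi.jl or StartUpDG.jl.

```julia
# Illustrative sketch (hypothetical helper, not library code): mirrors
# `minimum(diff(x)) * factor * minimum(md.J)^(1 / NDIMS)` from above.
function estimate_h_sketch(x, xmin_ref, xmax_ref, J, ndims)
    factor = 2 / (xmax_ref - xmin_ref)  # rescale the reference grid to [-1, 1]
    return minimum(diff(x)) * factor * J^(1 / ndims)
end

# 32 periodic nodes on the reference interval [0, 2]; one element covering a
# 1D domain of length 3, so the Jacobian is J = 3 / 2 (cf. `J_scalar` above)
x = collect(range(0, 2, length = 33))[1:(end - 1)]
h = estimate_h_sketch(x, 0.0, 2.0, 1.5, 1)  # == 3 / 32
```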
@@ -479,64 +519,61 @@ function calc_volume_integral!(du, u,
                                have_nonconservative_terms::False, equations,
                                volume_integral::VolumeIntegralFluxDifferencing,
                                dg::DGMultiFluxDiffPeriodicFDSBP, cache)
-
-  @unpack volume_flux = volume_integral
-
-  # We expect speedup over the serial version only when using two or more threads
-  # since the threaded version below does not exploit the symmetry properties,
-  # resulting in a performance penalty of 1/2
-  if Threads.nthreads() > 1
-
-    for dim in eachdim(mesh)
-      normal_direction = get_contravariant_vector(1, dim, mesh, cache)
-
-      # These are strong-form operators of the form `D = M \ Q` where `M` is diagonal
-      # and `Q` is skew-symmetric. Since `M` is diagonal, `inv(M)` scales the rows of `Q`.
-      # Then, `1 / M[i,i] * ∑_j Q[i,j] * volume_flux(u[i], u[j])` is equivalent to
-      # `= ∑_j (1 / M[i,i] * Q[i,j]) * volume_flux(u[i], u[j])`
-      # `= ∑_j D[i,j] * volume_flux(u[i], u[j])`
-      # TODO: DGMulti.
-      # This would have to be changed if `has_nonconservative_terms = False()`
-      # because then `volume_flux` is non-symmetric.
-      A = dg.basis.Drst[dim]
-
-      A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR
-      row_ids = axes(A, 2)
-      rows = rowvals(A_base)
-      vals = nonzeros(A_base)
-
-      @threaded for i in row_ids
-        u_i = u[i]
-        du_i = du[i]
-        for id in nzrange(A_base, i)
-          j = rows[id]
-          u_j = u[j]
-          A_ij = vals[id]
-          AF_ij = 2 * A_ij * volume_flux(u_i, u_j, normal_direction, equations)
-          du_i = du_i + AF_ij
+    @unpack volume_flux = volume_integral
+
+    # We expect speedup over the serial version only when using two or more threads
+    # since the threaded version below does not exploit the symmetry properties,
+    # resulting in a performance penalty of 1/2
+    if Threads.nthreads() > 1
+        for dim in eachdim(mesh)
+            normal_direction = get_contravariant_vector(1, dim, mesh, cache)
+
+            # These are strong-form operators of the form `D = M \ Q` where `M` is diagonal
+            # and `Q` is skew-symmetric. Since `M` is diagonal, `inv(M)` scales the rows of `Q`.
+            # Then, `1 / M[i,i] * ∑_j Q[i,j] * volume_flux(u[i], u[j])` is equivalent to
+            # `= ∑_j (1 / M[i,i] * Q[i,j]) * volume_flux(u[i], u[j])`
+            # `= ∑_j D[i,j] * volume_flux(u[i], u[j])`
+            # TODO: DGMulti.
+            # This would have to be changed if `have_nonconservative_terms = True()`
+            # because then `volume_flux` is non-symmetric.
+            A = dg.basis.Drst[dim]
+
+            A_base = parent(A) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR
+            row_ids = axes(A, 2)
+            rows = rowvals(A_base)
+            vals = nonzeros(A_base)
+
+            @threaded for i in row_ids
+                u_i = u[i]
+                du_i = du[i]
+                for id in nzrange(A_base, i)
+                    j = rows[id]
+                    u_j = u[j]
+                    A_ij = vals[id]
+                    AF_ij = 2 * A_ij *
+                            volume_flux(u_i, u_j, normal_direction, equations)
+                    du_i = du_i + AF_ij
+                end
+                du[i] = du_i
+            end
         end
-        du[i] = du_i
-      end
-    end
-
-  else # if using two threads or fewer
+    else # if using a single thread
 
-    # Calls `hadamard_sum!``, which uses symmetry to reduce flux evaluations. Symmetry
-    # is expected to yield about a 2x speedup, so we default to the symmetry-exploiting
-    # volume integral unless we have >2 threads (which should yield >2 speedup).
+        # Calls `hadamard_sum!`, which uses symmetry to reduce flux evaluations. Symmetry
+        # is expected to yield about a 2x speedup, so we default to the symmetry-exploiting
+        # volume integral when fewer than two threads are available.
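+        # In a nutshell: for a symmetric two-point flux, f(u_i, u_j) == f(u_j, u_i),
+        # and a skew-symmetric operator A, the contributions A[i, j] * f_ij and
+        # A[j, i] * f_ij == -A[i, j] * f_ij can both be accumulated from a single
+        # flux evaluation, so each unordered pair (i, j) is visited only once.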
+ for dim in eachdim(mesh) + normal_direction = get_contravariant_vector(1, dim, mesh, cache) - A = dg.basis.Drst[dim] + A = dg.basis.Drst[dim] - # since has_nonconservative_terms::False, - # the volume flux is symmetric. - flux_is_symmetric = True() - hadamard_sum!(du, A, flux_is_symmetric, volume_flux, - normal_direction, u, equations) + # since has_nonconservative_terms::False, + # the volume flux is symmetric. + flux_is_symmetric = True() + hadamard_sum!(du, A, flux_is_symmetric, volume_flux, + normal_direction, u, equations) + end end - - end end - end # @muladd diff --git a/src/solvers/dgmulti/shock_capturing.jl b/src/solvers/dgmulti/shock_capturing.jl index bbda089ee58..d224e5ed03d 100644 --- a/src/solvers/dgmulti/shock_capturing.jl +++ b/src/solvers/dgmulti/shock_capturing.jl @@ -2,155 +2,153 @@ function create_cache(mesh::DGMultiMesh{NDIMS}, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGMultiFluxDiff{<:GaussSBP}, RealT, uEltype) where {NDIMS} - element_ids_dg = Int[] - element_ids_dgfv = Int[] - - # build element to element (element_to_element_connectivity) connectivity for smoothing of - # shock capturing parameters. - face_to_face_connectivity = mesh.md.FToF # num_faces x num_elements matrix - element_to_element_connectivity = similar(face_to_face_connectivity) - for e in axes(face_to_face_connectivity, 2) - for f in axes(face_to_face_connectivity, 1) - neighbor_face_index = face_to_face_connectivity[f, e] - - # reverse-engineer element index from face. Assumes all elements - # have the same number of faces. - neighbor_element_index = ((neighbor_face_index - 1) ÷ dg.basis.num_faces) + 1 - element_to_element_connectivity[f, e] = neighbor_element_index + element_ids_dg = Int[] + element_ids_dgfv = Int[] + + # build element to element (element_to_element_connectivity) connectivity for smoothing of + # shock capturing parameters. + face_to_face_connectivity = mesh.md.FToF # num_faces x num_elements matrix + element_to_element_connectivity = similar(face_to_face_connectivity) + for e in axes(face_to_face_connectivity, 2) + for f in axes(face_to_face_connectivity, 1) + neighbor_face_index = face_to_face_connectivity[f, e] + + # reverse-engineer element index from face. Assumes all elements + # have the same number of faces. + neighbor_element_index = ((neighbor_face_index - 1) ÷ dg.basis.num_faces) + 1 + element_to_element_connectivity[f, e] = neighbor_element_index + end end - end - # create sparse hybridized operators for low order scheme - Qrst, E = StartUpDG.sparse_low_order_SBP_operators(dg.basis) - Brst = map(n -> Diagonal(n .* dg.basis.wf), dg.basis.nrstJ) - sparse_hybridized_SBP_operators = map((Q, B) -> 0.5 * [Q-Q' E'*B; -B*E zeros(size(B))], Qrst, Brst) + # create sparse hybridized operators for low order scheme + Qrst, E = StartUpDG.sparse_low_order_SBP_operators(dg.basis) + Brst = map(n -> Diagonal(n .* dg.basis.wf), dg.basis.nrstJ) + sparse_hybridized_SBP_operators = map((Q, B) -> 0.5 * [Q-Q' E'*B; -B*E zeros(size(B))], + Qrst, Brst) - # Find the joint sparsity pattern of the entire matrix. We store the sparsity pattern as - # an adjoint for faster iteration through the rows. - sparsity_pattern = sum(map(A -> abs.(A)', sparse_hybridized_SBP_operators)) .> 100 * eps() + # Find the joint sparsity pattern of the entire matrix. We store the sparsity pattern as + # an adjoint for faster iteration through the rows. 
+ sparsity_pattern = sum(map(A -> abs.(A)', sparse_hybridized_SBP_operators)) .> + 100 * eps() - return (; element_ids_dg, element_ids_dgfv, + return (; element_ids_dg, element_ids_dgfv, sparse_hybridized_SBP_operators, sparsity_pattern, element_to_element_connectivity) end - # this method is used when the indicator is constructed as for shock-capturing volume integrals function create_cache(::Type{IndicatorHennemannGassner}, equations::AbstractEquations, - basis::RefElemData{NDIMS}) where NDIMS - - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) + basis::RefElemData{NDIMS}) where {NDIMS} + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) - A = Vector{real(basis)} - indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] - modal_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] + A = Vector{real(basis)} + indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] + modal_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] - # initialize inverse Vandermonde matrices at Gauss-Legendre nodes - (; N) = basis - lobatto_node_coordinates_1D, _ = StartUpDG.gauss_lobatto_quad(0, 0, N) - VDM_1D = StartUpDG.vandermonde(Line(), N, lobatto_node_coordinates_1D) - inverse_vandermonde = SimpleKronecker(NDIMS, inv(VDM_1D)) + # initialize inverse Vandermonde matrices at Gauss-Legendre nodes + (; N) = basis + lobatto_node_coordinates_1D, _ = StartUpDG.gauss_lobatto_quad(0, 0, N) + VDM_1D = StartUpDG.vandermonde(Line(), N, lobatto_node_coordinates_1D) + inverse_vandermonde = SimpleKronecker(NDIMS, inv(VDM_1D)) - return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, inverse_vandermonde) + return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, inverse_vandermonde) end - function (indicator_hg::IndicatorHennemannGassner)(u, mesh::DGMultiMesh, equations, dg::DGMulti{NDIMS}, cache; kwargs...) where {NDIMS} - (; alpha_max, alpha_min, alpha_smooth, variable) = indicator_hg - (; alpha, alpha_tmp, indicator_threaded, modal_threaded, inverse_vandermonde) = indicator_hg.cache - - resize!(alpha, nelements(mesh, dg)) - if alpha_smooth - resize!(alpha_tmp, nelements(mesh, dg)) - end - - # magic parameters - threshold = 0.5 * 10^(-1.8 * (dg.basis.N + 1)^0.25) - parameter_s = log((1 - 0.0001) / 0.0001) - - @threaded for element in eachelement(mesh, dg) - indicator = indicator_threaded[Threads.threadid()] - modal_ = modal_threaded[Threads.threadid()] - - # Calculate indicator variable at interpolation (Lobatto) nodes. - # TODO: calculate indicator variables at Gauss nodes or using `cache.entropy_projected_u_values` - for i in eachnode(dg) - indicator[i] = indicator_hg.variable(u[i, element], equations) - end + (; alpha_max, alpha_min, alpha_smooth, variable) = indicator_hg + (; alpha, alpha_tmp, indicator_threaded, modal_threaded, inverse_vandermonde) = indicator_hg.cache - # multiply by invVDM::SimpleKronecker - LinearAlgebra.mul!(modal_, inverse_vandermonde, indicator) - - # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. - # Thus, Base.ReshapedArray should be used if you are setting values in the array. - # `reshape` is fine if you are only accessing values. - # Here, we reshape modal coefficients to expose the tensor product structure. 
- modal = Base.ReshapedArray(modal_, ntuple(_ -> dg.basis.N + 1, NDIMS), ()) - - # Calculate total energies for all modes, all modes minus the highest mode, and - # all modes without the two highest modes - total_energy = sum(x -> x^2, modal) - clip_1_ranges = ntuple(_ -> Base.OneTo(dg.basis.N), NDIMS) - clip_2_ranges = ntuple(_ -> Base.OneTo(dg.basis.N - 1), NDIMS) - # These splattings do not seem to allocate as of Julia 1.9.0? - total_energy_clip1 = sum(x -> x^2, view(modal, clip_1_ranges...)) - total_energy_clip2 = sum(x -> x^2, view(modal, clip_2_ranges...)) - - # Calculate energy in higher modes - if !(iszero(total_energy)) - energy_frac_1 = (total_energy - total_energy_clip1) / total_energy - else - energy_frac_1 = zero(total_energy) - end - if !(iszero(total_energy_clip1)) - energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 - else - energy_frac_2 = zero(total_energy_clip1) + resize!(alpha, nelements(mesh, dg)) + if alpha_smooth + resize!(alpha_tmp, nelements(mesh, dg)) end - energy = max(energy_frac_1, energy_frac_2) - alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) - - # Take care of the case close to pure DG - if alpha_element < alpha_min - alpha_element = zero(alpha_element) + # magic parameters + threshold = 0.5 * 10^(-1.8 * (dg.basis.N + 1)^0.25) + parameter_s = log((1 - 0.0001) / 0.0001) + + @threaded for element in eachelement(mesh, dg) + indicator = indicator_threaded[Threads.threadid()] + modal_ = modal_threaded[Threads.threadid()] + + # Calculate indicator variable at interpolation (Lobatto) nodes. + # TODO: calculate indicator variables at Gauss nodes or using `cache.entropy_projected_u_values` + for i in eachnode(dg) + indicator[i] = indicator_hg.variable(u[i, element], equations) + end + + # multiply by invVDM::SimpleKronecker + LinearAlgebra.mul!(modal_, inverse_vandermonde, indicator) + + # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. + # Thus, Base.ReshapedArray should be used if you are setting values in the array. + # `reshape` is fine if you are only accessing values. + # Here, we reshape modal coefficients to expose the tensor product structure. + modal = Base.ReshapedArray(modal_, ntuple(_ -> dg.basis.N + 1, NDIMS), ()) + + # Calculate total energies for all modes, all modes minus the highest mode, and + # all modes without the two highest modes + total_energy = sum(x -> x^2, modal) + clip_1_ranges = ntuple(_ -> Base.OneTo(dg.basis.N), NDIMS) + clip_2_ranges = ntuple(_ -> Base.OneTo(dg.basis.N - 1), NDIMS) + # These splattings do not seem to allocate as of Julia 1.9.0? 
+        total_energy_clip1 = sum(x -> x^2, view(modal, clip_1_ranges...))
+        total_energy_clip2 = sum(x -> x^2, view(modal, clip_2_ranges...))
+
+        # Calculate energy in higher modes
+        if !(iszero(total_energy))
+            energy_frac_1 = (total_energy - total_energy_clip1) / total_energy
+        else
+            energy_frac_1 = zero(total_energy)
+        end
+        if !(iszero(total_energy_clip1))
+            energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1
+        else
+            energy_frac_2 = zero(total_energy_clip1)
+        end
+        energy = max(energy_frac_1, energy_frac_2)
+
+        alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold)))
+
+        # Take care of the case close to pure DG
+        if alpha_element < alpha_min
+            alpha_element = zero(alpha_element)
+        end
+
+        # Take care of the case close to pure FV
+        if alpha_element > 1 - alpha_min
+            alpha_element = one(alpha_element)
+        end
+
+        # Clip the maximum amount of FV allowed
+        alpha[element] = min(alpha_max, alpha_element)
    end
 
-    # Take care of the case close to pure FV
-    if alpha_element > 1 - alpha_min
-      alpha_element = one(alpha_element)
+    # smooth element indices after they're all computed
+    if alpha_smooth
+        apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache)
     end
 
-    # Clip the maximum amount of FV allowed
-    alpha[element] = min(alpha_max, alpha_element)
-  end
-
-  # smooth element indices after they're all computed
-  if alpha_smooth
-    apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache)
-  end
-
-  return alpha
+    return alpha
 end
 
 # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
 function apply_smoothing!(mesh::DGMultiMesh, alpha, alpha_tmp, dg::DGMulti, cache)
-  # Copy alpha values such that smoothing is indpedenent of the element access order
-  alpha_tmp .= alpha
+    # Copy alpha values such that smoothing is independent of the element access order
+    alpha_tmp .= alpha
 
-  # smooth alpha with its neighboring value
-  for element in eachelement(mesh, dg)
-    for face in Base.OneTo(StartUpDG.num_faces(dg.basis.element_type))
-      neighboring_element = cache.element_to_element_connectivity[face, element]
-      alpha_neighbor = alpha_tmp[neighboring_element]
-      alpha[element] = max(alpha[element], 0.5 * alpha_neighbor)
+    # smooth alpha with its neighboring value
+    for element in eachelement(mesh, dg)
+        for face in Base.OneTo(StartUpDG.num_faces(dg.basis.element_type))
+            neighboring_element = cache.element_to_element_connectivity[face, element]
+            alpha_neighbor = alpha_tmp[neighboring_element]
+            alpha[element] = max(alpha[element], 0.5 * alpha_neighbor)
+        end
     end
-  end
-
 end
 
 # pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache)
@@ -160,20 +158,20 @@ end
 # `element_ids_dgfv` with the IDs of elements using a blended DG-FV scheme.
function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, mesh::DGMultiMesh, dg::DGMulti) - empty!(element_ids_dg) - empty!(element_ids_dgfv) - - for element in eachelement(mesh, dg) - # Clip blending factor for values close to zero (-> pure DG) - dg_only = isapprox(alpha[element], 0, atol=1e-12) - if dg_only - push!(element_ids_dg, element) - else - push!(element_ids_dgfv, element) + empty!(element_ids_dg) + empty!(element_ids_dgfv) + + for element in eachelement(mesh, dg) + # Clip blending factor for values close to zero (-> pure DG) + dg_only = isapprox(alpha[element], 0, atol = 1e-12) + if dg_only + push!(element_ids_dg, element) + else + push!(element_ids_dgfv, element) + end end - end - return nothing + return nothing end function calc_volume_integral!(du, u, @@ -181,167 +179,177 @@ function calc_volume_integral!(du, u, have_nonconservative_terms, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGMultiFluxDiff, cache) + (; element_ids_dg, element_ids_dgfv) = cache + (; volume_flux_dg, volume_flux_fv, indicator) = volume_integral - (; element_ids_dg, element_ids_dgfv) = cache - (; volume_flux_dg, volume_flux_fv, indicator) = volume_integral - - # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α - alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, cache) + # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α + alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, + cache) - # Determine element ids for DG-only and blended DG-FV volume integral - pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, mesh, dg) + # Determine element ids for DG-only and blended DG-FV volume integral + pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, mesh, dg) - # Loop over pure DG elements - @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) - element = element_ids_dg[idx_element] - flux_differencing_kernel!(du, u, element, mesh, have_nonconservative_terms, - equations, volume_flux_dg, dg, cache) - end - - # Loop over blended DG-FV elements, blend the high and low order RHS contributions - # via `rhs_high * (1 - alpha) + rhs_low * (alpha)`. - @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) - element = element_ids_dgfv[idx_element] - alpha_element = alpha[element] - - # Calculate DG volume integral contribution - flux_differencing_kernel!(du, u, element, mesh, - have_nonconservative_terms, equations, - volume_flux_dg, dg, cache, 1 - alpha_element) + # Loop over pure DG elements + @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) + element = element_ids_dg[idx_element] + flux_differencing_kernel!(du, u, element, mesh, have_nonconservative_terms, + equations, volume_flux_dg, dg, cache) + end - # Calculate "FV" low order volume integral contribution - low_order_flux_differencing_kernel!(du, u, element, mesh, - have_nonconservative_terms, equations, - volume_flux_fv, dg, cache, alpha_element) - end + # Loop over blended DG-FV elements, blend the high and low order RHS contributions + # via `rhs_high * (1 - alpha) + rhs_low * (alpha)`. 
+ @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) + element = element_ids_dgfv[idx_element] + alpha_element = alpha[element] + + # Calculate DG volume integral contribution + flux_differencing_kernel!(du, u, element, mesh, + have_nonconservative_terms, equations, + volume_flux_dg, dg, cache, 1 - alpha_element) + + # Calculate "FV" low order volume integral contribution + low_order_flux_differencing_kernel!(du, u, element, mesh, + have_nonconservative_terms, equations, + volume_flux_fv, dg, cache, alpha_element) + end - return nothing + return nothing end -get_sparse_operator_entries(i, j, mesh::DGMultiMesh{1}, cache) = - SVector(cache.sparse_hybridized_SBP_operators[1][i, j]) +function get_sparse_operator_entries(i, j, mesh::DGMultiMesh{1}, cache) + SVector(cache.sparse_hybridized_SBP_operators[1][i, j]) +end function get_sparse_operator_entries(i, j, mesh::DGMultiMesh{2}, cache) - Qr, Qs = cache.sparse_hybridized_SBP_operators - return SVector(Qr[i, j], Qs[i, j]) + Qr, Qs = cache.sparse_hybridized_SBP_operators + return SVector(Qr[i, j], Qs[i, j]) end function get_sparse_operator_entries(i, j, mesh::DGMultiMesh{3}, cache) - Qr, Qs, Qt = cache.sparse_hybridized_SBP_operators - return SVector(Qr[i, j], Qs[i, j], Qt[i, j]) + Qr, Qs, Qt = cache.sparse_hybridized_SBP_operators + return SVector(Qr[i, j], Qs[i, j], Qt[i, j]) end -get_contravariant_matrix(element, mesh::DGMultiMesh{1}, cache) = - SMatrix{1, 1}(cache.dxidxhatj[1, 1][1, element]) +function get_contravariant_matrix(element, mesh::DGMultiMesh{1}, cache) + SMatrix{1, 1}(cache.dxidxhatj[1, 1][1, element]) +end function get_contravariant_matrix(element, mesh::DGMultiMesh{2, <:Affine}, cache) - (; dxidxhatj) = cache - return SMatrix{2, 2}(dxidxhatj[1, 1][1, element], dxidxhatj[2, 1][1, element], - dxidxhatj[1, 2][1, element], dxidxhatj[2, 2][1, element]) + (; dxidxhatj) = cache + return SMatrix{2, 2}(dxidxhatj[1, 1][1, element], dxidxhatj[2, 1][1, element], + dxidxhatj[1, 2][1, element], dxidxhatj[2, 2][1, element]) end function get_contravariant_matrix(element, mesh::DGMultiMesh{3, <:Affine}, cache) - (; dxidxhatj) = cache - return SMatrix{3, 3}(dxidxhatj[1, 1][1, element], dxidxhatj[2, 1][1, element], dxidxhatj[3, 1][1, element], - dxidxhatj[1, 2][1, element], dxidxhatj[2, 2][1, element], dxidxhatj[3, 2][1, element], - dxidxhatj[1, 3][1, element], dxidxhatj[2, 3][1, element], dxidxhatj[3, 3][1, element]) + (; dxidxhatj) = cache + return SMatrix{3, 3}(dxidxhatj[1, 1][1, element], dxidxhatj[2, 1][1, element], + dxidxhatj[3, 1][1, element], + dxidxhatj[1, 2][1, element], dxidxhatj[2, 2][1, element], + dxidxhatj[3, 2][1, element], + dxidxhatj[1, 3][1, element], dxidxhatj[2, 3][1, element], + dxidxhatj[3, 3][1, element]) end function get_contravariant_matrix(i, element, mesh::DGMultiMesh{2}, cache) - (; dxidxhatj) = cache - return SMatrix{2, 2}(dxidxhatj[1, 1][i, element], dxidxhatj[2, 1][i, element], - dxidxhatj[1, 2][i, element], dxidxhatj[2, 2][i, element]) + (; dxidxhatj) = cache + return SMatrix{2, 2}(dxidxhatj[1, 1][i, element], dxidxhatj[2, 1][i, element], + dxidxhatj[1, 2][i, element], dxidxhatj[2, 2][i, element]) end function get_contravariant_matrix(i, element, mesh::DGMultiMesh{3}, cache) - (; dxidxhatj) = cache - return SMatrix{3, 3}(dxidxhatj[1, 1][i, element], dxidxhatj[2, 1][i, element], dxidxhatj[3, 1][i, element], - dxidxhatj[1, 2][i, element], dxidxhatj[2, 2][i, element], dxidxhatj[3, 2][i, element], - dxidxhatj[1, 3][i, element], dxidxhatj[2, 3][i, element], dxidxhatj[3, 
3][i, element]) + (; dxidxhatj) = cache + return SMatrix{3, 3}(dxidxhatj[1, 1][i, element], dxidxhatj[2, 1][i, element], + dxidxhatj[3, 1][i, element], + dxidxhatj[1, 2][i, element], dxidxhatj[2, 2][i, element], + dxidxhatj[3, 2][i, element], + dxidxhatj[1, 3][i, element], dxidxhatj[2, 3][i, element], + dxidxhatj[3, 3][i, element]) end -get_avg_contravariant_matrix(i, j, element, mesh::DGMultiMesh, cache) = - 0.5 * (get_contravariant_matrix(i, element, mesh, cache) + get_contravariant_matrix(j, element, mesh, cache)) +function get_avg_contravariant_matrix(i, j, element, mesh::DGMultiMesh, cache) + 0.5 * (get_contravariant_matrix(i, element, mesh, cache) + + get_contravariant_matrix(j, element, mesh, cache)) +end # computes an algebraic low order method with internal dissipation. # This method is for affine/Cartesian meshes function low_order_flux_differencing_kernel!(du, u, element, mesh::DGMultiMesh, have_nonconservative_terms::False, equations, - volume_flux_fv, dg::DGMultiFluxDiff{<:GaussSBP}, - cache, alpha=true) - - # accumulates output from flux differencing - rhs_local = cache.rhs_local_threaded[Threads.threadid()] - fill!(rhs_local, zero(eltype(rhs_local))) - - u_local = view(cache.entropy_projected_u_values, :, element) - - # constant over each element - geometric_matrix = get_contravariant_matrix(element, mesh, cache) - - (; sparsity_pattern) = cache - A_base = parent(sparsity_pattern) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids, rows = axes(sparsity_pattern, 2), rowvals(A_base) - for i in row_ids - u_i = u_local[i] - du_i = zero(u_i) - for id in nzrange(A_base, i) - j = rows[id] - u_j = u_local[j] - - # compute (Q_1[i,j], Q_2[i,j], ...) where Q_i = ∑_j dxidxhatj * Q̂_j - reference_operator_entries = get_sparse_operator_entries(i, j, mesh, cache) - normal_direction_ij = geometric_matrix * reference_operator_entries - - # note that we do not need to normalize `normal_direction_ij` since - # it is typically normalized within the flux computation. - f_ij = volume_flux_fv(u_i, u_j, normal_direction_ij, equations) - du_i = du_i + 2 * f_ij + volume_flux_fv, + dg::DGMultiFluxDiff{<:GaussSBP}, + cache, alpha = true) + + # accumulates output from flux differencing + rhs_local = cache.rhs_local_threaded[Threads.threadid()] + fill!(rhs_local, zero(eltype(rhs_local))) + + u_local = view(cache.entropy_projected_u_values, :, element) + + # constant over each element + geometric_matrix = get_contravariant_matrix(element, mesh, cache) + + (; sparsity_pattern) = cache + A_base = parent(sparsity_pattern) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR + row_ids, rows = axes(sparsity_pattern, 2), rowvals(A_base) + for i in row_ids + u_i = u_local[i] + du_i = zero(u_i) + for id in nzrange(A_base, i) + j = rows[id] + u_j = u_local[j] + + # compute (Q_1[i,j], Q_2[i,j], ...) where Q_i = ∑_j dxidxhatj * Q̂_j + reference_operator_entries = get_sparse_operator_entries(i, j, mesh, cache) + normal_direction_ij = geometric_matrix * reference_operator_entries + + # note that we do not need to normalize `normal_direction_ij` since + # it is typically normalized within the flux computation. + f_ij = volume_flux_fv(u_i, u_j, normal_direction_ij, equations) + du_i = du_i + 2 * f_ij + end + rhs_local[i] = du_i end - rhs_local[i] = du_i - end - - # TODO: factor this out to avoid calling it twice during calc_volume_integral! 
- project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) + # TODO: factor this out to avoid calling it twice during calc_volume_integral! + project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) end -function low_order_flux_differencing_kernel!(du, u, element, mesh::DGMultiMesh{NDIMS, <:NonAffine}, +function low_order_flux_differencing_kernel!(du, u, element, + mesh::DGMultiMesh{NDIMS, <:NonAffine}, have_nonconservative_terms::False, equations, - volume_flux_fv, dg::DGMultiFluxDiff{<:GaussSBP}, - cache, alpha=true) where {NDIMS} - - # accumulates output from flux differencing - rhs_local = cache.rhs_local_threaded[Threads.threadid()] - fill!(rhs_local, zero(eltype(rhs_local))) - - u_local = view(cache.entropy_projected_u_values, :, element) - - (; sparsity_pattern) = cache - A_base = parent(sparsity_pattern) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR - row_ids, rows = axes(sparsity_pattern, 2), rowvals(A_base) - for i in row_ids - u_i = u_local[i] - du_i = zero(u_i) - for id in nzrange(A_base, i) - j = rows[id] - u_j = u_local[j] - - # compute (Q_1[i,j], Q_2[i,j], ...) where Q_i = ∑_j dxidxhatj * Q̂_j - geometric_matrix = get_avg_contravariant_matrix(i, j, element, mesh, cache) - reference_operator_entries = get_sparse_operator_entries(i, j, mesh, cache) - normal_direction_ij = geometric_matrix * reference_operator_entries - - # note that we do not need to normalize `normal_direction_ij` since - # it is typically normalized within the flux computation. - f_ij = volume_flux_fv(u_i, u_j, normal_direction_ij, equations) - du_i = du_i + 2 * f_ij + volume_flux_fv, + dg::DGMultiFluxDiff{<:GaussSBP}, + cache, alpha = true) where {NDIMS} + + # accumulates output from flux differencing + rhs_local = cache.rhs_local_threaded[Threads.threadid()] + fill!(rhs_local, zero(eltype(rhs_local))) + + u_local = view(cache.entropy_projected_u_values, :, element) + + (; sparsity_pattern) = cache + A_base = parent(sparsity_pattern) # the adjoint of a SparseMatrixCSC is basically a SparseMatrixCSR + row_ids, rows = axes(sparsity_pattern, 2), rowvals(A_base) + for i in row_ids + u_i = u_local[i] + du_i = zero(u_i) + for id in nzrange(A_base, i) + j = rows[id] + u_j = u_local[j] + + # compute (Q_1[i,j], Q_2[i,j], ...) where Q_i = ∑_j dxidxhatj * Q̂_j + geometric_matrix = get_avg_contravariant_matrix(i, j, element, mesh, cache) + reference_operator_entries = get_sparse_operator_entries(i, j, mesh, cache) + normal_direction_ij = geometric_matrix * reference_operator_entries + + # note that we do not need to normalize `normal_direction_ij` since + # it is typically normalized within the flux computation. + f_ij = volume_flux_fv(u_i, u_j, normal_direction_ij, equations) + du_i = du_i + 2 * f_ij + end + rhs_local[i] = du_i end - rhs_local[i] = du_i - end - - # TODO: factor this out to avoid calling it twice during calc_volume_integral! - project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) + # TODO: factor this out to avoid calling it twice during calc_volume_integral! + project_rhs_to_gauss_nodes!(du, rhs_local, element, mesh, dg, cache, alpha) end - diff --git a/src/solvers/dgmulti/types.jl b/src/solvers/dgmulti/types.jl index c452ed67b2e..c225e334e8e 100644 --- a/src/solvers/dgmulti/types.jl +++ b/src/solvers/dgmulti/types.jl @@ -4,32 +4,62 @@ # `DGMulti` refers to both multiple DG types (polynomial/SBP, simplices/quads/hexes) as well as # the use of multi-dimensional operators in the solver. 
-const DGMulti{NDIMS, ElemType, ApproxType, SurfaceIntegral, VolumeIntegral} = - DG{<:RefElemData{NDIMS, ElemType, ApproxType}, Mortar, SurfaceIntegral, VolumeIntegral} where {Mortar} +const DGMulti{NDIMS, ElemType, ApproxType, SurfaceIntegral, VolumeIntegral} = DG{ + <:RefElemData{ + NDIMS, + ElemType, + ApproxType + }, + Mortar, + SurfaceIntegral, + VolumeIntegral + } where { + Mortar + } # Type aliases. The first parameter is `ApproxType` since it is more commonly used for dispatch. -const DGMultiWeakForm{ApproxType, ElemType} = - DGMulti{NDIMS, ElemType, ApproxType, <:SurfaceIntegralWeakForm, <:VolumeIntegralWeakForm} where {NDIMS} - -const DGMultiFluxDiff{ApproxType, ElemType} = - DGMulti{NDIMS, ElemType, ApproxType, <:SurfaceIntegralWeakForm, <:Union{VolumeIntegralFluxDifferencing, VolumeIntegralShockCapturingHG}} where {NDIMS} - -const DGMultiFluxDiffSBP{ApproxType, ElemType} = - DGMulti{NDIMS, ElemType, ApproxType, <:SurfaceIntegralWeakForm, <:Union{VolumeIntegralFluxDifferencing, VolumeIntegralShockCapturingHG}} where {NDIMS, ApproxType<:Union{SBP, AbstractDerivativeOperator}} - -const DGMultiSBP{ApproxType, ElemType} = - DGMulti{NDIMS, ElemType, ApproxType, SurfaceIntegral, VolumeIntegral} where {NDIMS, ElemType, ApproxType<:Union{SBP, AbstractDerivativeOperator}, SurfaceIntegral, VolumeIntegral} - +const DGMultiWeakForm{ApproxType, ElemType} = DGMulti{NDIMS, ElemType, ApproxType, + <:SurfaceIntegralWeakForm, + <:VolumeIntegralWeakForm + } where {NDIMS} + +const DGMultiFluxDiff{ApproxType, ElemType} = DGMulti{NDIMS, ElemType, ApproxType, + <:SurfaceIntegralWeakForm, + <:Union{ + VolumeIntegralFluxDifferencing, + VolumeIntegralShockCapturingHG + }} where {NDIMS} + +const DGMultiFluxDiffSBP{ApproxType, ElemType} = DGMulti{NDIMS, ElemType, ApproxType, + <:SurfaceIntegralWeakForm, + <:Union{ + VolumeIntegralFluxDifferencing, + VolumeIntegralShockCapturingHG + } + } where {NDIMS, + ApproxType <: Union{SBP, + AbstractDerivativeOperator + }} + +const DGMultiSBP{ApproxType, ElemType} = DGMulti{NDIMS, ElemType, ApproxType, + SurfaceIntegral, VolumeIntegral + } where {NDIMS, ElemType, + ApproxType <: Union{SBP, + AbstractDerivativeOperator}, + SurfaceIntegral, VolumeIntegral} # By default, Julia/LLVM does not use fused multiply-add operations (FMAs). # Since these FMAs can increase the performance of many numerical algorithms, # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # these are necessary for pretty printing polydeg(dg::DGMulti) = dg.basis.N -Base.summary(io::IO, dg::DG) where {DG <: DGMulti} = print(io, "DGMulti(polydeg=$(polydeg(dg)))") +function Base.summary(io::IO, dg::DG) where {DG <: DGMulti} + print(io, "DGMulti(polydeg=$(polydeg(dg)))") +end # real(rd) is the eltype of the nodes `rd.r`. Base.real(rd::RefElemData) = eltype(rd.r) @@ -53,17 +83,17 @@ Optional: - `RefElemData_kwargs` are additional keyword arguments for `RefElemData`, such as `quad_rule_vol`. For more info, see the [StartUpDG.jl docs](https://jlchan.github.io/StartUpDG.jl/dev/). """ -function DGMulti(; polydeg=nothing, - element_type::AbstractElemShape, - approximation_type=Polynomial(), - surface_flux=flux_central, - surface_integral=SurfaceIntegralWeakForm(surface_flux), - volume_integral=VolumeIntegralWeakForm(), - kwargs...) - - # call dispatchable constructor - DGMulti(element_type, approximation_type, volume_integral, surface_integral; - polydeg=polydeg, kwargs...) 
+function DGMulti(; polydeg = nothing, + element_type::AbstractElemShape, + approximation_type = Polynomial(), + surface_flux = flux_central, + surface_integral = SurfaceIntegralWeakForm(surface_flux), + volume_integral = VolumeIntegralWeakForm(), + kwargs...) + + # call dispatchable constructor + DGMulti(element_type, approximation_type, volume_integral, surface_integral; + polydeg = polydeg, kwargs...) end # dispatchable constructor for DGMulti to allow for specialization @@ -73,13 +103,15 @@ function DGMulti(element_type::AbstractElemShape, surface_integral; polydeg::Integer, kwargs...) - - rd = RefElemData(element_type, approximation_type, polydeg; kwargs...) - return DG(rd, nothing #= mortar =#, surface_integral, volume_integral) + rd = RefElemData(element_type, approximation_type, polydeg; kwargs...) + # `nothing` is passed as `mortar` + return DG(rd, nothing, surface_integral, volume_integral) end -DGMulti(basis::RefElemData; volume_integral, surface_integral) = - DG(basis, nothing #= mortar =#, surface_integral, volume_integral) +function DGMulti(basis::RefElemData; volume_integral, surface_integral) + # `nothing` is passed as `mortar` + DG(basis, nothing, surface_integral, volume_integral) +end """ DGMultiBasis(element_type, polydeg; approximation_type = Polynomial(), kwargs...) @@ -90,9 +122,10 @@ Constructs a basis for DGMulti solvers. Returns a "StartUpDG.RefElemData" object For more info, see the [StartUpDG.jl docs](https://jlchan.github.io/StartUpDG.jl/dev/). """ -DGMultiBasis(element_type, polydeg; approximation_type = Polynomial(), kwargs...) = - RefElemData(element_type, approximation_type, polydeg; kwargs...) - +function DGMultiBasis(element_type, polydeg; approximation_type = Polynomial(), + kwargs...) + RefElemData(element_type, approximation_type, polydeg; kwargs...) +end ######################################## # DGMultiMesh @@ -100,8 +133,10 @@ DGMultiBasis(element_type, polydeg; approximation_type = Polynomial(), kwargs... 
# now that `DGMulti` is defined, we can define constructors for `DGMultiMesh` which use `dg::DGMulti` -function DGMultiMesh(dg::DGMulti, geometric_term_type, md::MeshData{NDIMS}, boundary_faces) where {NDIMS} - return DGMultiMesh{NDIMS, typeof(geometric_term_type), typeof(md), typeof(boundary_faces)}(md, boundary_faces) +function DGMultiMesh(dg::DGMulti, geometric_term_type, md::MeshData{NDIMS}, + boundary_faces) where {NDIMS} + return DGMultiMesh{NDIMS, typeof(geometric_term_type), typeof(md), + typeof(boundary_faces)}(md, boundary_faces) end # Mesh types used internally for trait dispatch @@ -115,11 +150,15 @@ struct Affine <: GeometricTermsType end # mesh produces constant geometric terms struct NonAffine <: GeometricTermsType end # mesh produces non-constant geometric terms # choose MeshType based on the constructor and element type -GeometricTermsType(mesh_type, dg::DGMulti) = GeometricTermsType(mesh_type, dg.basis.element_type) +function GeometricTermsType(mesh_type, dg::DGMulti) + GeometricTermsType(mesh_type, dg.basis.element_type) +end GeometricTermsType(mesh_type::Cartesian, element_type::AbstractElemShape) = Affine() GeometricTermsType(mesh_type::TriangulateIO, element_type::Tri) = Affine() GeometricTermsType(mesh_type::VertexMapped, element_type::Union{Tri, Tet}) = Affine() -GeometricTermsType(mesh_type::VertexMapped, element_type::Union{Quad, Hex}) = NonAffine() +function GeometricTermsType(mesh_type::VertexMapped, element_type::Union{Quad, Hex}) + NonAffine() +end GeometricTermsType(mesh_type::Curved, element_type::AbstractElemShape) = NonAffine() # other potential constructor types to add later: Bilinear, Isoparametric{polydeg_geo}, Rational/Exact? @@ -139,17 +178,16 @@ GeometricTermsType(mesh_type::Curved, element_type::AbstractElemShape) = NonAffi (x,y,z) direction. """ function DGMultiMesh(dg::DGMulti{NDIMS}, vertex_coordinates, EToV::AbstractArray; - is_on_boundary=nothing, - periodicity=ntuple(_->false, NDIMS), kwargs...) where {NDIMS} - - md = MeshData(vertex_coordinates, EToV, dg.basis) - if NDIMS == 1 - md = StartUpDG.make_periodic(md, periodicity...) - else - md = StartUpDG.make_periodic(md, periodicity) - end - boundary_faces = StartUpDG.tag_boundary_faces(md, is_on_boundary) - return DGMultiMesh(dg, GeometricTermsType(VertexMapped(), dg), md, boundary_faces) + is_on_boundary = nothing, + periodicity = ntuple(_ -> false, NDIMS), kwargs...) where {NDIMS} + md = MeshData(vertex_coordinates, EToV, dg.basis) + if NDIMS == 1 + md = StartUpDG.make_periodic(md, periodicity...) + else + md = StartUpDG.make_periodic(md, periodicity) + end + boundary_faces = StartUpDG.tag_boundary_faces(md, is_on_boundary) + return DGMultiMesh(dg, GeometricTermsType(VertexMapped(), dg), md, boundary_faces) end """ @@ -161,13 +199,15 @@ end - `boundary_dict` is a `Dict{Symbol, Int}` which associates each integer `TriangulateIO` boundary tag with a `Symbol`. 
""" -function DGMultiMesh(dg::DGMulti{2, Tri}, triangulateIO, boundary_dict::Dict{Symbol, Int}; - periodicity=(false, false)) - vertex_coordinates, EToV = StartUpDG.triangulateIO_to_VXYEToV(triangulateIO) - md = MeshData(vertex_coordinates, EToV, dg.basis) - md = StartUpDG.make_periodic(md, periodicity) - boundary_faces = StartUpDG.tag_boundary_faces(triangulateIO, dg.basis, md, boundary_dict) - return DGMultiMesh(dg, GeometricTermsType(TriangulateIO(), dg), md, boundary_faces) +function DGMultiMesh(dg::DGMulti{2, Tri}, triangulateIO, + boundary_dict::Dict{Symbol, Int}; + periodicity = (false, false)) + vertex_coordinates, EToV = StartUpDG.triangulateIO_to_VXYEToV(triangulateIO) + md = MeshData(vertex_coordinates, EToV, dg.basis) + md = StartUpDG.make_periodic(md, periodicity) + boundary_faces = StartUpDG.tag_boundary_faces(triangulateIO, dg.basis, md, + boundary_dict) + return DGMultiMesh(dg, GeometricTermsType(TriangulateIO(), dg), md, boundary_faces) end """ @@ -182,26 +222,27 @@ the tensor product of the intervals `[coordinates_min[i], coordinates_max[i]]`. - `periodicity` is a tuple of `Bool`s specifying periodicity = `true`/`false` in the (x,y,z) direction. """ function DGMultiMesh(dg::DGMulti{NDIMS}, cells_per_dimension; - coordinates_min=ntuple(_ -> -one(real(dg)), NDIMS), - coordinates_max=ntuple(_ -> one(real(dg)), NDIMS), - is_on_boundary=nothing, - periodicity=ntuple(_ -> false, NDIMS), kwargs...) where {NDIMS} - - vertex_coordinates, EToV = StartUpDG.uniform_mesh(dg.basis.element_type, cells_per_dimension...) - domain_lengths = coordinates_max .- coordinates_min - for i in 1:NDIMS - @. vertex_coordinates[i] = 0.5 * (vertex_coordinates[i] + 1) * domain_lengths[i] + coordinates_min[i] - end - - md = MeshData(vertex_coordinates, EToV, dg.basis) - if NDIMS == 1 && first(periodicity) == true - md = StartUpDG.make_periodic(md) - end - if NDIMS > 1 - md = StartUpDG.make_periodic(md, periodicity) - end - boundary_faces = StartUpDG.tag_boundary_faces(md, is_on_boundary) - return DGMultiMesh(dg, GeometricTermsType(Cartesian(), dg), md, boundary_faces) + coordinates_min = ntuple(_ -> -one(real(dg)), NDIMS), + coordinates_max = ntuple(_ -> one(real(dg)), NDIMS), + is_on_boundary = nothing, + periodicity = ntuple(_ -> false, NDIMS), kwargs...) where {NDIMS} + vertex_coordinates, EToV = StartUpDG.uniform_mesh(dg.basis.element_type, + cells_per_dimension...) + domain_lengths = coordinates_max .- coordinates_min + for i in 1:NDIMS + @. vertex_coordinates[i] = 0.5 * (vertex_coordinates[i] + 1) * + domain_lengths[i] + coordinates_min[i] + end + + md = MeshData(vertex_coordinates, EToV, dg.basis) + if NDIMS == 1 && first(periodicity) == true + md = StartUpDG.make_periodic(md) + end + if NDIMS > 1 + md = StartUpDG.make_periodic(md, periodicity) + end + boundary_faces = StartUpDG.tag_boundary_faces(md, is_on_boundary) + return DGMultiMesh(dg, GeometricTermsType(Cartesian(), dg), md, boundary_faces) end """ @@ -216,22 +257,23 @@ Constructs a `Curved()` [`DGMultiMesh`](@ref) with element type `dg.basis.elemen - `periodicity` is a tuple of `Bool`s specifying periodicity = `true`/`false` in the (x,y,z) direction. """ function DGMultiMesh(dg::DGMulti{NDIMS}, cells_per_dimension, mapping; - is_on_boundary=nothing, - periodicity=ntuple(_ -> false, NDIMS), kwargs...) where {NDIMS} - - vertex_coordinates, EToV = StartUpDG.uniform_mesh(dg.basis.element_type, cells_per_dimension...) - md = MeshData(vertex_coordinates, EToV, dg.basis) - md = NDIMS==1 ? StartUpDG.make_periodic(md, periodicity...) 
: StartUpDG.make_periodic(md, periodicity) - - @unpack xyz = md - for i in eachindex(xyz[1]) - new_xyz = mapping(getindex.(xyz, i)...) - setindex!.(xyz, new_xyz, i) - end - md_curved = MeshData(dg.basis, md, xyz...) - - boundary_faces = StartUpDG.tag_boundary_faces(md_curved, is_on_boundary) - return DGMultiMesh(dg, GeometricTermsType(Curved(), dg), md_curved, boundary_faces) + is_on_boundary = nothing, + periodicity = ntuple(_ -> false, NDIMS), kwargs...) where {NDIMS} + vertex_coordinates, EToV = StartUpDG.uniform_mesh(dg.basis.element_type, + cells_per_dimension...) + md = MeshData(vertex_coordinates, EToV, dg.basis) + md = NDIMS == 1 ? StartUpDG.make_periodic(md, periodicity...) : + StartUpDG.make_periodic(md, periodicity) + + @unpack xyz = md + for i in eachindex(xyz[1]) + new_xyz = mapping(getindex.(xyz, i)...) + setindex!.(xyz, new_xyz, i) + end + md_curved = MeshData(dg.basis, md, xyz...) + + boundary_faces = StartUpDG.tag_boundary_faces(md_curved, is_on_boundary) + return DGMultiMesh(dg, GeometricTermsType(Curved(), dg), md_curved, boundary_faces) end """ @@ -243,144 +285,148 @@ end [HOHQMesh](https://github.com/trixi-framework/HOHQMesh). """ function DGMultiMesh(dg::DGMulti{NDIMS}, filename::String; - periodicity=ntuple(_ -> false, NDIMS)) where {NDIMS} - - hohqmesh_data = StartUpDG.read_HOHQMesh(filename) - md = MeshData(hohqmesh_data, dg.basis) - md = StartUpDG.make_periodic(md, periodicity) - boundary_faces = Dict(Pair.(keys(md.mesh_type.boundary_faces), values(md.mesh_type.boundary_faces))) - return DGMultiMesh(dg, GeometricTermsType(Curved(), dg), md, boundary_faces) + periodicity = ntuple(_ -> false, NDIMS)) where {NDIMS} + hohqmesh_data = StartUpDG.read_HOHQMesh(filename) + md = MeshData(hohqmesh_data, dg.basis) + md = StartUpDG.make_periodic(md, periodicity) + boundary_faces = Dict(Pair.(keys(md.mesh_type.boundary_faces), + values(md.mesh_type.boundary_faces))) + return DGMultiMesh(dg, GeometricTermsType(Curved(), dg), md, boundary_faces) end # Matrix type for lazy construction of physical differentiation matrices # Constructs a lazy linear combination of B = ∑_i coeffs[i] * A[i] -struct LazyMatrixLinearCombo{Tcoeffs, N, Tv, TA <: AbstractMatrix{Tv}} <: AbstractMatrix{Tv} - matrices::NTuple{N, TA} - coeffs::NTuple{N, Tcoeffs} - function LazyMatrixLinearCombo(matrices, coeffs) - @assert all(matrix -> size(matrix) == size(first(matrices)), matrices) - new{typeof(first(coeffs)), length(matrices), eltype(first(matrices)), typeof(first(matrices))}(matrices, coeffs) - end +struct LazyMatrixLinearCombo{Tcoeffs, N, Tv, TA <: AbstractMatrix{Tv}} <: + AbstractMatrix{Tv} + matrices::NTuple{N, TA} + coeffs::NTuple{N, Tcoeffs} + function LazyMatrixLinearCombo(matrices, coeffs) + @assert all(matrix -> size(matrix) == size(first(matrices)), matrices) + new{typeof(first(coeffs)), length(matrices), eltype(first(matrices)), + typeof(first(matrices))}(matrices, coeffs) + end end Base.eltype(A::LazyMatrixLinearCombo) = eltype(first(A.matrices)) Base.IndexStyle(A::LazyMatrixLinearCombo) = IndexCartesian() Base.size(A::LazyMatrixLinearCombo) = size(first(A.matrices)) @inline function Base.getindex(A::LazyMatrixLinearCombo{<:Real, N}, i, j) where {N} - val = zero(eltype(A)) - for k in Base.OneTo(N) - val = val + A.coeffs[k] * getindex(A.matrices[k], i, j) - end - return val + val = zero(eltype(A)) + for k in Base.OneTo(N) + val = val + A.coeffs[k] * getindex(A.matrices[k], i, j) + end + return val end # `SimpleKronecker` lazily stores a Kronecker product `kron(ntuple(A, NDIMS)...)`. 
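# Keeping only the 1D operator is what makes this cheap: applying the NDIMS-dimensional
# operator dimension-by-dimension costs O(NDIMS * n^(NDIMS + 1)) operations for an
# n×n matrix `A`, instead of the O(n^(2 * NDIMS)) a fully assembled Kronecker matrix
# would need (e.g., for n = 4 in 3D, the 64×64 matrix is never formed).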
# This object also allocates some temporary storage to enable the fast computation # of matrix-vector products. struct SimpleKronecker{NDIMS, TA, Ttmp} - A::TA - tmp_storage::Ttmp # temporary array used for Kronecker multiplication + A::TA + tmp_storage::Ttmp # temporary array used for Kronecker multiplication end # constructor for SimpleKronecker which requires specifying only `NDIMS` and # the 1D matrix `A`. -function SimpleKronecker(NDIMS, A, eltype_A=eltype(A)) - @assert size(A, 1) == size(A, 2) # check if square - tmp_storage=[zeros(eltype_A, ntuple(_ -> size(A, 2), NDIMS)...) for _ in 1:Threads.nthreads()] - return SimpleKronecker{NDIMS, typeof(A), typeof(tmp_storage)}(A, tmp_storage) +function SimpleKronecker(NDIMS, A, eltype_A = eltype(A)) + @assert size(A, 1) == size(A, 2) # check if square + tmp_storage = [zeros(eltype_A, ntuple(_ -> size(A, 2), NDIMS)...) + for _ in 1:Threads.nthreads()] + return SimpleKronecker{NDIMS, typeof(A), typeof(tmp_storage)}(A, tmp_storage) end # Computes `b = kron(A, A) * x` in an optimized fashion function LinearAlgebra.mul!(b_in, A_kronecker::SimpleKronecker{2}, x_in) - - @unpack A = A_kronecker - tmp_storage = A_kronecker.tmp_storage[Threads.threadid()] - n = size(A, 2) - - # copy `x_in` to `tmp_storage` to avoid mutating the input - @assert length(tmp_storage) == length(x_in) - @turbo thread=true for i in eachindex(tmp_storage) - tmp_storage[i] = x_in[i] - end - x = reshape(tmp_storage, n, n) - # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. - # Thus, Base.ReshapedArray should be used if you are setting values in the array. - # `reshape` is fine if you are only accessing values. - b = Base.ReshapedArray(b_in, (n, n), ()) - - @turbo thread=true for j in 1:n, i in 1:n - tmp = zero(eltype(x)) - for ii in 1:n - tmp = tmp + A[i, ii] * x[ii, j] + @unpack A = A_kronecker + tmp_storage = A_kronecker.tmp_storage[Threads.threadid()] + n = size(A, 2) + + # copy `x_in` to `tmp_storage` to avoid mutating the input + @assert length(tmp_storage) == length(x_in) + @turbo thread=true for i in eachindex(tmp_storage) + tmp_storage[i] = x_in[i] + end + x = reshape(tmp_storage, n, n) + # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. + # Thus, Base.ReshapedArray should be used if you are setting values in the array. + # `reshape` is fine if you are only accessing values. 
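    # A minimal sketch of the distinction, assuming a flat buffer `v` of length n^2:
    #   m = reshape(v, n, n)                   # fine for reading m[i, j]
    #   m = Base.ReshapedArray(v, (n, n), ())  # preferred when writing m[i, j] = ...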
+ b = Base.ReshapedArray(b_in, (n, n), ()) + + @turbo thread=true for j in 1:n, i in 1:n + tmp = zero(eltype(x)) + for ii in 1:n + tmp = tmp + A[i, ii] * x[ii, j] + end + b[i, j] = tmp end - b[i, j] = tmp - end - @turbo thread=true for j in 1:n, i in 1:n - tmp = zero(eltype(x)) - for jj in 1:n - tmp = tmp + A[j, jj] * b[i, jj] + @turbo thread=true for j in 1:n, i in 1:n + tmp = zero(eltype(x)) + for jj in 1:n + tmp = tmp + A[j, jj] * b[i, jj] + end + x[i, j] = tmp end - x[i, j] = tmp - end - @turbo thread=true for i in eachindex(b_in) - b_in[i] = x[i] - end + @turbo thread=true for i in eachindex(b_in) + b_in[i] = x[i] + end - return nothing + return nothing end # Computes `b = kron(A, A, A) * x` in an optimized fashion function LinearAlgebra.mul!(b_in, A_kronecker::SimpleKronecker{3}, x_in) + @unpack A = A_kronecker + tmp_storage = A_kronecker.tmp_storage[Threads.threadid()] + n = size(A, 2) - @unpack A = A_kronecker - tmp_storage = A_kronecker.tmp_storage[Threads.threadid()] - n = size(A, 2) - - # copy `x_in` to `tmp_storage` to avoid mutating the input - @turbo thread=true for i in eachindex(tmp_storage) - tmp_storage[i] = x_in[i] - end - x = reshape(tmp_storage, n, n, n) - # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. - # Thus, Base.ReshapedArray should be used if you are setting values in the array. - # `reshape` is fine if you are only accessing values. - b = Base.ReshapedArray(b_in, (n, n, n), ()) - - @turbo thread=true for k in 1:n, j in 1:n, i in 1:n - tmp = zero(eltype(x)) - for ii in 1:n - tmp = tmp + A[i, ii] * x[ii, j, k] + # copy `x_in` to `tmp_storage` to avoid mutating the input + @turbo thread=true for i in eachindex(tmp_storage) + tmp_storage[i] = x_in[i] + end + x = reshape(tmp_storage, n, n, n) + # As of Julia 1.9, Base.ReshapedArray does not produce allocations when setting values. + # Thus, Base.ReshapedArray should be used if you are setting values in the array. + # `reshape` is fine if you are only accessing values. + b = Base.ReshapedArray(b_in, (n, n, n), ()) + + @turbo thread=true for k in 1:n, j in 1:n, i in 1:n + tmp = zero(eltype(x)) + for ii in 1:n + tmp = tmp + A[i, ii] * x[ii, j, k] + end + b[i, j, k] = tmp end - b[i, j, k] = tmp - end - @turbo thread=true for k in 1:n, j in 1:n, i in 1:n - tmp = zero(eltype(x)) - for jj in 1:n - tmp = tmp + A[j, jj] * b[i, jj, k] + @turbo thread=true for k in 1:n, j in 1:n, i in 1:n + tmp = zero(eltype(x)) + for jj in 1:n + tmp = tmp + A[j, jj] * b[i, jj, k] + end + x[i, j, k] = tmp end - x[i, j, k] = tmp - end - @turbo thread=true for k in 1:n, j in 1:n, i in 1:n - tmp = zero(eltype(x)) - for kk in 1:n - tmp = tmp + A[k, kk] * x[i, j, kk] + @turbo thread=true for k in 1:n, j in 1:n, i in 1:n + tmp = zero(eltype(x)) + for kk in 1:n + tmp = tmp + A[k, kk] * x[i, j, kk] + end + b[i, j, k] = tmp end - b[i, j, k] = tmp - end - return nothing + return nothing end - end # @muladd # TODO: deprecations introduced in Trixi.jl v0.6 -@deprecate DGMultiMesh(dg::DGMulti{NDIMS}; cells_per_dimension, kwargs...) where {NDIMS} DGMultiMesh(dg, cells_per_dimension; kwargs...) +@deprecate DGMultiMesh(dg::DGMulti{NDIMS}; cells_per_dimension, kwargs...) where {NDIMS} DGMultiMesh(dg, + cells_per_dimension; + kwargs...) # TODO: deprecations introduced in Trixi.jl v0.5 -@deprecate DGMultiMesh(vertex_coordinates, EToV, dg::DGMulti{NDIMS}; kwargs...) where {NDIMS} DGMultiMesh(dg, vertex_coordinates, EToV; kwargs...) 
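# Note: the v0.5 deprecations only reorder arguments so that the solver comes first;
# e.g., the old call `DGMultiMesh(vertex_coordinates, EToV, dg)` becomes
# `DGMultiMesh(dg, vertex_coordinates, EToV)`.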
-@deprecate DGMultiMesh(triangulateIO, dg::DGMulti{2, Tri}, boundary_dict::Dict{Symbol, Int}; kwargs...) DGMultiMesh(dg, triangulateIO, boundary_dict; kwargs...) - +@deprecate DGMultiMesh(vertex_coordinates, EToV, dg::DGMulti{NDIMS}; + kwargs...) where {NDIMS} DGMultiMesh(dg, vertex_coordinates, EToV; + kwargs...) +@deprecate DGMultiMesh(triangulateIO, dg::DGMulti{2, Tri}, boundary_dict::Dict{Symbol, Int}; + kwargs...) DGMultiMesh(dg, triangulateIO, boundary_dict; kwargs...) diff --git a/src/solvers/dgsem/basis_lobatto_legendre.jl b/src/solvers/dgsem/basis_lobatto_legendre.jl index a3a7fb6dd31..1b4e5446e44 100644 --- a/src/solvers/dgsem/basis_lobatto_legendre.jl +++ b/src/solvers/dgsem/basis_lobatto_legendre.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ LobattoLegendreBasis([RealT=Float64,] polydeg::Integer) @@ -13,95 +13,106 @@ Create a nodal Lobatto-Legendre basis for polynomials of degree `polydeg`. For the special case `polydeg=0` the DG method reduces to a finite volume method. Therefore, this function sets the center point of the cell as single node. """ -struct LobattoLegendreBasis{RealT<:Real, NNODES, - VectorT<:AbstractVector{RealT}, - InverseVandermondeLegendre<:AbstractMatrix{RealT}, - BoundaryMatrix<:AbstractMatrix{RealT}, - DerivativeMatrix<:AbstractMatrix{RealT}} <: AbstractBasisSBP{RealT} - nodes ::VectorT - weights ::VectorT - inverse_weights::VectorT - - inverse_vandermonde_legendre::InverseVandermondeLegendre - boundary_interpolation ::BoundaryMatrix # lhat - - derivative_matrix ::DerivativeMatrix # strong form derivative matrix - derivative_split ::DerivativeMatrix # strong form derivative matrix minus boundary terms - derivative_split_transpose::DerivativeMatrix # transpose of `derivative_split` - derivative_dhat ::DerivativeMatrix # weak form matrix "dhat", - # negative adjoint wrt the SBP dot product +struct LobattoLegendreBasis{RealT <: Real, NNODES, + VectorT <: AbstractVector{RealT}, + InverseVandermondeLegendre <: AbstractMatrix{RealT}, + BoundaryMatrix <: AbstractMatrix{RealT}, + DerivativeMatrix <: AbstractMatrix{RealT}} <: + AbstractBasisSBP{RealT} + nodes::VectorT + weights::VectorT + inverse_weights::VectorT + + inverse_vandermonde_legendre::InverseVandermondeLegendre + boundary_interpolation::BoundaryMatrix # lhat + + derivative_matrix::DerivativeMatrix # strong form derivative matrix + derivative_split::DerivativeMatrix # strong form derivative matrix minus boundary terms + derivative_split_transpose::DerivativeMatrix # transpose of `derivative_split` + derivative_dhat::DerivativeMatrix # weak form matrix "dhat", + # negative adjoint wrt the SBP dot product end function LobattoLegendreBasis(RealT, polydeg::Integer) - nnodes_ = polydeg + 1 - - # compute everything using `Float64` by default - nodes_, weights_ = gauss_lobatto_nodes_weights(nnodes_) - inverse_weights_ = inv.(weights_) - - _, inverse_vandermonde_legendre_ = vandermonde_legendre(nodes_) - - boundary_interpolation_ = zeros(nnodes_, 2) - boundary_interpolation_[:, 1] = calc_lhat(-1.0, nodes_, weights_) - boundary_interpolation_[:, 2] = calc_lhat( 1.0, nodes_, weights_) - - derivative_matrix_ = polynomial_derivative_matrix(nodes_) - derivative_split_ = calc_dsplit(nodes_, weights_) - derivative_split_transpose_ = Matrix(derivative_split_') - derivative_dhat_ = calc_dhat(nodes_, weights_) - - # type conversions to get the requested real type and enable possible - # optimizations of 
runtime performance and latency - nodes = SVector{nnodes_, RealT}(nodes_) - weights = SVector{nnodes_, RealT}(weights_) - inverse_weights = SVector{nnodes_, RealT}(inverse_weights_) - - inverse_vandermonde_legendre = convert.(RealT, inverse_vandermonde_legendre_) - boundary_interpolation = convert.(RealT, boundary_interpolation_) - - # Usually as fast as `SMatrix` (when using `let` in the volume integral/`@threaded`) - derivative_matrix = Matrix{RealT}(derivative_matrix_) - derivative_split = Matrix{RealT}(derivative_split_) - derivative_split_transpose = Matrix{RealT}(derivative_split_transpose_) - derivative_dhat = Matrix{RealT}(derivative_dhat_) - - return LobattoLegendreBasis{RealT, nnodes_, typeof(nodes), typeof(inverse_vandermonde_legendre), typeof(boundary_interpolation), typeof(derivative_matrix)}( - nodes, weights, inverse_weights, - inverse_vandermonde_legendre, boundary_interpolation, - derivative_matrix, derivative_split, derivative_split_transpose, derivative_dhat - ) + nnodes_ = polydeg + 1 + + # compute everything using `Float64` by default + nodes_, weights_ = gauss_lobatto_nodes_weights(nnodes_) + inverse_weights_ = inv.(weights_) + + _, inverse_vandermonde_legendre_ = vandermonde_legendre(nodes_) + + boundary_interpolation_ = zeros(nnodes_, 2) + boundary_interpolation_[:, 1] = calc_lhat(-1.0, nodes_, weights_) + boundary_interpolation_[:, 2] = calc_lhat(1.0, nodes_, weights_) + + derivative_matrix_ = polynomial_derivative_matrix(nodes_) + derivative_split_ = calc_dsplit(nodes_, weights_) + derivative_split_transpose_ = Matrix(derivative_split_') + derivative_dhat_ = calc_dhat(nodes_, weights_) + + # type conversions to get the requested real type and enable possible + # optimizations of runtime performance and latency + nodes = SVector{nnodes_, RealT}(nodes_) + weights = SVector{nnodes_, RealT}(weights_) + inverse_weights = SVector{nnodes_, RealT}(inverse_weights_) + + inverse_vandermonde_legendre = convert.(RealT, inverse_vandermonde_legendre_) + boundary_interpolation = convert.(RealT, boundary_interpolation_) + + # Usually as fast as `SMatrix` (when using `let` in the volume integral/`@threaded`) + derivative_matrix = Matrix{RealT}(derivative_matrix_) + derivative_split = Matrix{RealT}(derivative_split_) + derivative_split_transpose = Matrix{RealT}(derivative_split_transpose_) + derivative_dhat = Matrix{RealT}(derivative_dhat_) + + return LobattoLegendreBasis{RealT, nnodes_, typeof(nodes), + typeof(inverse_vandermonde_legendre), + typeof(boundary_interpolation), + typeof(derivative_matrix)}(nodes, weights, + inverse_weights, + inverse_vandermonde_legendre, + boundary_interpolation, + derivative_matrix, + derivative_split, + derivative_split_transpose, + derivative_dhat) end LobattoLegendreBasis(polydeg::Integer) = LobattoLegendreBasis(Float64, polydeg) function Base.show(io::IO, basis::LobattoLegendreBasis) - @nospecialize basis # reduce precompilation time + @nospecialize basis # reduce precompilation time - print(io, "LobattoLegendreBasis{", real(basis), "}(polydeg=", polydeg(basis), ")") + print(io, "LobattoLegendreBasis{", real(basis), "}(polydeg=", polydeg(basis), ")") end function Base.show(io::IO, ::MIME"text/plain", basis::LobattoLegendreBasis) - @nospecialize basis # reduce precompilation time + @nospecialize basis # reduce precompilation time - print(io, "LobattoLegendreBasis{", real(basis), "} with polynomials of degree ", polydeg(basis)) + print(io, "LobattoLegendreBasis{", real(basis), "} with polynomials of degree ", + polydeg(basis)) end function 
Base.:(==)(b1::LobattoLegendreBasis, b2::LobattoLegendreBasis) - if typeof(b1) != typeof(b2) - return false - end + if typeof(b1) != typeof(b2) + return false + end - for field in fieldnames(typeof(b1)) - if getfield(b1, field) != getfield(b2, field) - return false + for field in fieldnames(typeof(b1)) + if getfield(b1, field) != getfield(b2, field) + return false + end end - end - return true + return true end @inline Base.real(basis::LobattoLegendreBasis{RealT}) where {RealT} = RealT -@inline nnodes(basis::LobattoLegendreBasis{RealT, NNODES}) where {RealT, NNODES} = NNODES +@inline function nnodes(basis::LobattoLegendreBasis{RealT, NNODES}) where {RealT, NNODES + } + NNODES +end """ eachnode(basis::LobattoLegendreBasis) @@ -116,7 +127,6 @@ In particular, not the nodes themselves are returned. @inline get_nodes(basis::LobattoLegendreBasis) = basis.nodes - """ integrate(f, u, basis::LobattoLegendreBasis) @@ -124,13 +134,13 @@ Map the function `f` to the coefficients `u` and integrate with respect to the quadrature rule given by `basis`. """ function integrate(f, u, basis::LobattoLegendreBasis) - @unpack weights = basis + @unpack weights = basis - res = zero(f(first(u))) - for i in eachindex(u, weights) - res += f(u[i]) * weights[i] - end - return res + res = zero(f(first(u))) + for i in eachindex(u, weights) + res += f(u[i]) * weights[i] + end + return res end # Return the first/last weight of the quadrature associated with `basis`. @@ -140,66 +150,71 @@ end left_boundary_weight(basis::LobattoLegendreBasis) = first(basis.weights) right_boundary_weight(basis::LobattoLegendreBasis) = last(basis.weights) - - -struct LobattoLegendreMortarL2{RealT<:Real, NNODES, ForwardMatrix<:AbstractMatrix{RealT}, ReverseMatrix<:AbstractMatrix{RealT}} <: AbstractMortarL2{RealT} - forward_upper::ForwardMatrix - forward_lower::ForwardMatrix - reverse_upper::ReverseMatrix - reverse_lower::ReverseMatrix +struct LobattoLegendreMortarL2{RealT <: Real, NNODES, + ForwardMatrix <: AbstractMatrix{RealT}, + ReverseMatrix <: AbstractMatrix{RealT}} <: + AbstractMortarL2{RealT} + forward_upper::ForwardMatrix + forward_lower::ForwardMatrix + reverse_upper::ReverseMatrix + reverse_lower::ReverseMatrix end function MortarL2(basis::LobattoLegendreBasis) - RealT = real(basis) - nnodes_ = nnodes(basis) - - # compute everything using `Float64` by default - forward_upper_ = calc_forward_upper(nnodes_) - forward_lower_ = calc_forward_lower(nnodes_) - reverse_upper_ = calc_reverse_upper(nnodes_, Val(:gauss)) - reverse_lower_ = calc_reverse_lower(nnodes_, Val(:gauss)) - - # type conversions to get the requested real type and enable possible - # optimizations of runtime performance and latency - - # Usually as fast as `SMatrix` but better for latency - forward_upper = Matrix{RealT}(forward_upper_) - forward_lower = Matrix{RealT}(forward_lower_) - - # TODO: Taal performance - # Check the performance of different implementations of `mortar_fluxes_to_elements!` - # with different types of the reverse matrices and different types of - # `fstar_upper_threaded` etc. used in the cache. - # Check whether `@turbo` with `eachnode` in `multiply_dimensionwise!` can be faster than - # `@tullio` when the matrix sizes are not necessarily static. 
- # reverse_upper = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_upper_) - # reverse_lower = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_lower_) - reverse_upper = Matrix{RealT}(reverse_upper_) - reverse_lower = Matrix{RealT}(reverse_lower_) - - LobattoLegendreMortarL2{RealT, nnodes_, typeof(forward_upper), typeof(reverse_upper)}( - forward_upper, forward_lower, - reverse_upper, reverse_lower) + RealT = real(basis) + nnodes_ = nnodes(basis) + + # compute everything using `Float64` by default + forward_upper_ = calc_forward_upper(nnodes_) + forward_lower_ = calc_forward_lower(nnodes_) + reverse_upper_ = calc_reverse_upper(nnodes_, Val(:gauss)) + reverse_lower_ = calc_reverse_lower(nnodes_, Val(:gauss)) + + # type conversions to get the requested real type and enable possible + # optimizations of runtime performance and latency + + # Usually as fast as `SMatrix` but better for latency + forward_upper = Matrix{RealT}(forward_upper_) + forward_lower = Matrix{RealT}(forward_lower_) + + # TODO: Taal performance + # Check the performance of different implementations of `mortar_fluxes_to_elements!` + # with different types of the reverse matrices and different types of + # `fstar_upper_threaded` etc. used in the cache. + # Check whether `@turbo` with `eachnode` in `multiply_dimensionwise!` can be faster than + # `@tullio` when the matrix sizes are not necessarily static. + # reverse_upper = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_upper_) + # reverse_lower = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_lower_) + reverse_upper = Matrix{RealT}(reverse_upper_) + reverse_lower = Matrix{RealT}(reverse_lower_) + + LobattoLegendreMortarL2{RealT, nnodes_, typeof(forward_upper), typeof(reverse_upper) + }(forward_upper, forward_lower, + reverse_upper, reverse_lower) end function Base.show(io::IO, mortar::LobattoLegendreMortarL2) - @nospecialize mortar # reduce precompilation time + @nospecialize mortar # reduce precompilation time - print(io, "LobattoLegendreMortarL2{", real(mortar), "}(polydeg=", polydeg(mortar), ")") + print(io, "LobattoLegendreMortarL2{", real(mortar), "}(polydeg=", polydeg(mortar), + ")") end function Base.show(io::IO, ::MIME"text/plain", mortar::LobattoLegendreMortarL2) - @nospecialize mortar # reduce precompilation time + @nospecialize mortar # reduce precompilation time - print(io, "LobattoLegendreMortarL2{", real(mortar), "} with polynomials of degree ", polydeg(mortar)) + print(io, "LobattoLegendreMortarL2{", real(mortar), "} with polynomials of degree ", + polydeg(mortar)) end @inline Base.real(mortar::LobattoLegendreMortarL2{RealT}) where {RealT} = RealT -@inline nnodes(mortar::LobattoLegendreMortarL2{RealT, NNODES}) where {RealT, NNODES} = NNODES +@inline function nnodes(mortar::LobattoLegendreMortarL2{RealT, NNODES}) where {RealT, + NNODES} + NNODES +end @inline polydeg(mortar::LobattoLegendreMortarL2) = nnodes(mortar) - 1 - # TODO: We can create EC mortars along the lines of the following implementation. 
# abstract type AbstractMortarEC{RealT} <: AbstractMortar{RealT} end @@ -233,49 +248,55 @@ end # @inline nnodes(mortar::LobattoLegendreMortarEC{RealT, NNODES}) = NNODES - - -struct LobattoLegendreAnalyzer{RealT<:Real, NNODES, - VectorT<:AbstractVector{RealT}, - Vandermonde<:AbstractMatrix{RealT}} <: SolutionAnalyzer{RealT} - nodes ::VectorT - weights::VectorT - vandermonde::Vandermonde +struct LobattoLegendreAnalyzer{RealT <: Real, NNODES, + VectorT <: AbstractVector{RealT}, + Vandermonde <: AbstractMatrix{RealT}} <: + SolutionAnalyzer{RealT} + nodes::VectorT + weights::VectorT + vandermonde::Vandermonde end -function SolutionAnalyzer(basis::LobattoLegendreBasis; analysis_polydeg=2*polydeg(basis)) - RealT = real(basis) - nnodes_ = analysis_polydeg + 1 +function SolutionAnalyzer(basis::LobattoLegendreBasis; + analysis_polydeg = 2 * polydeg(basis)) + RealT = real(basis) + nnodes_ = analysis_polydeg + 1 - # compute everything using `Float64` by default - nodes_, weights_ = gauss_lobatto_nodes_weights(nnodes_) - vandermonde_ = polynomial_interpolation_matrix(get_nodes(basis), nodes_) + # compute everything using `Float64` by default + nodes_, weights_ = gauss_lobatto_nodes_weights(nnodes_) + vandermonde_ = polynomial_interpolation_matrix(get_nodes(basis), nodes_) - # type conversions to get the requested real type and enable possible - # optimizations of runtime performance and latency - nodes = SVector{nnodes_, RealT}(nodes_) - weights = SVector{nnodes_, RealT}(weights_) + # type conversions to get the requested real type and enable possible + # optimizations of runtime performance and latency + nodes = SVector{nnodes_, RealT}(nodes_) + weights = SVector{nnodes_, RealT}(weights_) - vandermonde = Matrix{RealT}(vandermonde_) + vandermonde = Matrix{RealT}(vandermonde_) - return LobattoLegendreAnalyzer{RealT, nnodes_, typeof(nodes), typeof(vandermonde)}( - nodes, weights, vandermonde) + return LobattoLegendreAnalyzer{RealT, nnodes_, typeof(nodes), typeof(vandermonde)}(nodes, + weights, + vandermonde) end function Base.show(io::IO, analyzer::LobattoLegendreAnalyzer) - @nospecialize analyzer # reduce precompilation time + @nospecialize analyzer # reduce precompilation time - print(io, "LobattoLegendreAnalyzer{", real(analyzer), "}(polydeg=", polydeg(analyzer), ")") + print(io, "LobattoLegendreAnalyzer{", real(analyzer), "}(polydeg=", + polydeg(analyzer), ")") end function Base.show(io::IO, ::MIME"text/plain", analyzer::LobattoLegendreAnalyzer) - @nospecialize analyzer # reduce precompilation time + @nospecialize analyzer # reduce precompilation time - print(io, "LobattoLegendreAnalyzer{", real(analyzer), "} with polynomials of degree ", polydeg(analyzer)) + print(io, "LobattoLegendreAnalyzer{", real(analyzer), + "} with polynomials of degree ", polydeg(analyzer)) end @inline Base.real(analyzer::LobattoLegendreAnalyzer{RealT}) where {RealT} = RealT -@inline nnodes(analyzer::LobattoLegendreAnalyzer{RealT, NNODES}) where {RealT, NNODES} = NNODES +@inline function nnodes(analyzer::LobattoLegendreAnalyzer{RealT, NNODES}) where {RealT, + NNODES} + NNODES +end """ eachnode(analyzer::LobattoLegendreAnalyzer) @@ -287,67 +308,72 @@ In particular, not the nodes themselves are returned. 
@inline polydeg(analyzer::LobattoLegendreAnalyzer) = nnodes(analyzer) - 1 - - -struct LobattoLegendreAdaptorL2{RealT<:Real, NNODES, ForwardMatrix<:AbstractMatrix{RealT}, ReverseMatrix<:AbstractMatrix{RealT}} <: AdaptorL2{RealT} - forward_upper::ForwardMatrix - forward_lower::ForwardMatrix - reverse_upper::ReverseMatrix - reverse_lower::ReverseMatrix +struct LobattoLegendreAdaptorL2{RealT <: Real, NNODES, + ForwardMatrix <: AbstractMatrix{RealT}, + ReverseMatrix <: AbstractMatrix{RealT}} <: + AdaptorL2{RealT} + forward_upper::ForwardMatrix + forward_lower::ForwardMatrix + reverse_upper::ReverseMatrix + reverse_lower::ReverseMatrix end function AdaptorL2(basis::LobattoLegendreBasis{RealT}) where {RealT} - nnodes_ = nnodes(basis) - - # compute everything using `Float64` by default - forward_upper_ = calc_forward_upper(nnodes_) - forward_lower_ = calc_forward_lower(nnodes_) - reverse_upper_ = calc_reverse_upper(nnodes_, Val(:gauss)) - reverse_lower_ = calc_reverse_lower(nnodes_, Val(:gauss)) - - # type conversions to get the requested real type and enable possible - # optimizations of runtime performance and latency - - # TODO: Taal performance - # Check the performance of different implementations of - # `refine_elements!` (forward) and `coarsen_elements!` (reverse) - # with different types of the matrices. - # Check whether `@turbo` with `eachnode` in `multiply_dimensionwise!` - # can be faster than `@tullio` when the matrix sizes are not necessarily - # static. - forward_upper = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(forward_upper_) - forward_lower = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(forward_lower_) - # forward_upper = Matrix{RealT}(forward_upper_) - # forward_lower = Matrix{RealT}(forward_lower_) - - reverse_upper = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_upper_) - reverse_lower = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_lower_) - # reverse_upper = Matrix{RealT}(reverse_upper_) - # reverse_lower = Matrix{RealT}(reverse_lower_) - - LobattoLegendreAdaptorL2{RealT, nnodes_, typeof(forward_upper), typeof(reverse_upper)}( - forward_upper, forward_lower, - reverse_upper, reverse_lower) + nnodes_ = nnodes(basis) + + # compute everything using `Float64` by default + forward_upper_ = calc_forward_upper(nnodes_) + forward_lower_ = calc_forward_lower(nnodes_) + reverse_upper_ = calc_reverse_upper(nnodes_, Val(:gauss)) + reverse_lower_ = calc_reverse_lower(nnodes_, Val(:gauss)) + + # type conversions to get the requested real type and enable possible + # optimizations of runtime performance and latency + + # TODO: Taal performance + # Check the performance of different implementations of + # `refine_elements!` (forward) and `coarsen_elements!` (reverse) + # with different types of the matrices. + # Check whether `@turbo` with `eachnode` in `multiply_dimensionwise!` + # can be faster than `@tullio` when the matrix sizes are not necessarily + # static. 
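    # (The static `SMatrix` variant below is used because the adaptor matrices are
    # small and their size is encoded in the type; the plain `Matrix` alternative is
    # kept commented out for such performance comparisons.)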
+    forward_upper = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(forward_upper_)
+    forward_lower = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(forward_lower_)
+    # forward_upper = Matrix{RealT}(forward_upper_)
+    # forward_lower = Matrix{RealT}(forward_lower_)
+
+    reverse_upper = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_upper_)
+    reverse_lower = SMatrix{nnodes_, nnodes_, RealT, nnodes_^2}(reverse_lower_)
+    # reverse_upper = Matrix{RealT}(reverse_upper_)
+    # reverse_lower = Matrix{RealT}(reverse_lower_)
+
+    LobattoLegendreAdaptorL2{RealT, nnodes_, typeof(forward_upper),
+                             typeof(reverse_upper)}(forward_upper, forward_lower,
+                                                    reverse_upper, reverse_lower)
 end

 function Base.show(io::IO, adaptor::LobattoLegendreAdaptorL2)
-  @nospecialize adaptor # reduce precompilation time
+    @nospecialize adaptor # reduce precompilation time

-  print(io, "LobattoLegendreAdaptorL2{", real(adaptor), "}(polydeg=", polydeg(adaptor), ")")
+    print(io, "LobattoLegendreAdaptorL2{", real(adaptor), "}(polydeg=",
+          polydeg(adaptor), ")")
 end

 function Base.show(io::IO, ::MIME"text/plain", adaptor::LobattoLegendreAdaptorL2)
-  @nospecialize adaptor # reduce precompilation time
+    @nospecialize adaptor # reduce precompilation time

-  print(io, "LobattoLegendreAdaptorL2{", real(adaptor), "} with polynomials of degree ", polydeg(adaptor))
+    print(io, "LobattoLegendreAdaptorL2{", real(adaptor),
+          "} with polynomials of degree ", polydeg(adaptor))
 end

 @inline Base.real(adaptor::LobattoLegendreAdaptorL2{RealT}) where {RealT} = RealT

-@inline nnodes(adaptor::LobattoLegendreAdaptorL2{RealT, NNODES}) where {RealT, NNODES} = NNODES
+@inline function nnodes(adaptor::LobattoLegendreAdaptorL2{RealT, NNODES}) where {RealT,
+                                                                                 NNODES}
+    NNODES
+end

 @inline polydeg(adaptor::LobattoLegendreAdaptorL2) = nnodes(adaptor) - 1

-
 ###############################################################################
 # Polynomial derivative and interpolation functions

@@ -355,349 +381,337 @@ end

 # Calculate the Dhat matrix
 function calc_dhat(nodes, weights)
-  n_nodes = length(nodes)
-  dhat = Matrix(polynomial_derivative_matrix(nodes)')
+    n_nodes = length(nodes)
+    dhat = Matrix(polynomial_derivative_matrix(nodes)')

-  for n in 1:n_nodes, j in 1:n_nodes
-    dhat[j, n] *= -weights[n] / weights[j]
-  end
+    for n in 1:n_nodes, j in 1:n_nodes
+        dhat[j, n] *= -weights[n] / weights[j]
+    end

-  return dhat
+    return dhat
 end

-
 # Calculate the Dsplit matrix for split-form differentiation: dsplit = 2D - M⁻¹B
 function calc_dsplit(nodes, weights)
-  # Start with 2 x the normal D matrix
-  dsplit = 2 .* polynomial_derivative_matrix(nodes)
+    # Start with 2 x the normal D matrix
+    dsplit = 2 .* polynomial_derivative_matrix(nodes)

-  # Modify to account for the boundary matrix term M⁻¹B
-  dsplit[ 1, 1] += 1 / weights[1]
-  dsplit[end, end] -= 1 / weights[end]
+    # Modify to account for the boundary matrix term M⁻¹B
+    dsplit[1, 1] += 1 / weights[1]
+    dsplit[end, end] -= 1 / weights[end]

-  return dsplit
+    return dsplit
 end

-
 # Calculate the polynomial derivative matrix D
 function polynomial_derivative_matrix(nodes)
-  n_nodes = length(nodes)
-  d = zeros(n_nodes, n_nodes)
-  wbary = barycentric_weights(nodes)
-
-  for i in 1:n_nodes, j in 1:n_nodes
-    if j != i
-      d[i, j] = wbary[j] / wbary[i] * 1 / (nodes[i] - nodes[j])
-      d[i, i] -= d[i, j]
+    n_nodes = length(nodes)
+    d = zeros(n_nodes, n_nodes)
+    wbary = barycentric_weights(nodes)
+
+    for i in 1:n_nodes, j in 1:n_nodes
+        if j != i
+            d[i, j] = wbary[j] / wbary[i] * 1 / (nodes[i] - nodes[j])
+            d[i, i] -= d[i, j]
+        end
     end
-  end

-  return d
+    return d
 end

-
 # Calculate an interpolation matrix 
(Vandermonde matrix) between two given sets of nodes function polynomial_interpolation_matrix(nodes_in, nodes_out, - baryweights_in=barycentric_weights(nodes_in)) - n_nodes_in = length(nodes_in) - n_nodes_out = length(nodes_out) - vandermonde = Matrix{promote_type(eltype(nodes_in), eltype(nodes_out))}(undef, - n_nodes_out, n_nodes_in) - polynomial_interpolation_matrix!(vandermonde, nodes_in, nodes_out, baryweights_in) - - return vandermonde + baryweights_in = barycentric_weights(nodes_in)) + n_nodes_in = length(nodes_in) + n_nodes_out = length(nodes_out) + vandermonde = Matrix{promote_type(eltype(nodes_in), eltype(nodes_out))}(undef, + n_nodes_out, + n_nodes_in) + polynomial_interpolation_matrix!(vandermonde, nodes_in, nodes_out, baryweights_in) + + return vandermonde end function polynomial_interpolation_matrix!(vandermonde, nodes_in, nodes_out, baryweights_in) - fill!(vandermonde, zero(eltype(vandermonde))) - - for k in eachindex(nodes_out) - match = false - for j in eachindex(nodes_in) - if isapprox(nodes_out[k], nodes_in[j]) - match = true - vandermonde[k, j] = 1 - end - end + fill!(vandermonde, zero(eltype(vandermonde))) + + for k in eachindex(nodes_out) + match = false + for j in eachindex(nodes_in) + if isapprox(nodes_out[k], nodes_in[j]) + match = true + vandermonde[k, j] = 1 + end + end - if match == false - s = zero(eltype(vandermonde)) - for j in eachindex(nodes_in) - t = baryweights_in[j] / (nodes_out[k] - nodes_in[j]) - vandermonde[k, j] = t - s += t - end - for j in eachindex(nodes_in) - vandermonde[k, j] = vandermonde[k, j] / s - end + if match == false + s = zero(eltype(vandermonde)) + for j in eachindex(nodes_in) + t = baryweights_in[j] / (nodes_out[k] - nodes_in[j]) + vandermonde[k, j] = t + s += t + end + for j in eachindex(nodes_in) + vandermonde[k, j] = vandermonde[k, j] / s + end + end end - end - return vandermonde + return vandermonde end - # Calculate the barycentric weights for a given node distribution. function barycentric_weights(nodes) - n_nodes = length(nodes) - weights = ones(n_nodes) + n_nodes = length(nodes) + weights = ones(n_nodes) - for j = 2:n_nodes, k = 1:(j-1) - weights[k] *= nodes[k] - nodes[j] - weights[j] *= nodes[j] - nodes[k] - end + for j in 2:n_nodes, k in 1:(j - 1) + weights[k] *= nodes[k] - nodes[j] + weights[j] *= nodes[j] - nodes[k] + end - for j in 1:n_nodes - weights[j] = 1 / weights[j] - end + for j in 1:n_nodes + weights[j] = 1 / weights[j] + end - return weights + return weights end - # Calculate Lhat. function calc_lhat(x, nodes, weights) - n_nodes = length(nodes) - wbary = barycentric_weights(nodes) + n_nodes = length(nodes) + wbary = barycentric_weights(nodes) - lhat = lagrange_interpolating_polynomials(x, nodes, wbary) + lhat = lagrange_interpolating_polynomials(x, nodes, wbary) - for i in 1:n_nodes - lhat[i] /= weights[i] - end + for i in 1:n_nodes + lhat[i] /= weights[i] + end - return lhat + return lhat end - # Calculate Lagrange polynomials for a given node distribution. 
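# With barycentric weights w_j for nodes x_j, the j-th Lagrange basis polynomial
# evaluated at a point x away from all nodes is
#   l_j(x) = (w_j / (x - x_j)) / sum_k (w_k / (x - x_k)),
# which is exactly the normalization by `total` carried out below.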
function lagrange_interpolating_polynomials(x, nodes, wbary) - n_nodes = length(nodes) - polynomials = zeros(n_nodes) + n_nodes = length(nodes) + polynomials = zeros(n_nodes) - for i in 1:n_nodes - if isapprox(x, nodes[i], rtol=eps(x)) - polynomials[i] = 1 - return polynomials + for i in 1:n_nodes + if isapprox(x, nodes[i], rtol = eps(x)) + polynomials[i] = 1 + return polynomials + end end - end - for i in 1:n_nodes - polynomials[i] = wbary[i] / (x - nodes[i]) - end - total = sum(polynomials) + for i in 1:n_nodes + polynomials[i] = wbary[i] / (x - nodes[i]) + end + total = sum(polynomials) - for i in 1:n_nodes - polynomials[i] /= total - end + for i in 1:n_nodes + polynomials[i] /= total + end - return polynomials + return polynomials end - # From FLUXO (but really from blue book by Kopriva) function gauss_lobatto_nodes_weights(n_nodes::Integer) - # From Kopriva's book - n_iterations = 10 - tolerance = 1e-15 - - # Initialize output - nodes = zeros(n_nodes) - weights = zeros(n_nodes) - - # Special case for polynomial degree zero (first order finite volume) - if n_nodes == 1 - nodes[1] = 0 - weights[1] = 2 - return nodes, weights - end - - # Get polynomial degree for convenience - N = n_nodes - 1 - - # Calculate values at boundary - nodes[1] = -1.0 - nodes[end] = 1.0 - weights[1] = 2 / (N * (N + 1)) - weights[end] = weights[1] - - # Calculate interior values - if N > 1 - cont1 = pi/N - cont2 = 3/(8 * N * pi) - - # Use symmetry -> only left side is computed - for i in 1:(div(N + 1, 2) - 1) - # Calculate node - # Initial guess for Newton method - nodes[i+1] = -cos(cont1*(i+0.25) - cont2/(i+0.25)) - - # Newton iteration to find root of Legendre polynomial (= integration node) - for k in 0:n_iterations - q, qder, _ = calc_q_and_l(N, nodes[i+1]) - dx = -q/qder - nodes[i+1] += dx - if abs(dx) < tolerance * abs(nodes[i+1]) - break - end - end - - # Calculate weight - _, _, L = calc_q_and_l(N, nodes[i+1]) - weights[i+1] = weights[1] / L^2 + # From Kopriva's book + n_iterations = 10 + tolerance = 1e-15 + + # Initialize output + nodes = zeros(n_nodes) + weights = zeros(n_nodes) + + # Special case for polynomial degree zero (first order finite volume) + if n_nodes == 1 + nodes[1] = 0 + weights[1] = 2 + return nodes, weights + end - # Set nodes and weights according to symmetry properties - nodes[N+1-i] = -nodes[i+1] - weights[N+1-i] = weights[i+1] + # Get polynomial degree for convenience + N = n_nodes - 1 + + # Calculate values at boundary + nodes[1] = -1.0 + nodes[end] = 1.0 + weights[1] = 2 / (N * (N + 1)) + weights[end] = weights[1] + + # Calculate interior values + if N > 1 + cont1 = pi / N + cont2 = 3 / (8 * N * pi) + + # Use symmetry -> only left side is computed + for i in 1:(div(N + 1, 2) - 1) + # Calculate node + # Initial guess for Newton method + nodes[i + 1] = -cos(cont1 * (i + 0.25) - cont2 / (i + 0.25)) + + # Newton iteration to find root of Legendre polynomial (= integration node) + for k in 0:n_iterations + q, qder, _ = calc_q_and_l(N, nodes[i + 1]) + dx = -q / qder + nodes[i + 1] += dx + if abs(dx) < tolerance * abs(nodes[i + 1]) + break + end + end + + # Calculate weight + _, _, L = calc_q_and_l(N, nodes[i + 1]) + weights[i + 1] = weights[1] / L^2 + + # Set nodes and weights according to symmetry properties + nodes[N + 1 - i] = -nodes[i + 1] + weights[N + 1 - i] = weights[i + 1] + end end - end - # If odd number of nodes, set center node to origin (= 0.0) and calculate weight - if n_nodes % 2 == 1 - _, _, L = calc_q_and_l(N, 0) - nodes[div(N, 2) + 1] = 0.0 - weights[div(N, 2) + 1] = 
weights[1] / L^2 - end + # If odd number of nodes, set center node to origin (= 0.0) and calculate weight + if n_nodes % 2 == 1 + _, _, L = calc_q_and_l(N, 0) + nodes[div(N, 2) + 1] = 0.0 + weights[div(N, 2) + 1] = weights[1] / L^2 + end - return nodes, weights + return nodes, weights end - # From FLUXO (but really from blue book by Kopriva) function calc_q_and_l(N::Integer, x::Float64) - L_Nm2 = 1.0 - L_Nm1 = x - Lder_Nm2 = 0.0 - Lder_Nm1 = 1.0 - - local L - for i in 2:N - L = ((2 * i - 1) * x * L_Nm1 - (i - 1) * L_Nm2) / i - Lder = Lder_Nm2 + (2 * i - 1) * L_Nm1 - L_Nm2 = L_Nm1 - L_Nm1 = L - Lder_Nm2 = Lder_Nm1 - Lder_Nm1 = Lder - end - - q = (2 * N + 1)/(N + 1) * (x * L - L_Nm2) - qder = (2 * N + 1) * L - - return q, qder, L + L_Nm2 = 1.0 + L_Nm1 = x + Lder_Nm2 = 0.0 + Lder_Nm1 = 1.0 + + local L + for i in 2:N + L = ((2 * i - 1) * x * L_Nm1 - (i - 1) * L_Nm2) / i + Lder = Lder_Nm2 + (2 * i - 1) * L_Nm1 + L_Nm2 = L_Nm1 + L_Nm1 = L + Lder_Nm2 = Lder_Nm1 + Lder_Nm1 = Lder + end + + q = (2 * N + 1) / (N + 1) * (x * L - L_Nm2) + qder = (2 * N + 1) * L + + return q, qder, L end calc_q_and_l(N::Integer, x::Real) = calc_q_and_l(N, convert(Float64, x)) - # From FLUXO (but really from blue book by Kopriva) function gauss_nodes_weights(n_nodes::Integer) - # From Kopriva's book - n_iterations = 10 - tolerance = 1e-15 - - # Initialize output - nodes = ones(n_nodes) * 1000 - weights = zeros(n_nodes) - - # Get polynomial degree for convenience - N = n_nodes - 1 - if N == 0 - nodes .= 0.0 - weights .= 2.0 - return nodes, weights - elseif N == 1 - nodes[1] = -sqrt(1/3) - nodes[end] = -nodes[1] - weights .= 1.0 - return nodes, weights - else # N > 1 - # Use symmetry property of the roots of the Legendre polynomials - for i in 0:(div(N + 1, 2) - 1) - # Starting guess for Newton method - nodes[i+1] = -cos(pi / (2 * N + 2) * (2 * i + 1)) - - # Newton iteration to find root of Legendre polynomial (= integration node) - for k in 0:n_iterations - poly, deriv = legendre_polynomial_and_derivative(N + 1, nodes[i+1]) - dx = -poly / deriv - nodes[i+1] += dx - if abs(dx) < tolerance * abs(nodes[i+1]) - break + # From Kopriva's book + n_iterations = 10 + tolerance = 1e-15 + + # Initialize output + nodes = ones(n_nodes) * 1000 + weights = zeros(n_nodes) + + # Get polynomial degree for convenience + N = n_nodes - 1 + if N == 0 + nodes .= 0.0 + weights .= 2.0 + return nodes, weights + elseif N == 1 + nodes[1] = -sqrt(1 / 3) + nodes[end] = -nodes[1] + weights .= 1.0 + return nodes, weights + else # N > 1 + # Use symmetry property of the roots of the Legendre polynomials + for i in 0:(div(N + 1, 2) - 1) + # Starting guess for Newton method + nodes[i + 1] = -cos(pi / (2 * N + 2) * (2 * i + 1)) + + # Newton iteration to find root of Legendre polynomial (= integration node) + for k in 0:n_iterations + poly, deriv = legendre_polynomial_and_derivative(N + 1, nodes[i + 1]) + dx = -poly / deriv + nodes[i + 1] += dx + if abs(dx) < tolerance * abs(nodes[i + 1]) + break + end + end + + # Calculate weight + poly, deriv = legendre_polynomial_and_derivative(N + 1, nodes[i + 1]) + weights[i + 1] = (2 * N + 3) / ((1 - nodes[i + 1]^2) * deriv^2) + + # Set nodes and weights according to symmetry properties + nodes[N + 1 - i] = -nodes[i + 1] + weights[N + 1 - i] = weights[i + 1] end - end - # Calculate weight - poly, deriv = legendre_polynomial_and_derivative(N + 1, nodes[i+1]) - weights[i+1] = (2 * N + 3) / ((1 - nodes[i+1]^2) * deriv^2) - - # Set nodes and weights according to symmetry properties - nodes[N+1-i] = -nodes[i+1] - 
weights[N+1-i] = weights[i+1] - end + # If odd number of nodes, set center node to origin (= 0.0) and calculate weight + if n_nodes % 2 == 1 + poly, deriv = legendre_polynomial_and_derivative(N + 1, 0.0) + nodes[div(N, 2) + 1] = 0.0 + weights[div(N, 2) + 1] = (2 * N + 3) / deriv^2 + end - # If odd number of nodes, set center node to origin (= 0.0) and calculate weight - if n_nodes % 2 == 1 - poly, deriv = legendre_polynomial_and_derivative(N + 1, 0.0) - nodes[div(N, 2) + 1] = 0.0 - weights[div(N, 2) + 1] = (2 * N + 3) / deriv^2 + return nodes, weights end - - return nodes, weights - end end - # From FLUXO (but really from blue book by Kopriva) function legendre_polynomial_and_derivative(N::Int, x::Real) - if N == 0 - poly = 1.0 - deriv = 0.0 - elseif N == 1 - poly = convert(Float64, x) - deriv = 1.0 - else - poly_Nm2 = 1.0 - poly_Nm1 = convert(Float64, x) - deriv_Nm2 = 0.0 - deriv_Nm1 = 1.0 - - poly = 0.0 - deriv = 0.0 - for i in 2:N - poly = ((2*i-1) * x * poly_Nm1 - (i-1) * poly_Nm2) / i - deriv=deriv_Nm2 + (2*i-1)*poly_Nm1 - poly_Nm2=poly_Nm1 - poly_Nm1=poly - deriv_Nm2=deriv_Nm1 - deriv_Nm1=deriv + if N == 0 + poly = 1.0 + deriv = 0.0 + elseif N == 1 + poly = convert(Float64, x) + deriv = 1.0 + else + poly_Nm2 = 1.0 + poly_Nm1 = convert(Float64, x) + deriv_Nm2 = 0.0 + deriv_Nm1 = 1.0 + + poly = 0.0 + deriv = 0.0 + for i in 2:N + poly = ((2 * i - 1) * x * poly_Nm1 - (i - 1) * poly_Nm2) / i + deriv = deriv_Nm2 + (2 * i - 1) * poly_Nm1 + poly_Nm2 = poly_Nm1 + poly_Nm1 = poly + deriv_Nm2 = deriv_Nm1 + deriv_Nm1 = deriv + end end - end - # Normalize - poly = poly * sqrt(N+0.5) - deriv = deriv * sqrt(N+0.5) + # Normalize + poly = poly * sqrt(N + 0.5) + deriv = deriv * sqrt(N + 0.5) - return poly, deriv + return poly, deriv end - # Calculate Legendre vandermonde matrix and its inverse function vandermonde_legendre(nodes, N) - n_nodes = length(nodes) - n_modes = N + 1 - vandermonde = zeros(n_nodes, n_modes) + n_nodes = length(nodes) + n_modes = N + 1 + vandermonde = zeros(n_nodes, n_modes) - for i in 1:n_nodes - for m in 1:n_modes - vandermonde[i, m], _ = legendre_polynomial_and_derivative(m-1, nodes[i]) + for i in 1:n_nodes + for m in 1:n_modes + vandermonde[i, m], _ = legendre_polynomial_and_derivative(m - 1, nodes[i]) + end end - end - # for very high polynomial degree, this is not well conditioned - inverse_vandermonde = inv(vandermonde) - return vandermonde, inverse_vandermonde + # for very high polynomial degree, this is not well conditioned + inverse_vandermonde = inv(vandermonde) + return vandermonde, inverse_vandermonde end vandermonde_legendre(nodes) = vandermonde_legendre(nodes, length(nodes) - 1) - - end # @muladd diff --git a/src/solvers/dgsem/dgsem.jl b/src/solvers/dgsem/dgsem.jl index 0e81fdb7bde..27caad4d2dc 100644 --- a/src/solvers/dgsem/dgsem.jl +++ b/src/solvers/dgsem/dgsem.jl @@ -3,14 +3,13 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Include utilities include("interpolation.jl") include("l2projection.jl") include("basis_lobatto_legendre.jl") - """ DGSEM(; RealT=Float64, polydeg::Integer, surface_flux=flux_central, @@ -21,57 +20,55 @@ include("basis_lobatto_legendre.jl") Create a discontinuous Galerkin spectral element method (DGSEM) using a [`LobattoLegendreBasis`](@ref) with polynomials of degree `polydeg`. 
""" -const DGSEM = DG{Basis} where {Basis<:LobattoLegendreBasis} +const DGSEM = DG{Basis} where {Basis <: LobattoLegendreBasis} # TODO: Deprecated in v0.3 (no longer documented) function DGSEM(basis::LobattoLegendreBasis, - surface_flux=flux_central, - volume_integral=VolumeIntegralWeakForm(), - mortar=MortarL2(basis)) - - surface_integral = SurfaceIntegralWeakForm(surface_flux) - return DG{typeof(basis), typeof(mortar), typeof(surface_integral), typeof(volume_integral)}( - basis, mortar, surface_integral, volume_integral) + surface_flux = flux_central, + volume_integral = VolumeIntegralWeakForm(), + mortar = MortarL2(basis)) + surface_integral = SurfaceIntegralWeakForm(surface_flux) + return DG{typeof(basis), typeof(mortar), typeof(surface_integral), + typeof(volume_integral)}(basis, mortar, surface_integral, volume_integral) end # TODO: Deprecated in v0.3 (no longer documented) function DGSEM(basis::LobattoLegendreBasis, surface_integral::AbstractSurfaceIntegral, - volume_integral=VolumeIntegralWeakForm(), - mortar=MortarL2(basis)) - - return DG{typeof(basis), typeof(mortar), typeof(surface_integral), typeof(volume_integral)}( - basis, mortar, surface_integral, volume_integral) + volume_integral = VolumeIntegralWeakForm(), + mortar = MortarL2(basis)) + return DG{typeof(basis), typeof(mortar), typeof(surface_integral), + typeof(volume_integral)}(basis, mortar, surface_integral, volume_integral) end # TODO: Deprecated in v0.3 (no longer documented) function DGSEM(RealT, polydeg::Integer, - surface_flux=flux_central, - volume_integral=VolumeIntegralWeakForm(), - mortar=MortarL2(LobattoLegendreBasis(RealT, polydeg))) - basis = LobattoLegendreBasis(RealT, polydeg) + surface_flux = flux_central, + volume_integral = VolumeIntegralWeakForm(), + mortar = MortarL2(LobattoLegendreBasis(RealT, polydeg))) + basis = LobattoLegendreBasis(RealT, polydeg) - return DGSEM(basis, surface_flux, volume_integral, mortar) + return DGSEM(basis, surface_flux, volume_integral, mortar) end -DGSEM(polydeg, surface_flux=flux_central, volume_integral=VolumeIntegralWeakForm()) = DGSEM(Float64, polydeg, surface_flux, volume_integral) +function DGSEM(polydeg, surface_flux = flux_central, + volume_integral = VolumeIntegralWeakForm()) + DGSEM(Float64, polydeg, surface_flux, volume_integral) +end # The constructor using only keyword arguments is convenient for elixirs since # it allows to modify the polynomial degree and other parameters via # `trixi_include`. 
-function DGSEM(; RealT=Float64, - polydeg::Integer, - surface_flux=flux_central, - surface_integral=SurfaceIntegralWeakForm(surface_flux), - volume_integral=VolumeIntegralWeakForm()) - basis = LobattoLegendreBasis(RealT, polydeg) - return DGSEM(basis, surface_integral, volume_integral) +function DGSEM(; RealT = Float64, + polydeg::Integer, + surface_flux = flux_central, + surface_integral = SurfaceIntegralWeakForm(surface_flux), + volume_integral = VolumeIntegralWeakForm()) + basis = LobattoLegendreBasis(RealT, polydeg) + return DGSEM(basis, surface_integral, volume_integral) end @inline polydeg(dg::DGSEM) = polydeg(dg.basis) Base.summary(io::IO, dg::DGSEM) = print(io, "DGSEM(polydeg=$(polydeg(dg)))") - - - end # @muladd diff --git a/src/solvers/dgsem/interpolation.jl b/src/solvers/dgsem/interpolation.jl index bf54d518ee2..3f8f61c072f 100644 --- a/src/solvers/dgsem/interpolation.jl +++ b/src/solvers/dgsem/interpolation.jl @@ -2,55 +2,61 @@ # Naive implementations of multiply_dimensionwise used to demonstrate the functionality # without performance optimizations and for testing correctness of the optimized versions # implemented below. -function multiply_dimensionwise_naive(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 2}) - size_out = size(matrix, 1) - size_in = size(matrix, 2) - n_vars = size(data_in, 1) - data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out) - - for i in 1:size_out - for ii in 1:size_in - for v in 1:n_vars - data_out[v, i] += matrix[i, ii] * data_in[v, ii] - end +function multiply_dimensionwise_naive(matrix::AbstractMatrix, + data_in::AbstractArray{<:Any, 2}) + size_out = size(matrix, 1) + size_in = size(matrix, 2) + n_vars = size(data_in, 1) + data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out) + + for i in 1:size_out + for ii in 1:size_in + for v in 1:n_vars + data_out[v, i] += matrix[i, ii] * data_in[v, ii] + end + end end - end - return data_out + return data_out end -function multiply_dimensionwise_naive(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 3}) - size_out = size(matrix, 1) - size_in = size(matrix, 2) - n_vars = size(data_in, 1) - data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, size_out) - - for j in 1:size_out, i in 1:size_out - for jj in 1:size_in, ii in 1:size_in - for v in 1:n_vars - data_out[v, i, j] += matrix[i, ii] * matrix[j, jj] * data_in[v, ii, jj] - end +function multiply_dimensionwise_naive(matrix::AbstractMatrix, + data_in::AbstractArray{<:Any, 3}) + size_out = size(matrix, 1) + size_in = size(matrix, 2) + n_vars = size(data_in, 1) + data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, + size_out) + + for j in 1:size_out, i in 1:size_out + for jj in 1:size_in, ii in 1:size_in + for v in 1:n_vars + data_out[v, i, j] += matrix[i, ii] * matrix[j, jj] * data_in[v, ii, jj] + end + end end - end - return data_out + return data_out end -function multiply_dimensionwise_naive(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 4}) - size_out = size(matrix, 1) - size_in = size(matrix, 2) - n_vars = size(data_in, 1) - data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, size_out, size_out) - - for k in 1:size_out, j in 1:size_out, i in 1:size_out - for kk in 1:size_in, jj in 1:size_in, ii in 1:size_in - for v in 1:n_vars - data_out[v, i, j, k] += matrix[i, ii] * matrix[j, jj] * matrix[k, kk] * data_in[v, ii, jj, kk] - end +function multiply_dimensionwise_naive(matrix::AbstractMatrix, + 
data_in::AbstractArray{<:Any, 4}) + size_out = size(matrix, 1) + size_in = size(matrix, 2) + n_vars = size(data_in, 1) + data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, + size_out, size_out) + + for k in 1:size_out, j in 1:size_out, i in 1:size_out + for kk in 1:size_in, jj in 1:size_in, ii in 1:size_in + for v in 1:n_vars + data_out[v, i, j, k] += matrix[i, ii] * matrix[j, jj] * matrix[k, kk] * + data_in[v, ii, jj, kk] + end + end end - end - return data_out + return data_out end """ @@ -61,42 +67,43 @@ is assumed to have the first coordinate for the number of variables and the rema are multiplied by `matrix`. """ function multiply_dimensionwise(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 2}) - # 1D - # optimized version of multiply_dimensionwise_naive - size_out = size(matrix, 1) - n_vars = size(data_in, 1) - data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out) + # 1D + # optimized version of multiply_dimensionwise_naive + size_out = size(matrix, 1) + n_vars = size(data_in, 1) + data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out) - multiply_dimensionwise!(data_out, matrix, data_in) + multiply_dimensionwise!(data_out, matrix, data_in) - return data_out + return data_out end function multiply_dimensionwise(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 3}) - # 2D - # optimized version of multiply_dimensionwise_naive - size_out = size(matrix, 1) - n_vars = size(data_in, 1) - data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, size_out) + # 2D + # optimized version of multiply_dimensionwise_naive + size_out = size(matrix, 1) + n_vars = size(data_in, 1) + data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, + size_out) - multiply_dimensionwise!(data_out, matrix, data_in) + multiply_dimensionwise!(data_out, matrix, data_in) - return data_out + return data_out end function multiply_dimensionwise(matrix::AbstractMatrix, data_in::AbstractArray{<:Any, 4}) - # 3D - # optimized version of multiply_dimensionwise_naive - size_out = size(matrix, 1) - n_vars = size(data_in, 1) - data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, size_out, size_out) + # 3D + # optimized version of multiply_dimensionwise_naive + size_out = size(matrix, 1) + n_vars = size(data_in, 1) + data_out = zeros(promote_type(eltype(data_in), eltype(matrix)), n_vars, size_out, + size_out, size_out) - multiply_dimensionwise!(data_out, matrix, data_in) + multiply_dimensionwise!(data_out, matrix, data_in) - return data_out + return data_out end - # In the following, there are several optimized in-place versions of multiply_dimensionwise. # These may make use of advanced optimization features such as the macro `@tullio` from Tullio.jl, # which basically uses an Einstein summation convention syntax. 
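# A self-contained sketch (not part of this patch) of what the 2D version computes:
# applying `matrix` along each coordinate direction of `data_in` is equivalent to
# multiplying each variable's slice by a Kronecker product. All sizes are chosen
# arbitrarily for illustration.
using LinearAlgebra

n_in, n_out, n_vars = 3, 5, 2
matrix = rand(n_out, n_in)
data_in = rand(n_vars, n_in, n_in)

# dimension-by-dimension application, as in `multiply_dimensionwise_naive`
data_out = zeros(n_vars, n_out, n_out)
for j in 1:n_out, i in 1:n_out, jj in 1:n_in, ii in 1:n_in, v in 1:n_vars
    data_out[v, i, j] += matrix[i, ii] * matrix[j, jj] * data_in[v, ii, jj]
end

# equivalent Kronecker form: vec(M * X * M') == kron(M, M) * vec(X)
for v in 1:n_vars
    @assert vec(data_out[v, :, :]) ≈ kron(matrix, matrix) * vec(data_in[v, :, :])
end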
@@ -106,17 +113,17 @@ end
 # 1D version
 function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 2},
                                  matrix::AbstractMatrix,
-                                 data_in ::AbstractArray{<:Any, 2})
-  # @tullio threads=false data_out[v, i] = matrix[i, ii] * data_in[v, ii]
-  @turbo for i in axes(data_out, 2), v in axes(data_out, 1)
-    res = zero(eltype(data_out))
-    for ii in axes(matrix, 2)
-      res += matrix[i, ii] * data_in[v, ii]
+                                 data_in::AbstractArray{<:Any, 2})
+    # @tullio threads=false data_out[v, i] = matrix[i, ii] * data_in[v, ii]
+    @turbo for i in axes(data_out, 2), v in axes(data_out, 1)
+        res = zero(eltype(data_out))
+        for ii in axes(matrix, 2)
+            res += matrix[i, ii] * data_in[v, ii]
+        end
+        data_out[v, i] = res
     end
-    data_out[v, i] = res
-  end

-  return nothing
+    return nothing
 end

 # 1D version for scalars
@@ -124,73 +131,74 @@ end
 # of size unity is dropped, resulting in one dimension less than in `multiply_dimensionwise!`.
 function multiply_scalar_dimensionwise!(data_out::AbstractArray{<:Any, 1},
                                         matrix::AbstractMatrix,
-                                        data_in ::AbstractArray{<:Any, 1})
-  # @tullio threads=false data_out[i] = matrix[i, ii] * data_in[ii]
-  @turbo for i in axes(data_out, 1)
-    res = zero(eltype(data_out))
-    for ii in axes(matrix, 2)
-      res += matrix[i, ii] * data_in[ii]
+                                        data_in::AbstractArray{<:Any, 1})
+    # @tullio threads=false data_out[i] = matrix[i, ii] * data_in[ii]
+    @turbo for i in axes(data_out, 1)
+        res = zero(eltype(data_out))
+        for ii in axes(matrix, 2)
+            res += matrix[i, ii] * data_in[ii]
+        end
+        data_out[i] = res
     end
-    data_out[i] = res
-  end

-  return nothing
+    return nothing
 end

 # 1D version, apply matrixJ to data_inJ
 function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 2},
                                  matrix1::AbstractMatrix, data_in1::AbstractArray{<:Any, 2},
                                  matrix2::AbstractMatrix, data_in2::AbstractArray{<:Any, 2})
-  # @tullio threads=false data_out[v, i] = matrix1[i, ii] * data_in1[v, ii] + matrix2[i, ii] * data_in2[v, ii]
-  # TODO: LoopVectorization upgrade
-  # We would like to use `@turbo` for the outermost loop and possibly fuse both inner
-  # loops, but that does currently not work because of limitations of
-  # LoopVectorization.jl. However, Chris Elrod is planning to address this in
-  # the future, cf. https://github.com/JuliaSIMD/LoopVectorization.jl/issues/230#issuecomment-810632972
-  @turbo for i in axes(data_out, 2), v in axes(data_out, 1)
-    res = zero(eltype(data_out))
-    for ii in axes(matrix1, 2)
-      res += matrix1[i, ii] * data_in1[v, ii]
+    # @tullio threads=false data_out[v, i] = matrix1[i, ii] * data_in1[v, ii] + matrix2[i, ii] * data_in2[v, ii]
+    # TODO: LoopVectorization upgrade
+    # We would like to use `@turbo` for the outermost loop and possibly fuse both inner
+    # loops, but that does currently not work because of limitations of
+    # LoopVectorization.jl. However, Chris Elrod is planning to address this in
+    # the future, cf. 
https://github.com/JuliaSIMD/LoopVectorization.jl/issues/230#issuecomment-810632972 + @turbo for i in axes(data_out, 2), v in axes(data_out, 1) + res = zero(eltype(data_out)) + for ii in axes(matrix1, 2) + res += matrix1[i, ii] * data_in1[v, ii] + end + data_out[v, i] = res end - data_out[v, i] = res - end - @turbo for i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for ii in axes(matrix2, 2) - res += matrix2[i, ii] * data_in2[v, ii] + @turbo for i in axes(data_out, 2), v in axes(data_out, 1) + res = zero(eltype(data_out)) + for ii in axes(matrix2, 2) + res += matrix2[i, ii] * data_in2[v, ii] + end + data_out[v, i] += res end - data_out[v, i] += res - end - return nothing + return nothing end # 2D version function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 3}, matrix::AbstractMatrix, - data_in:: AbstractArray{<:Any, 3}, - tmp1=zeros(eltype(data_out), size(data_out, 1), size(matrix, 1), size(matrix, 2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[v, i, j] = matrix[i, ii] * data_in[v, ii, j] - @turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix, 2) - res += matrix[i, ii] * data_in[v, ii, j] + data_in::AbstractArray{<:Any, 3}, + tmp1 = zeros(eltype(data_out), size(data_out, 1), + size(matrix, 1), size(matrix, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[v, i, j] = matrix[i, ii] * data_in[v, ii, j] + @turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) + res = zero(eltype(tmp1)) + for ii in axes(matrix, 2) + res += matrix[i, ii] * data_in[v, ii, j] + end + tmp1[v, i, j] = res end - tmp1[v, i, j] = res - end - - # Interpolate in y-direction - # @tullio threads=false data_out[v, i, j] = matrix[j, jj] * tmp1[v, i, jj] - @turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for jj in axes(matrix, 2) - res += matrix[j, jj] * tmp1[v, i, jj] + + # Interpolate in y-direction + # @tullio threads=false data_out[v, i, j] = matrix[j, jj] * tmp1[v, i, jj] + @turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) + res = zero(eltype(data_out)) + for jj in axes(matrix, 2) + res += matrix[j, jj] * tmp1[v, i, jj] + end + data_out[v, i, j] = res end - data_out[v, i, j] = res - end - return nothing + return nothing end # 2D version for scalars @@ -198,246 +206,284 @@ end # of size unity is dropped, resulting in one dimension less than in `multiply_dimensionwise!`. 
function multiply_scalar_dimensionwise!(data_out::AbstractArray{<:Any, 2}, matrix::AbstractMatrix, - data_in:: AbstractArray{<:Any, 2}, - tmp1=zeros(eltype(data_out), size(matrix, 1), size(matrix, 2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[i, j] = matrix[i, ii] * data_in[ii, j] - @turbo for j in axes(tmp1, 2), i in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix, 2) - res += matrix[i, ii] * data_in[ii, j] + data_in::AbstractArray{<:Any, 2}, + tmp1 = zeros(eltype(data_out), size(matrix, 1), + size(matrix, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[i, j] = matrix[i, ii] * data_in[ii, j] + @turbo for j in axes(tmp1, 2), i in axes(tmp1, 1) + res = zero(eltype(tmp1)) + for ii in axes(matrix, 2) + res += matrix[i, ii] * data_in[ii, j] + end + tmp1[i, j] = res end - tmp1[i, j] = res - end - - # Interpolate in y-direction - # @tullio threads=false data_out[i, j] = matrix[j, jj] * tmp1[i, jj] - @turbo for j in axes(data_out, 2), i in axes(data_out, 1) - res = zero(eltype(data_out)) - for jj in axes(matrix, 2) - res += matrix[j, jj] * tmp1[i, jj] + + # Interpolate in y-direction + # @tullio threads=false data_out[i, j] = matrix[j, jj] * tmp1[i, jj] + @turbo for j in axes(data_out, 2), i in axes(data_out, 1) + res = zero(eltype(data_out)) + for jj in axes(matrix, 2) + res += matrix[j, jj] * tmp1[i, jj] + end + data_out[i, j] = res end - data_out[i, j] = res - end - return nothing + return nothing end # 2D version, apply matrixJ to dimension J of data_in function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 3}, matrix1::AbstractMatrix, matrix2::AbstractMatrix, - data_in:: AbstractArray{<:Any, 3}, - tmp1=zeros(eltype(data_out), size(data_out, 1), size(matrix1, 1), size(matrix1, 2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[v, i, j] = matrix1[i, ii] * data_in[v, ii, j] - @turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix1, 2) - res += matrix1[i, ii] * data_in[v, ii, j] + data_in::AbstractArray{<:Any, 3}, + tmp1 = zeros(eltype(data_out), size(data_out, 1), + size(matrix1, 1), size(matrix1, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[v, i, j] = matrix1[i, ii] * data_in[v, ii, j] + @turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) + res = zero(eltype(tmp1)) + for ii in axes(matrix1, 2) + res += matrix1[i, ii] * data_in[v, ii, j] + end + tmp1[v, i, j] = res end - tmp1[v, i, j] = res - end - - # Interpolate in y-direction - # @tullio threads=false data_out[v, i, j] = matrix2[j, jj] * tmp1[v, i, jj] - @turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for jj in axes(matrix2, 2) - res += matrix2[j, jj] * tmp1[v, i, jj] + + # Interpolate in y-direction + # @tullio threads=false data_out[v, i, j] = matrix2[j, jj] * tmp1[v, i, jj] + @turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) + res = zero(eltype(data_out)) + for jj in axes(matrix2, 2) + res += matrix2[j, jj] * tmp1[v, i, jj] + end + data_out[v, i, j] = res end - data_out[v, i, j] = res - end - return nothing + return nothing end # 2D version, apply matrixJ to dimension J of data_in and add the result to data_out function add_multiply_dimensionwise!(data_out::AbstractArray{<:Any, 3}, matrix1::AbstractMatrix, matrix2::AbstractMatrix, - data_in:: AbstractArray{<:Any, 3}, - tmp1=zeros(eltype(data_out), size(data_out, 1), size(matrix1, 1), size(matrix1, 
2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[v, i, j] = matrix1[i, ii] * data_in[v, ii, j] - @turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix1, 2) - res += matrix1[i, ii] * data_in[v, ii, j] + data_in::AbstractArray{<:Any, 3}, + tmp1 = zeros(eltype(data_out), size(data_out, 1), + size(matrix1, 1), size(matrix1, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[v, i, j] = matrix1[i, ii] * data_in[v, ii, j] + @turbo for j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) + res = zero(eltype(tmp1)) + for ii in axes(matrix1, 2) + res += matrix1[i, ii] * data_in[v, ii, j] + end + tmp1[v, i, j] = res end - tmp1[v, i, j] = res - end - - # Interpolate in y-direction - # @tullio threads=false data_out[v, i, j] += matrix2[j, jj] * tmp1[v, i, jj] - @turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for jj in axes(matrix2, 2) - res += matrix2[j, jj] * tmp1[v, i, jj] + + # Interpolate in y-direction + # @tullio threads=false data_out[v, i, j] += matrix2[j, jj] * tmp1[v, i, jj] + @turbo for j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) + res = zero(eltype(data_out)) + for jj in axes(matrix2, 2) + res += matrix2[j, jj] * tmp1[v, i, jj] + end + data_out[v, i, j] += res end - data_out[v, i, j] += res - end - return nothing + return nothing end # 3D version function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 4}, matrix::AbstractMatrix, - data_in:: AbstractArray{<:Any, 4}, - tmp1=zeros(eltype(data_out), size(data_out, 1), size(matrix, 1), size(matrix, 2), size(matrix, 2)), - tmp2=zeros(eltype(data_out), size(data_out, 1), size(matrix, 1), size(matrix, 1), size(matrix, 2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[v, i, j, k] = matrix[i, ii] * data_in[v, ii, j, k] - @turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix, 2) - res += matrix[i, ii] * data_in[v, ii, j, k] + data_in::AbstractArray{<:Any, 4}, + tmp1 = zeros(eltype(data_out), size(data_out, 1), + size(matrix, 1), size(matrix, 2), + size(matrix, 2)), + tmp2 = zeros(eltype(data_out), size(data_out, 1), + size(matrix, 1), size(matrix, 1), + size(matrix, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[v, i, j, k] = matrix[i, ii] * data_in[v, ii, j, k] + @turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2), + v in axes(tmp1, 1) + + res = zero(eltype(tmp1)) + for ii in axes(matrix, 2) + res += matrix[i, ii] * data_in[v, ii, j, k] + end + tmp1[v, i, j, k] = res end - tmp1[v, i, j, k] = res - end - - # Interpolate in y-direction - # @tullio threads=false tmp2[v, i, j, k] = matrix[j, jj] * tmp1[v, i, jj, k] - @turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2), v in axes(tmp2, 1) - res = zero(eltype(tmp2)) - for jj in axes(matrix, 2) - res += matrix[j, jj] * tmp1[v, i, jj, k] + + # Interpolate in y-direction + # @tullio threads=false tmp2[v, i, j, k] = matrix[j, jj] * tmp1[v, i, jj, k] + @turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2), + v in axes(tmp2, 1) + + res = zero(eltype(tmp2)) + for jj in axes(matrix, 2) + res += matrix[j, jj] * tmp1[v, i, jj, k] + end + tmp2[v, i, j, k] = res end - tmp2[v, i, j, k] = res - end - - # Interpolate in z-direction - # @tullio threads=false data_out[v, i, j, k] = matrix[k, kk] * tmp2[v, i, j, kk] - @turbo for k in 
axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for kk in axes(matrix, 2) - res += matrix[k, kk] * tmp2[v, i, j, kk] + + # Interpolate in z-direction + # @tullio threads=false data_out[v, i, j, k] = matrix[k, kk] * tmp2[v, i, j, kk] + @turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2), + v in axes(data_out, 1) + + res = zero(eltype(data_out)) + for kk in axes(matrix, 2) + res += matrix[k, kk] * tmp2[v, i, j, kk] + end + data_out[v, i, j, k] = res end - data_out[v, i, j, k] = res - end - return nothing + return nothing end # 3D version for scalars # Instead of having a leading dimension of size 1 in `data_out, data_in`, this leading dimension # of size unity is dropped, resulting in one dimension less than in `multiply_dimensionwise!`. -function multiply_scalar_dimensionwise!(data_out::AbstractArray{<:Any, 3}, matrix::AbstractMatrix, - data_in:: AbstractArray{<:Any, 3}, - tmp1=zeros(eltype(data_out), size(matrix, 1), size(matrix, 2), size(matrix, 2)), - tmp2=zeros(eltype(data_out), size(matrix, 1), size(matrix, 1), size(matrix, 2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[i, j, k] = matrix[i, ii] * data_in[ii, j, k] - @turbo for k in axes(tmp1, 3), j in axes(tmp1, 2), i in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix, 2) - res += matrix[i, ii] * data_in[ii, j, k] +function multiply_scalar_dimensionwise!(data_out::AbstractArray{<:Any, 3}, + matrix::AbstractMatrix, + data_in::AbstractArray{<:Any, 3}, + tmp1 = zeros(eltype(data_out), size(matrix, 1), + size(matrix, 2), size(matrix, 2)), + tmp2 = zeros(eltype(data_out), size(matrix, 1), + size(matrix, 1), size(matrix, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[i, j, k] = matrix[i, ii] * data_in[ii, j, k] + @turbo for k in axes(tmp1, 3), j in axes(tmp1, 2), i in axes(tmp1, 1) + res = zero(eltype(tmp1)) + for ii in axes(matrix, 2) + res += matrix[i, ii] * data_in[ii, j, k] + end + tmp1[i, j, k] = res end - tmp1[i, j, k] = res - end - - # Interpolate in y-direction - # @tullio threads=false tmp2[i, j, k] = matrix[j, jj] * tmp1[i, jj, k] - @turbo for k in axes(tmp2, 3), j in axes(tmp2, 2), i in axes(tmp2, 1) - res = zero(eltype(tmp2)) - for jj in axes(matrix, 2) - res += matrix[j, jj] * tmp1[i, jj, k] + + # Interpolate in y-direction + # @tullio threads=false tmp2[i, j, k] = matrix[j, jj] * tmp1[i, jj, k] + @turbo for k in axes(tmp2, 3), j in axes(tmp2, 2), i in axes(tmp2, 1) + res = zero(eltype(tmp2)) + for jj in axes(matrix, 2) + res += matrix[j, jj] * tmp1[i, jj, k] + end + tmp2[i, j, k] = res end - tmp2[i, j, k] = res - end - - # Interpolate in z-direction - # @tullio threads=false data_out[i, j, k] = matrix[k, kk] * tmp2[i, j, kk] - @turbo for k in axes(data_out, 3), j in axes(data_out, 2), i in axes(data_out, 1) - res = zero(eltype(data_out)) - for kk in axes(matrix, 2) - res += matrix[k, kk] * tmp2[i, j, kk] + + # Interpolate in z-direction + # @tullio threads=false data_out[i, j, k] = matrix[k, kk] * tmp2[i, j, kk] + @turbo for k in axes(data_out, 3), j in axes(data_out, 2), i in axes(data_out, 1) + res = zero(eltype(data_out)) + for kk in axes(matrix, 2) + res += matrix[k, kk] * tmp2[i, j, kk] + end + data_out[i, j, k] = res end - data_out[i, j, k] = res - end - return nothing + return nothing end # 3D version, apply matrixJ to dimension J of data_in function multiply_dimensionwise!(data_out::AbstractArray{<:Any, 4}, - matrix1::AbstractMatrix, 
matrix2::AbstractMatrix, matrix3::AbstractMatrix, - data_in:: AbstractArray{<:Any, 4}, - tmp1=zeros(eltype(data_out), size(data_out, 1), size(matrix1, 1), size(matrix1, 2), size(matrix1, 2)), - tmp2=zeros(eltype(data_out), size(data_out, 1), size(matrix1, 1), size(matrix1, 1), size(matrix1, 2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[v, i, j, k] = matrix1[i, ii] * data_in[v, ii, j, k] - @turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix1, 2) - res += matrix1[i, ii] * data_in[v, ii, j, k] + matrix1::AbstractMatrix, matrix2::AbstractMatrix, + matrix3::AbstractMatrix, + data_in::AbstractArray{<:Any, 4}, + tmp1 = zeros(eltype(data_out), size(data_out, 1), + size(matrix1, 1), size(matrix1, 2), + size(matrix1, 2)), + tmp2 = zeros(eltype(data_out), size(data_out, 1), + size(matrix1, 1), size(matrix1, 1), + size(matrix1, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[v, i, j, k] = matrix1[i, ii] * data_in[v, ii, j, k] + @turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2), + v in axes(tmp1, 1) + + res = zero(eltype(tmp1)) + for ii in axes(matrix1, 2) + res += matrix1[i, ii] * data_in[v, ii, j, k] + end + tmp1[v, i, j, k] = res end - tmp1[v, i, j, k] = res - end - - # Interpolate in y-direction - # @tullio threads=false tmp2[v, i, j, k] = matrix2[j, jj] * tmp1[v, i, jj, k] - @turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2), v in axes(tmp2, 1) - res = zero(eltype(tmp1)) - for jj in axes(matrix2, 2) - res += matrix2[j, jj] * tmp1[v, i, jj, k] + + # Interpolate in y-direction + # @tullio threads=false tmp2[v, i, j, k] = matrix2[j, jj] * tmp1[v, i, jj, k] + @turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2), + v in axes(tmp2, 1) + + res = zero(eltype(tmp1)) + for jj in axes(matrix2, 2) + res += matrix2[j, jj] * tmp1[v, i, jj, k] + end + tmp2[v, i, j, k] = res end - tmp2[v, i, j, k] = res - end - - # Interpolate in z-direction - # @tullio threads=false data_out[v, i, j, k] = matrix3[k, kk] * tmp2[v, i, j, kk] - @turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for kk in axes(matrix3, 2) - res += matrix3[k, kk] * tmp2[v, i, j, kk] + + # Interpolate in z-direction + # @tullio threads=false data_out[v, i, j, k] = matrix3[k, kk] * tmp2[v, i, j, kk] + @turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2), + v in axes(data_out, 1) + + res = zero(eltype(data_out)) + for kk in axes(matrix3, 2) + res += matrix3[k, kk] * tmp2[v, i, j, kk] + end + data_out[v, i, j, k] = res end - data_out[v, i, j, k] = res - end - return nothing + return nothing end # 3D version, apply matrixJ to dimension J of data_in and add the result to data_out function add_multiply_dimensionwise!(data_out::AbstractArray{<:Any, 4}, - matrix1::AbstractMatrix, matrix2::AbstractMatrix, matrix3::AbstractMatrix, - data_in:: AbstractArray{<:Any, 4}, - tmp1=zeros(eltype(data_out), size(data_out, 1), size(matrix1, 1), size(matrix1, 2), size(matrix1, 2)), - tmp2=zeros(eltype(data_out), size(data_out, 1), size(matrix1, 1), size(matrix1, 1), size(matrix1, 2))) - - # Interpolate in x-direction - # @tullio threads=false tmp1[v, i, j, k] = matrix1[i, ii] * data_in[v, ii, j, k] - @turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2), v in axes(tmp1, 1) - res = zero(eltype(tmp1)) - for ii in axes(matrix1, 2) - res += matrix1[i, ii] * 
data_in[v, ii, j, k] + matrix1::AbstractMatrix, matrix2::AbstractMatrix, + matrix3::AbstractMatrix, + data_in::AbstractArray{<:Any, 4}, + tmp1 = zeros(eltype(data_out), size(data_out, 1), + size(matrix1, 1), size(matrix1, 2), + size(matrix1, 2)), + tmp2 = zeros(eltype(data_out), size(data_out, 1), + size(matrix1, 1), size(matrix1, 1), + size(matrix1, 2))) + + # Interpolate in x-direction + # @tullio threads=false tmp1[v, i, j, k] = matrix1[i, ii] * data_in[v, ii, j, k] + @turbo for k in axes(tmp1, 4), j in axes(tmp1, 3), i in axes(tmp1, 2), + v in axes(tmp1, 1) + + res = zero(eltype(tmp1)) + for ii in axes(matrix1, 2) + res += matrix1[i, ii] * data_in[v, ii, j, k] + end + tmp1[v, i, j, k] = res end - tmp1[v, i, j, k] = res - end - - # Interpolate in y-direction - # @tullio threads=false tmp2[v, i, j, k] = matrix2[j, jj] * tmp1[v, i, jj, k] - @turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2), v in axes(tmp2, 1) - res = zero(eltype(tmp1)) - for jj in axes(matrix2, 2) - res += matrix2[j, jj] * tmp1[v, i, jj, k] + + # Interpolate in y-direction + # @tullio threads=false tmp2[v, i, j, k] = matrix2[j, jj] * tmp1[v, i, jj, k] + @turbo for k in axes(tmp2, 4), j in axes(tmp2, 3), i in axes(tmp2, 2), + v in axes(tmp2, 1) + + res = zero(eltype(tmp1)) + for jj in axes(matrix2, 2) + res += matrix2[j, jj] * tmp1[v, i, jj, k] + end + tmp2[v, i, j, k] = res end - tmp2[v, i, j, k] = res - end - - # Interpolate in z-direction - # @tullio threads=false data_out[v, i, j, k] += matrix3[k, kk] * tmp2[v, i, j, kk] - @turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2), v in axes(data_out, 1) - res = zero(eltype(data_out)) - for kk in axes(matrix3, 2) - res += matrix3[k, kk] * tmp2[v, i, j, kk] + + # Interpolate in z-direction + # @tullio threads=false data_out[v, i, j, k] += matrix3[k, kk] * tmp2[v, i, j, kk] + @turbo for k in axes(data_out, 4), j in axes(data_out, 3), i in axes(data_out, 2), + v in axes(data_out, 1) + + res = zero(eltype(data_out)) + for kk in axes(matrix3, 2) + res += matrix3[k, kk] * tmp2[v, i, j, kk] + end + data_out[v, i, j, k] += res end - data_out[v, i, j, k] += res - end - return nothing + return nothing end diff --git a/src/solvers/dgsem/l2projection.jl b/src/solvers/dgsem/l2projection.jl index 44092b2f720..0bb46f5ca15 100644 --- a/src/solvers/dgsem/l2projection.jl +++ b/src/solvers/dgsem/l2projection.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # This diagram shows what is meant by "lower", "upper", and "large": # +1 +1 @@ -20,141 +20,135 @@ # # That is, we are only concerned with 2:1 subdivision of a surface/element. - # Calculate forward projection matrix for discrete L2 projection from large to upper # # Note: This is actually an interpolation. 
function calc_forward_upper(n_nodes) - # Calculate nodes, weights, and barycentric weights - nodes, weights = gauss_lobatto_nodes_weights(n_nodes) - wbary = barycentric_weights(nodes) - - # Calculate projection matrix (actually: interpolation) - operator = zeros(n_nodes, n_nodes) - for j in 1:n_nodes - poly = lagrange_interpolating_polynomials(1/2 * (nodes[j] + 1), nodes, wbary) - for i in 1:n_nodes - operator[j, i] = poly[i] + # Calculate nodes, weights, and barycentric weights + nodes, weights = gauss_lobatto_nodes_weights(n_nodes) + wbary = barycentric_weights(nodes) + + # Calculate projection matrix (actually: interpolation) + operator = zeros(n_nodes, n_nodes) + for j in 1:n_nodes + poly = lagrange_interpolating_polynomials(1 / 2 * (nodes[j] + 1), nodes, wbary) + for i in 1:n_nodes + operator[j, i] = poly[i] + end end - end - return operator + return operator end - # Calculate forward projection matrix for discrete L2 projection from large to lower # # Note: This is actually an interpolation. function calc_forward_lower(n_nodes) - # Calculate nodes, weights, and barycentric weights - nodes, weights = gauss_lobatto_nodes_weights(n_nodes) - wbary = barycentric_weights(nodes) - - # Calculate projection matrix (actually: interpolation) - operator = zeros(n_nodes, n_nodes) - for j in 1:n_nodes - poly = lagrange_interpolating_polynomials(1/2 * (nodes[j] - 1), nodes, wbary) - for i in 1:n_nodes - operator[j, i] = poly[i] + # Calculate nodes, weights, and barycentric weights + nodes, weights = gauss_lobatto_nodes_weights(n_nodes) + wbary = barycentric_weights(nodes) + + # Calculate projection matrix (actually: interpolation) + operator = zeros(n_nodes, n_nodes) + for j in 1:n_nodes + poly = lagrange_interpolating_polynomials(1 / 2 * (nodes[j] - 1), nodes, wbary) + for i in 1:n_nodes + operator[j, i] = poly[i] + end end - end - return operator + return operator end - # Calculate reverse projection matrix for discrete L2 projection from upper to large (Gauss version) # # Note: To make the L2 projection exact, first convert to Gauss nodes, # perform projection, and convert back to Gauss-Lobatto. # (An illustrative sketch of this exactness property follows below.) 
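As an illustration of what "exact" means here, the following sketch combines the forward (interpolation) and reverse (projection) operators defined in this file. This is not Trixi API: the helpers are unexported, so the sketch assumes they are reachable under the `Trixi` module prefix, matching the source layout here. Since n-point Gauss quadrature integrates the degree-(2n-2) products of Lagrange polynomials exactly, projecting the interpolated halves back should reproduce polynomial data up to roundoff:

```julia
using Trixi, LinearAlgebra

n_nodes = 4
forward_upper = Trixi.calc_forward_upper(n_nodes)
forward_lower = Trixi.calc_forward_lower(n_nodes)
reverse_upper = Trixi.calc_reverse_upper(n_nodes, Val(:gauss))
reverse_lower = Trixi.calc_reverse_lower(n_nodes, Val(:gauss))

# Interpolating from the large element to the two halves and projecting back
# should act as the identity on polynomial (nodal) data, up to roundoff:
@assert reverse_upper * forward_upper + reverse_lower * forward_lower ≈ I(n_nodes)
```

By contrast, the `Val(:gauss_lobatto)` variants further below use the (lumped) Gauss-Lobatto quadrature, which is not exact for these integrands, so the analogous combination only approximates the identity.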
function calc_reverse_upper(n_nodes, ::Val{:gauss}) - # Calculate nodes, weights, and barycentric weights for Legendre-Gauss - gauss_nodes, gauss_weights = gauss_nodes_weights(n_nodes) - gauss_wbary = barycentric_weights(gauss_nodes) - - # Calculate projection matrix (actually: discrete L2 projection with errors) - operator = zeros(n_nodes, n_nodes) - for j in 1:n_nodes - poly = lagrange_interpolating_polynomials(1/2 * (gauss_nodes[j] + 1), gauss_nodes, gauss_wbary) - for i in 1:n_nodes - operator[i, j] = 1/2 * poly[i] * gauss_weights[j]/gauss_weights[i] + # Calculate nodes, weights, and barycentric weights for Legendre-Gauss + gauss_nodes, gauss_weights = gauss_nodes_weights(n_nodes) + gauss_wbary = barycentric_weights(gauss_nodes) + + # Calculate projection matrix (actually: discrete L2 projection with errors) + operator = zeros(n_nodes, n_nodes) + for j in 1:n_nodes + poly = lagrange_interpolating_polynomials(1 / 2 * (gauss_nodes[j] + 1), + gauss_nodes, gauss_wbary) + for i in 1:n_nodes + operator[i, j] = 1 / 2 * poly[i] * gauss_weights[j] / gauss_weights[i] + end end - end - # Calculate Vandermondes - lobatto_nodes, lobatto_weights = gauss_lobatto_nodes_weights(n_nodes) - gauss2lobatto = polynomial_interpolation_matrix(gauss_nodes, lobatto_nodes) - lobatto2gauss = polynomial_interpolation_matrix(lobatto_nodes, gauss_nodes) + # Calculate Vandermondes + lobatto_nodes, lobatto_weights = gauss_lobatto_nodes_weights(n_nodes) + gauss2lobatto = polynomial_interpolation_matrix(gauss_nodes, lobatto_nodes) + lobatto2gauss = polynomial_interpolation_matrix(lobatto_nodes, gauss_nodes) - return gauss2lobatto * operator * lobatto2gauss + return gauss2lobatto * operator * lobatto2gauss end - # Calculate reverse projection matrix for discrete L2 projection from lower to large (Gauss version) # # Note: To make the L2 projection exact, first convert to Gauss nodes, # perform projection, and convert back to Gauss-Lobatto. 
function calc_reverse_lower(n_nodes, ::Val{:gauss}) - # Calculate nodes, weights, and barycentric weights for Legendre-Gauss - gauss_nodes, gauss_weights = gauss_nodes_weights(n_nodes) - gauss_wbary = barycentric_weights(gauss_nodes) - - # Calculate projection matrix (actually: discrete L2 projection with errors) - operator = zeros(n_nodes, n_nodes) - for j in 1:n_nodes - poly = lagrange_interpolating_polynomials(1/2 * (gauss_nodes[j] - 1), gauss_nodes, gauss_wbary) - for i in 1:n_nodes - operator[i, j] = 1/2 * poly[i] * gauss_weights[j]/gauss_weights[i] + # Calculate nodes, weights, and barycentric weights for Legendre-Gauss + gauss_nodes, gauss_weights = gauss_nodes_weights(n_nodes) + gauss_wbary = barycentric_weights(gauss_nodes) + + # Calculate projection matrix (actually: discrete L2 projection with errors) + operator = zeros(n_nodes, n_nodes) + for j in 1:n_nodes + poly = lagrange_interpolating_polynomials(1 / 2 * (gauss_nodes[j] - 1), + gauss_nodes, gauss_wbary) + for i in 1:n_nodes + operator[i, j] = 1 / 2 * poly[i] * gauss_weights[j] / gauss_weights[i] + end end - end - # Calculate Vandermondes - lobatto_nodes, lobatto_weights = gauss_lobatto_nodes_weights(n_nodes) - gauss2lobatto = polynomial_interpolation_matrix(gauss_nodes, lobatto_nodes) - lobatto2gauss = polynomial_interpolation_matrix(lobatto_nodes, gauss_nodes) + # Calculate Vandermondes + lobatto_nodes, lobatto_weights = gauss_lobatto_nodes_weights(n_nodes) + gauss2lobatto = polynomial_interpolation_matrix(gauss_nodes, lobatto_nodes) + lobatto2gauss = polynomial_interpolation_matrix(lobatto_nodes, gauss_nodes) - return gauss2lobatto * operator * lobatto2gauss + return gauss2lobatto * operator * lobatto2gauss end - # Calculate reverse projection matrix for discrete L2 projection from upper to large (Gauss-Lobatto # version) function calc_reverse_upper(n_nodes, ::Val{:gauss_lobatto}) - # Calculate nodes, weights, and barycentric weights - nodes, weights = gauss_lobatto_nodes_weights(n_nodes) - wbary = barycentric_weights(nodes) - - # Calculate projection matrix (actually: discrete L2 projection with errors) - operator = zeros(n_nodes, n_nodes) - for j in 1:n_nodes - poly = lagrange_interpolating_polynomials(1/2 * (nodes[j] + 1), nodes, wbary) - for i in 1:n_nodes - operator[i, j] = 1/2 * poly[i] * weights[j]/weights[i] + # Calculate nodes, weights, and barycentric weights + nodes, weights = gauss_lobatto_nodes_weights(n_nodes) + wbary = barycentric_weights(nodes) + + # Calculate projection matrix (actually: discrete L2 projection with errors) + operator = zeros(n_nodes, n_nodes) + for j in 1:n_nodes + poly = lagrange_interpolating_polynomials(1 / 2 * (nodes[j] + 1), nodes, wbary) + for i in 1:n_nodes + operator[i, j] = 1 / 2 * poly[i] * weights[j] / weights[i] + end end - end - return operator + return operator end - # Calculate reverse projection matrix for discrete L2 projection from lower to large (Gauss-Lobatto # version) function calc_reverse_lower(n_nodes, ::Val{:gauss_lobatto}) - # Calculate nodes, weights, and barycentric weights - nodes, weights = gauss_lobatto_nodes_weights(n_nodes) - wbary = barycentric_weights(nodes) - - # Calculate projection matrix (actually: discrete L2 projection with errors) - operator = zeros(n_nodes, n_nodes) - for j in 1:n_nodes - poly = lagrange_interpolating_polynomials(1/2 * (nodes[j] - 1), nodes, wbary) - for i in 1:n_nodes - operator[i, j] = 1/2 * poly[i] * weights[j]/weights[i] + # Calculate nodes, weights, and barycentric weights + nodes, weights = 
gauss_lobatto_nodes_weights(n_nodes) + wbary = barycentric_weights(nodes) + + # Calculate projection matrix (actually: discrete L2 projection with errors) + operator = zeros(n_nodes, n_nodes) + for j in 1:n_nodes + poly = lagrange_interpolating_polynomials(1 / 2 * (nodes[j] - 1), nodes, wbary) + for i in 1:n_nodes + operator[i, j] = 1 / 2 * poly[i] * weights[j] / weights[i] + end end - end - return operator + return operator end - - end # @muladd diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index ba582b0d47e..9b87de777a6 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -3,32 +3,41 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - - -mutable struct P4estElementContainer{NDIMS, RealT<:Real, uEltype<:Real, NDIMSP1, NDIMSP2, NDIMSP3} <: AbstractContainer - # Physical coordinates at each node - node_coordinates ::Array{RealT, NDIMSP2} # [orientation, node_i, node_j, node_k, element] - # Jacobian matrix of the transformation - # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,... - jacobian_matrix ::Array{RealT, NDIMSP3} - # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension) - contravariant_vectors ::Array{RealT, NDIMSP3} # [dimension, index, node_i, node_j, node_k, element] - # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix) - inverse_jacobian ::Array{RealT, NDIMSP1} # [node_i, node_j, node_k, element] - # Buffer for calculated surface flux - surface_flux_values ::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element] - - # internal `resize!`able storage - _node_coordinates ::Vector{RealT} - _jacobian_matrix ::Vector{RealT} - _contravariant_vectors::Vector{RealT} - _inverse_jacobian ::Vector{RealT} - _surface_flux_values ::Vector{uEltype} +#! format: noindent + +mutable struct P4estElementContainer{NDIMS, RealT <: Real, uEltype <: Real, NDIMSP1, + NDIMSP2, NDIMSP3} <: AbstractContainer + # Physical coordinates at each node + node_coordinates::Array{RealT, NDIMSP2} # [orientation, node_i, node_j, node_k, element] + # Jacobian matrix of the transformation + # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,... 
+ jacobian_matrix::Array{RealT, NDIMSP3} + # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension) + contravariant_vectors::Array{RealT, NDIMSP3} # [dimension, index, node_i, node_j, node_k, element] + # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix) + inverse_jacobian::Array{RealT, NDIMSP1} # [node_i, node_j, node_k, element] + # Buffer for calculated surface flux + surface_flux_values::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element] + + # internal `resize!`able storage + _node_coordinates::Vector{RealT} + _jacobian_matrix::Vector{RealT} + _contravariant_vectors::Vector{RealT} + _inverse_jacobian::Vector{RealT} + _surface_flux_values::Vector{uEltype} end -@inline nelements(elements::P4estElementContainer) = size(elements.node_coordinates, ndims(elements) + 2) -@inline Base.ndims(::P4estElementContainer{NDIMS}) where NDIMS = NDIMS -@inline Base.eltype(::P4estElementContainer{NDIMS, RealT, uEltype}) where {NDIMS, RealT, uEltype} = uEltype +@inline function nelements(elements::P4estElementContainer) + size(elements.node_coordinates, ndims(elements) + 2) +end +@inline Base.ndims(::P4estElementContainer{NDIMS}) where {NDIMS} = NDIMS +@inline function Base.eltype(::P4estElementContainer{NDIMS, RealT, uEltype}) where { + NDIMS, + RealT, + uEltype + } + uEltype +end # Only one-dimensional `Array`s are `resize!`able in Julia. # Hence, we use `Vector`s as internal storage and `resize!` @@ -36,247 +45,268 @@ end # `unsafe_wrap`ping multi-dimensional `Array`s around the # internal storage. function Base.resize!(elements::P4estElementContainer, capacity) - @unpack _node_coordinates, _jacobian_matrix, _contravariant_vectors, + @unpack _node_coordinates, _jacobian_matrix, _contravariant_vectors, _inverse_jacobian, _surface_flux_values = elements - n_dims = ndims(elements) - n_nodes = size(elements.node_coordinates, 2) - n_variables = size(elements.surface_flux_values, 1) + n_dims = ndims(elements) + n_nodes = size(elements.node_coordinates, 2) + n_variables = size(elements.surface_flux_values, 1) - resize!(_node_coordinates, n_dims * n_nodes^n_dims * capacity) - elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) + resize!(_node_coordinates, n_dims * n_nodes^n_dims * capacity) + elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (n_dims, ntuple(_ -> n_nodes, n_dims)..., + capacity)) - resize!(_jacobian_matrix, n_dims^2 * n_nodes^n_dims * capacity) - elements.jacobian_matrix = unsafe_wrap(Array, pointer(_jacobian_matrix), - (n_dims, n_dims, ntuple(_ -> n_nodes, n_dims)..., capacity)) + resize!(_jacobian_matrix, n_dims^2 * n_nodes^n_dims * capacity) + elements.jacobian_matrix = unsafe_wrap(Array, pointer(_jacobian_matrix), + (n_dims, n_dims, + ntuple(_ -> n_nodes, n_dims)..., capacity)) - resize!(_contravariant_vectors, length(_jacobian_matrix)) - elements.contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors), - size(elements.jacobian_matrix)) + resize!(_contravariant_vectors, length(_jacobian_matrix)) + elements.contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors), + size(elements.jacobian_matrix)) - resize!(_inverse_jacobian, n_nodes^n_dims * capacity) - elements.inverse_jacobian = unsafe_wrap(Array, pointer(_inverse_jacobian), - (ntuple(_ -> n_nodes, n_dims)..., capacity)) + resize!(_inverse_jacobian, n_nodes^n_dims * capacity) + elements.inverse_jacobian = unsafe_wrap(Array, 
pointer(_inverse_jacobian), + (ntuple(_ -> n_nodes, n_dims)..., capacity)) - resize!(_surface_flux_values, - n_variables * n_nodes^(n_dims-1) * (n_dims*2) * capacity) - elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (n_variables, ntuple(_ -> n_nodes, n_dims-1)..., n_dims*2, capacity)) + resize!(_surface_flux_values, + n_variables * n_nodes^(n_dims - 1) * (n_dims * 2) * capacity) + elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (n_variables, + ntuple(_ -> n_nodes, n_dims - 1)..., + n_dims * 2, capacity)) - return nothing + return nothing end - # Create element container and initialize element data function init_elements(mesh::P4estMesh{NDIMS, RealT}, equations, - basis, ::Type{uEltype}) where {NDIMS, RealT<:Real, uEltype<:Real} - nelements = ncells(mesh) - - _node_coordinates = Vector{RealT}(undef, NDIMS * nnodes(basis)^NDIMS * nelements) - node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (NDIMS, ntuple(_ -> nnodes(basis), NDIMS)..., nelements)) - - _jacobian_matrix = Vector{RealT}(undef, NDIMS^2 * nnodes(basis)^NDIMS * nelements) - jacobian_matrix = unsafe_wrap(Array, pointer(_jacobian_matrix), - (NDIMS, NDIMS, ntuple(_ -> nnodes(basis), NDIMS)..., nelements)) - - _contravariant_vectors = similar(_jacobian_matrix) - contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors), - size(jacobian_matrix)) - - _inverse_jacobian = Vector{RealT}(undef, nnodes(basis)^NDIMS * nelements) - inverse_jacobian = unsafe_wrap(Array, pointer(_inverse_jacobian), - (ntuple(_ -> nnodes(basis), NDIMS)..., nelements)) - - _surface_flux_values = Vector{uEltype}(undef, - nvariables(equations) * nnodes(basis)^(NDIMS-1) * (NDIMS*2) * nelements) - surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (nvariables(equations), ntuple(_ -> nnodes(basis), NDIMS-1)..., NDIMS*2, nelements)) - - elements = P4estElementContainer{NDIMS, RealT, uEltype, NDIMS+1, NDIMS+2, NDIMS+3}( - node_coordinates, jacobian_matrix, contravariant_vectors, - inverse_jacobian, surface_flux_values, - _node_coordinates, _jacobian_matrix, _contravariant_vectors, - _inverse_jacobian, _surface_flux_values) - - init_elements!(elements, mesh, basis) - return elements + basis, + ::Type{uEltype}) where {NDIMS, RealT <: Real, uEltype <: Real} + nelements = ncells(mesh) + + _node_coordinates = Vector{RealT}(undef, NDIMS * nnodes(basis)^NDIMS * nelements) + node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (NDIMS, ntuple(_ -> nnodes(basis), NDIMS)..., + nelements)) + + _jacobian_matrix = Vector{RealT}(undef, NDIMS^2 * nnodes(basis)^NDIMS * nelements) + jacobian_matrix = unsafe_wrap(Array, pointer(_jacobian_matrix), + (NDIMS, NDIMS, ntuple(_ -> nnodes(basis), NDIMS)..., + nelements)) + + _contravariant_vectors = similar(_jacobian_matrix) + contravariant_vectors = unsafe_wrap(Array, pointer(_contravariant_vectors), + size(jacobian_matrix)) + + _inverse_jacobian = Vector{RealT}(undef, nnodes(basis)^NDIMS * nelements) + inverse_jacobian = unsafe_wrap(Array, pointer(_inverse_jacobian), + (ntuple(_ -> nnodes(basis), NDIMS)..., nelements)) + + _surface_flux_values = Vector{uEltype}(undef, + nvariables(equations) * + nnodes(basis)^(NDIMS - 1) * (NDIMS * 2) * + nelements) + surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (nvariables(equations), + ntuple(_ -> nnodes(basis), NDIMS - 1)..., + NDIMS * 2, nelements)) + + elements = P4estElementContainer{NDIMS, RealT, uEltype, NDIMS + 1, NDIMS + 2, + 
NDIMS + 3}(node_coordinates, jacobian_matrix, + contravariant_vectors, + inverse_jacobian, surface_flux_values, + _node_coordinates, _jacobian_matrix, + _contravariant_vectors, + _inverse_jacobian, _surface_flux_values) + + init_elements!(elements, mesh, basis) + return elements end +mutable struct P4estInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: + AbstractContainer + u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] + neighbor_ids::Matrix{Int} # [primary/secondary, interface] + node_indices::Matrix{NTuple{NDIMS, Symbol}} # [primary/secondary, interface] -mutable struct P4estInterfaceContainer{NDIMS, uEltype<:Real, NDIMSP2} <: AbstractContainer - u ::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - neighbor_ids ::Matrix{Int} # [primary/secondary, interface] - node_indices ::Matrix{NTuple{NDIMS, Symbol}} # [primary/secondary, interface] - - # internal `resize!`able storage - _u ::Vector{uEltype} - _neighbor_ids ::Vector{Int} - _node_indices ::Vector{NTuple{NDIMS, Symbol}} + # internal `resize!`able storage + _u::Vector{uEltype} + _neighbor_ids::Vector{Int} + _node_indices::Vector{NTuple{NDIMS, Symbol}} end -@inline ninterfaces(interfaces::P4estInterfaceContainer) = size(interfaces.neighbor_ids, 2) -@inline Base.ndims(::P4estInterfaceContainer{NDIMS}) where NDIMS = NDIMS +@inline function ninterfaces(interfaces::P4estInterfaceContainer) + size(interfaces.neighbor_ids, 2) +end +@inline Base.ndims(::P4estInterfaceContainer{NDIMS}) where {NDIMS} = NDIMS # See explanation of Base.resize! for the element container function Base.resize!(interfaces::P4estInterfaceContainer, capacity) - @unpack _u, _neighbor_ids, _node_indices = interfaces + @unpack _u, _neighbor_ids, _node_indices = interfaces - n_dims = ndims(interfaces) - n_nodes = size(interfaces.u, 3) - n_variables = size(interfaces.u, 2) + n_dims = ndims(interfaces) + n_nodes = size(interfaces.u, 3) + n_variables = size(interfaces.u, 2) - resize!(_u, 2 * n_variables * n_nodes^(n_dims-1) * capacity) - interfaces.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, ntuple(_ -> n_nodes, n_dims-1)..., capacity)) + resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) + interfaces.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., + capacity)) - resize!(_neighbor_ids, 2 * capacity) - interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, capacity)) + resize!(_neighbor_ids, 2 * capacity) + interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, capacity)) - resize!(_node_indices, 2 * capacity) - interfaces.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + resize!(_node_indices, 2 * capacity) + interfaces.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) - return nothing + return nothing end - # Create interface container and initialize interface data. 
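Before the interface container is created next: the element, interface, boundary, and mortar containers in this file all share the storage idiom spelled out in the comment above `Base.resize!`, namely that only one-dimensional `Vector`s can be `resize!`d in Julia, so each multi-dimensional array is an `unsafe_wrap`ped view of a flat vector that must be rebuilt after every resize. Here is a minimal self-contained sketch of that idiom; all names (`SketchContainer` and its fields) are hypothetical, not Trixi API:

```julia
# Keep a resizable Vector as the actual storage and wrap a multi-dimensional
# Array around its memory. The struct keeps the flat vector alive, which is
# required for the `unsafe_wrap`ped view to remain valid.
mutable struct SketchContainer
    data::Array{Float64, 2}   # [variable, element] view used by the solver
    _data::Vector{Float64}    # internal `resize!`able storage
end

function SketchContainer(n_variables::Integer, capacity::Integer)
    _data = Vector{Float64}(undef, n_variables * capacity)
    data = unsafe_wrap(Array, pointer(_data), (n_variables, capacity))
    return SketchContainer(data, _data)
end

function Base.resize!(c::SketchContainer, capacity::Integer)
    n_variables = size(c.data, 1)
    resize!(c._data, n_variables * capacity)
    # `resize!` may reallocate the buffer, so the wrapper must be rebuilt from
    # the (possibly new) pointer; existing entries are preserved by `resize!`.
    c.data = unsafe_wrap(Array, pointer(c._data), (n_variables, capacity))
    return c
end

c = SketchContainer(3, 10)
resize!(c, 20)
@assert size(c.data) == (3, 20)
```

Rebuilding the wrapper after `resize!` is essential: resizing may move the underlying buffer, and a stale wrapped array would then point at freed memory.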
function init_interfaces(mesh::P4estMesh, equations, basis, elements) - NDIMS = ndims(elements) - uEltype = eltype(elements) + NDIMS = ndims(elements) + uEltype = eltype(elements) - # Initialize container - n_interfaces = count_required_surfaces(mesh).interfaces + # Initialize container + n_interfaces = count_required_surfaces(mesh).interfaces - _u = Vector{uEltype}(undef, 2 * nvariables(equations) * nnodes(basis)^(NDIMS-1) * n_interfaces) - u = unsafe_wrap(Array, pointer(_u), - (2, nvariables(equations), ntuple(_ -> nnodes(basis), NDIMS-1)..., n_interfaces)) + _u = Vector{uEltype}(undef, + 2 * nvariables(equations) * nnodes(basis)^(NDIMS - 1) * + n_interfaces) + u = unsafe_wrap(Array, pointer(_u), + (2, nvariables(equations), ntuple(_ -> nnodes(basis), NDIMS - 1)..., + n_interfaces)) - _neighbor_ids = Vector{Int}(undef, 2 * n_interfaces) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, n_interfaces)) + _neighbor_ids = Vector{Int}(undef, 2 * n_interfaces) + neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2, n_interfaces)) - _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_interfaces) - node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_interfaces)) + _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_interfaces) + node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_interfaces)) - interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS+2}(u, neighbor_ids, node_indices, - _u, _neighbor_ids, _node_indices) + interfaces = P4estInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, neighbor_ids, + node_indices, + _u, _neighbor_ids, + _node_indices) - init_interfaces!(interfaces, mesh) + init_interfaces!(interfaces, mesh) - return interfaces + return interfaces end - function init_interfaces!(interfaces, mesh::P4estMesh) - init_surfaces!(interfaces, nothing, nothing, mesh) + init_surfaces!(interfaces, nothing, nothing, mesh) - return interfaces + return interfaces end +mutable struct P4estBoundaryContainer{NDIMS, uEltype <: Real, NDIMSP1} <: + AbstractContainer + u::Array{uEltype, NDIMSP1} # [variables, i, j, boundary] + neighbor_ids::Vector{Int} # [boundary] + node_indices::Vector{NTuple{NDIMS, Symbol}} # [boundary] + name::Vector{Symbol} # [boundary] -mutable struct P4estBoundaryContainer{NDIMS, uEltype<:Real, NDIMSP1} <: AbstractContainer - u ::Array{uEltype, NDIMSP1} # [variables, i, j, boundary] - neighbor_ids::Vector{Int} # [boundary] - node_indices::Vector{NTuple{NDIMS, Symbol}} # [boundary] - name ::Vector{Symbol} # [boundary] - - # internal `resize!`able storage - _u ::Vector{uEltype} + # internal `resize!`able storage + _u::Vector{uEltype} end -@inline nboundaries(boundaries::P4estBoundaryContainer) = length(boundaries.neighbor_ids) -@inline Base.ndims(::P4estBoundaryContainer{NDIMS}) where NDIMS = NDIMS +@inline function nboundaries(boundaries::P4estBoundaryContainer) + length(boundaries.neighbor_ids) +end +@inline Base.ndims(::P4estBoundaryContainer{NDIMS}) where {NDIMS} = NDIMS # See explanation of Base.resize! 
for the element container function Base.resize!(boundaries::P4estBoundaryContainer, capacity) - @unpack _u, neighbor_ids, node_indices, name = boundaries + @unpack _u, neighbor_ids, node_indices, name = boundaries - n_dims = ndims(boundaries) - n_nodes = size(boundaries.u, 2) - n_variables = size(boundaries.u, 1) + n_dims = ndims(boundaries) + n_nodes = size(boundaries.u, 2) + n_variables = size(boundaries.u, 1) - resize!(_u, n_variables * n_nodes^(n_dims-1) * capacity) - boundaries.u = unsafe_wrap(Array, pointer(_u), - (n_variables, ntuple(_ -> n_nodes, n_dims-1)..., capacity)) + resize!(_u, n_variables * n_nodes^(n_dims - 1) * capacity) + boundaries.u = unsafe_wrap(Array, pointer(_u), + (n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., + capacity)) - resize!(neighbor_ids, capacity) + resize!(neighbor_ids, capacity) - resize!(node_indices, capacity) + resize!(node_indices, capacity) - resize!(name, capacity) + resize!(name, capacity) - return nothing + return nothing end - # Create boundary container and initialize boundary data in `elements`. function init_boundaries(mesh::P4estMesh, equations, basis, elements) - NDIMS = ndims(elements) - uEltype = eltype(elements) + NDIMS = ndims(elements) + uEltype = eltype(elements) - # Initialize container - n_boundaries = count_required_surfaces(mesh).boundaries + # Initialize container + n_boundaries = count_required_surfaces(mesh).boundaries - _u = Vector{uEltype}(undef, nvariables(equations) * nnodes(basis)^(NDIMS-1) * n_boundaries) - u = unsafe_wrap(Array, pointer(_u), - (nvariables(equations), ntuple(_ -> nnodes(basis), NDIMS-1)..., n_boundaries)) + _u = Vector{uEltype}(undef, + nvariables(equations) * nnodes(basis)^(NDIMS - 1) * + n_boundaries) + u = unsafe_wrap(Array, pointer(_u), + (nvariables(equations), ntuple(_ -> nnodes(basis), NDIMS - 1)..., + n_boundaries)) - neighbor_ids = Vector{Int}(undef, n_boundaries) - node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_boundaries) - names = Vector{Symbol}(undef, n_boundaries) + neighbor_ids = Vector{Int}(undef, n_boundaries) + node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_boundaries) + names = Vector{Symbol}(undef, n_boundaries) - boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS+1}(u, neighbor_ids, - node_indices, names, _u) + boundaries = P4estBoundaryContainer{NDIMS, uEltype, NDIMS + 1}(u, neighbor_ids, + node_indices, names, + _u) - if n_boundaries > 0 - init_boundaries!(boundaries, mesh) - end + if n_boundaries > 0 + init_boundaries!(boundaries, mesh) + end - return boundaries + return boundaries end - function init_boundaries!(boundaries, mesh::P4estMesh) - init_surfaces!(nothing, nothing, boundaries, mesh) + init_surfaces!(nothing, nothing, boundaries, mesh) - return boundaries + return boundaries end - # Function barrier for type stability function init_boundaries_iter_face_inner(info, boundaries, boundary_id, mesh) - # Extract boundary data - side = unsafe_load_side(info) - # Get local tree, one-based indexing - tree = unsafe_load_tree(mesh.p4est, side.treeid + 1) - # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset + # Extract boundary data + side = unsafe_load_side(info) + # Get local tree, one-based indexing + tree = unsafe_load_tree(mesh.p4est, side.treeid + 1) + # Quadrant numbering offset of this quadrant + offset = tree.quadrants_offset - # Verify before accessing is.full, but this should never happen - @assert side.is_hanging == false + # Verify before accessing is.full, but this should never happen + @assert side.is_hanging == 
false - local_quad_id = side.is.full.quadid - # Global ID of this quad - quad_id = offset + local_quad_id + local_quad_id = side.is.full.quadid + # Global ID of this quad + quad_id = offset + local_quad_id - # Write data to boundaries container - # `p4est` uses zero-based indexing; convert to one-based indexing - boundaries.neighbor_ids[boundary_id] = quad_id + 1 + # Write data to boundaries container + # `p4est` uses zero-based indexing; convert to one-based indexing + boundaries.neighbor_ids[boundary_id] = quad_id + 1 - # Face at which the boundary lies - face = side.face + # Face at which the boundary lies + face = side.face - # Save boundaries.node_indices dimension specific in containers_[23]d.jl - init_boundary_node_indices!(boundaries, face, boundary_id) + # Save boundaries.node_indices dimension specific in containers_[23]d.jl + init_boundary_node_indices!(boundaries, face, boundary_id) - # One-based indexing - boundaries.name[boundary_id] = mesh.boundary_names[face + 1, side.treeid + 1] + # One-based indexing + boundaries.name[boundary_id] = mesh.boundary_names[face + 1, side.treeid + 1] - return nothing + return nothing end - # Container data structure (structure-of-arrays style) for DG L2 mortars # # The positions used in `neighbor_ids` are 1:3 (in 2D) or 1:5 (in 3D), where 1:2 (in 2D) @@ -302,380 +332,391 @@ end # │ └─────────────┴─────────────┘ └───────────────────────────┘ # │ # ⋅────> ξ -mutable struct P4estMortarContainer{NDIMS, uEltype<:Real, NDIMSP1, NDIMSP3} <: AbstractContainer - u ::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - neighbor_ids ::Matrix{Int} # [position, mortar] - node_indices ::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] - - # internal `resize!`able storage - _u ::Vector{uEltype} - _neighbor_ids ::Vector{Int} - _node_indices ::Vector{NTuple{NDIMS, Symbol}} +mutable struct P4estMortarContainer{NDIMS, uEltype <: Real, NDIMSP1, NDIMSP3} <: + AbstractContainer + u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] + neighbor_ids::Matrix{Int} # [position, mortar] + node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] + + # internal `resize!`able storage + _u::Vector{uEltype} + _neighbor_ids::Vector{Int} + _node_indices::Vector{NTuple{NDIMS, Symbol}} end @inline nmortars(mortars::P4estMortarContainer) = size(mortars.neighbor_ids, 2) -@inline Base.ndims(::P4estMortarContainer{NDIMS}) where NDIMS = NDIMS +@inline Base.ndims(::P4estMortarContainer{NDIMS}) where {NDIMS} = NDIMS # See explanation of Base.resize! 
for the element container function Base.resize!(mortars::P4estMortarContainer, capacity) - @unpack _u, _neighbor_ids, _node_indices = mortars + @unpack _u, _neighbor_ids, _node_indices = mortars - n_dims = ndims(mortars) - n_nodes = size(mortars.u, 4) - n_variables = size(mortars.u, 2) + n_dims = ndims(mortars) + n_nodes = size(mortars.u, 4) + n_variables = size(mortars.u, 2) - resize!(_u, 2 * n_variables * 2^(n_dims-1) * n_nodes^(n_dims-1) * capacity) - mortars.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, 2^(n_dims-1), ntuple(_ -> n_nodes, n_dims-1)..., capacity)) + resize!(_u, 2 * n_variables * 2^(n_dims - 1) * n_nodes^(n_dims - 1) * capacity) + mortars.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, 2^(n_dims - 1), + ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) - resize!(_neighbor_ids, (2^(n_dims-1) + 1) * capacity) - mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (2^(n_dims-1) + 1, capacity)) + resize!(_neighbor_ids, (2^(n_dims - 1) + 1) * capacity) + mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2^(n_dims - 1) + 1, capacity)) - resize!(_node_indices, 2 * capacity) - mortars.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + resize!(_node_indices, 2 * capacity) + mortars.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) - return nothing + return nothing end - # Create mortar container and initialize mortar data. function init_mortars(mesh::P4estMesh, equations, basis, elements) - NDIMS = ndims(elements) - uEltype = eltype(elements) - - # Initialize container - n_mortars = count_required_surfaces(mesh).mortars - - _u = Vector{uEltype}(undef, - 2 * nvariables(equations) * 2^(NDIMS-1) * nnodes(basis)^(NDIMS-1) * n_mortars) - u = unsafe_wrap(Array, pointer(_u), - (2, nvariables(equations), 2^(NDIMS-1), ntuple(_ -> nnodes(basis), NDIMS-1)..., n_mortars)) - - _neighbor_ids = Vector{Int}(undef, (2^(NDIMS-1) + 1) * n_mortars) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), (2^(NDIMS-1) + 1, n_mortars)) - - _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mortars) - node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mortars)) - - mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS+1, NDIMS+3}(u, neighbor_ids, node_indices, - _u, _neighbor_ids, _node_indices) - - if n_mortars > 0 - init_mortars!(mortars, mesh) - end + NDIMS = ndims(elements) + uEltype = eltype(elements) + + # Initialize container + n_mortars = count_required_surfaces(mesh).mortars + + _u = Vector{uEltype}(undef, + 2 * nvariables(equations) * 2^(NDIMS - 1) * + nnodes(basis)^(NDIMS - 1) * n_mortars) + u = unsafe_wrap(Array, pointer(_u), + (2, nvariables(equations), 2^(NDIMS - 1), + ntuple(_ -> nnodes(basis), NDIMS - 1)..., n_mortars)) + + _neighbor_ids = Vector{Int}(undef, (2^(NDIMS - 1) + 1) * n_mortars) + neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2^(NDIMS - 1) + 1, n_mortars)) + + _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mortars) + node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mortars)) + + mortars = P4estMortarContainer{NDIMS, uEltype, NDIMS + 1, NDIMS + 3}(u, + neighbor_ids, + node_indices, + _u, + _neighbor_ids, + _node_indices) + + if n_mortars > 0 + init_mortars!(mortars, mesh) + end - return mortars + return mortars end - function init_mortars!(mortars, mesh::P4estMesh) - init_surfaces!(nothing, mortars, nothing, mesh) + init_surfaces!(nothing, mortars, nothing, mesh) - return mortars + return mortars end - function 
reinitialize_containers!(mesh::P4estMesh, equations, dg::DGSEM, cache) - # Re-initialize elements container - @unpack elements = cache - resize!(elements, ncells(mesh)) - init_elements!(elements, mesh, dg.basis) + # Re-initialize elements container + @unpack elements = cache + resize!(elements, ncells(mesh)) + init_elements!(elements, mesh, dg.basis) - required = count_required_surfaces(mesh) + required = count_required_surfaces(mesh) - # resize interfaces container - @unpack interfaces = cache - resize!(interfaces, required.interfaces) + # resize interfaces container + @unpack interfaces = cache + resize!(interfaces, required.interfaces) - # resize boundaries container - @unpack boundaries = cache - resize!(boundaries, required.boundaries) + # resize boundaries container + @unpack boundaries = cache + resize!(boundaries, required.boundaries) - # resize mortars container - @unpack mortars = cache - resize!(mortars, required.mortars) + # resize mortars container + @unpack mortars = cache + resize!(mortars, required.mortars) - # re-initialize containers together to reduce - # the number of iterations over the mesh in `p4est` - init_surfaces!(interfaces, mortars, boundaries, mesh) + # re-initialize containers together to reduce + # the number of iterations over the mesh in `p4est` + init_surfaces!(interfaces, mortars, boundaries, mesh) end - # A helper struct used in initialization methods below mutable struct InitSurfacesIterFaceUserData{Interfaces, Mortars, Boundaries, Mesh} - interfaces ::Interfaces - interface_id::Int - mortars ::Mortars - mortar_id ::Int - boundaries ::Boundaries - boundary_id ::Int - mesh ::Mesh + interfaces::Interfaces + interface_id::Int + mortars::Mortars + mortar_id::Int + boundaries::Boundaries + boundary_id::Int + mesh::Mesh end function InitSurfacesIterFaceUserData(interfaces, mortars, boundaries, mesh) - return InitSurfacesIterFaceUserData{ - typeof(interfaces), typeof(mortars), typeof(boundaries), typeof(mesh)}( - interfaces, 1, mortars, 1, boundaries, 1, mesh) + return InitSurfacesIterFaceUserData{ + typeof(interfaces), typeof(mortars), + typeof(boundaries), typeof(mesh)}(interfaces, 1, + mortars, 1, + boundaries, 1, + mesh) end function init_surfaces_iter_face(info, user_data) - # Unpack user_data - data = unsafe_pointer_to_objref(Ptr{InitSurfacesIterFaceUserData}(user_data)) + # Unpack user_data + data = unsafe_pointer_to_objref(Ptr{InitSurfacesIterFaceUserData}(user_data)) - # Function barrier because the unpacked user_data above is type-unstable - init_surfaces_iter_face_inner(info, data) + # Function barrier because the unpacked user_data above is type-unstable + init_surfaces_iter_face_inner(info, data) end # 2D -cfunction(::typeof(init_surfaces_iter_face), ::Val{2}) = @cfunction(init_surfaces_iter_face, Cvoid, (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(init_surfaces_iter_face), ::Val{2}) + @cfunction(init_surfaces_iter_face, Cvoid, + (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(init_surfaces_iter_face), ::Val{3}) = @cfunction(init_surfaces_iter_face, Cvoid, (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(init_surfaces_iter_face), ::Val{3}) + @cfunction(init_surfaces_iter_face, Cvoid, + (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +end # Function barrier for type stability function init_surfaces_iter_face_inner(info, user_data) - @unpack interfaces, mortars, boundaries = user_data - elem_count = unsafe_load(info).sides.elem_count - - if elem_count == 2 - # Two neighboring 
elements => Interface or mortar - - # Extract surface data - sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) - - if sides[1].is_hanging == false && sides[2].is_hanging == false - # No hanging nodes => normal interface - if interfaces !== nothing - init_interfaces_iter_face_inner(info, sides, user_data) - end - else - # Hanging nodes => mortar - if mortars !== nothing - init_mortars_iter_face_inner(info, sides, user_data) - end - end - elseif elem_count == 1 - # One neighboring element => boundary - if boundaries !== nothing - init_boundaries_iter_face_inner(info, user_data) + @unpack interfaces, mortars, boundaries = user_data + elem_count = unsafe_load(info).sides.elem_count + + if elem_count == 2 + # Two neighboring elements => Interface or mortar + + # Extract surface data + sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) + + if sides[1].is_hanging == false && sides[2].is_hanging == false + # No hanging nodes => normal interface + if interfaces !== nothing + init_interfaces_iter_face_inner(info, sides, user_data) + end + else + # Hanging nodes => mortar + if mortars !== nothing + init_mortars_iter_face_inner(info, sides, user_data) + end + end + elseif elem_count == 1 + # One neighboring element => boundary + if boundaries !== nothing + init_boundaries_iter_face_inner(info, user_data) + end end - end - return nothing + return nothing end function init_surfaces!(interfaces, mortars, boundaries, mesh::P4estMesh) - # Let `p4est` iterate over all interfaces and call init_surfaces_iter_face - iter_face_c = cfunction(init_surfaces_iter_face, Val(ndims(mesh))) - user_data = InitSurfacesIterFaceUserData( - interfaces, mortars, boundaries, mesh) + # Let `p4est` iterate over all interfaces and call init_surfaces_iter_face + iter_face_c = cfunction(init_surfaces_iter_face, Val(ndims(mesh))) + user_data = InitSurfacesIterFaceUserData(interfaces, mortars, boundaries, mesh) - iterate_p4est(mesh.p4est, user_data; iter_face_c=iter_face_c) + iterate_p4est(mesh.p4est, user_data; iter_face_c = iter_face_c) - return interfaces + return interfaces end - # Initialization of interfaces after the function barrier function init_interfaces_iter_face_inner(info, sides, user_data) - @unpack interfaces, interface_id, mesh = user_data - user_data.interface_id += 1 - - # Get Tuple of local trees, one-based indexing - trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), - unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) - # Quadrant numbering offsets of the quadrants at this interface - offsets = SVector(trees[1].quadrants_offset, - trees[2].quadrants_offset) - - local_quad_ids = SVector(sides[1].is.full.quadid, sides[2].is.full.quadid) - # Global IDs of the neighboring quads - quad_ids = offsets + local_quad_ids - - # Write data to interfaces container - # `p4est` uses zero-based indexing; convert to one-based indexing - interfaces.neighbor_ids[1, interface_id] = quad_ids[1] + 1 - interfaces.neighbor_ids[2, interface_id] = quad_ids[2] + 1 - - # Face at which the interface lies - faces = (sides[1].face, sides[2].face) + @unpack interfaces, interface_id, mesh = user_data + user_data.interface_id += 1 + + # Get Tuple of local trees, one-based indexing + trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), + unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) + # Quadrant numbering offsets of the quadrants at this interface + offsets = SVector(trees[1].quadrants_offset, + trees[2].quadrants_offset) + + local_quad_ids = SVector(sides[1].is.full.quadid, 
sides[2].is.full.quadid) + # Global IDs of the neighboring quads + quad_ids = offsets + local_quad_ids + + # Write data to interfaces container + # `p4est` uses zero-based indexing; convert to one-based indexing + interfaces.neighbor_ids[1, interface_id] = quad_ids[1] + 1 + interfaces.neighbor_ids[2, interface_id] = quad_ids[2] + 1 + + # Face at which the interface lies + faces = (sides[1].face, sides[2].face) - # Save interfaces.node_indices dimension specific in containers_[23]d.jl - init_interface_node_indices!(interfaces, faces, - unsafe_load(info).orientation, interface_id) + # Save interfaces.node_indices dimension specific in containers_[23]d.jl + init_interface_node_indices!(interfaces, faces, + unsafe_load(info).orientation, interface_id) - return nothing + return nothing end - # Initialization of boundaries after the function barrier function init_boundaries_iter_face_inner(info, user_data) - @unpack boundaries, boundary_id, mesh = user_data - user_data.boundary_id += 1 + @unpack boundaries, boundary_id, mesh = user_data + user_data.boundary_id += 1 - # Extract boundary data - side = unsafe_load_side(info) - # Get local tree, one-based indexing - tree = unsafe_load_tree(mesh.p4est, side.treeid + 1) - # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset + # Extract boundary data + side = unsafe_load_side(info) + # Get local tree, one-based indexing + tree = unsafe_load_tree(mesh.p4est, side.treeid + 1) + # Quadrant numbering offset of this quadrant + offset = tree.quadrants_offset - # Verify before accessing is.full, but this should never happen - @assert side.is_hanging == false + # Verify before accessing is.full, but this should never happen + @assert side.is_hanging == false - local_quad_id = side.is.full.quadid - # Global ID of this quad - quad_id = offset + local_quad_id + local_quad_id = side.is.full.quadid + # Global ID of this quad + quad_id = offset + local_quad_id - # Write data to boundaries container - # `p4est` uses zero-based indexing; convert to one-based indexing - boundaries.neighbor_ids[boundary_id] = quad_id + 1 + # Write data to boundaries container + # `p4est` uses zero-based indexing; convert to one-based indexing + boundaries.neighbor_ids[boundary_id] = quad_id + 1 - # Face at which the boundary lies - face = side.face + # Face at which the boundary lies + face = side.face - # Save boundaries.node_indices dimension specific in containers_[23]d.jl - init_boundary_node_indices!(boundaries, face, boundary_id) + # Save boundaries.node_indices dimension specific in containers_[23]d.jl + init_boundary_node_indices!(boundaries, face, boundary_id) - # One-based indexing - boundaries.name[boundary_id] = mesh.boundary_names[face + 1, side.treeid + 1] + # One-based indexing + boundaries.name[boundary_id] = mesh.boundary_names[face + 1, side.treeid + 1] - return nothing + return nothing end - # Initialization of mortars after the function barrier function init_mortars_iter_face_inner(info, sides, user_data) - @unpack mortars, mortar_id, mesh = user_data - user_data.mortar_id += 1 - - # Get Tuple of local trees, one-based indexing - trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), - unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) - # Quadrant numbering offsets of the quadrants at this interface - offsets = SVector(trees[1].quadrants_offset, - trees[2].quadrants_offset) - - if sides[1].is_hanging == true - # Left is small, right is large - faces = (sides[1].face, sides[2].face) - - local_small_quad_ids = sides[1].is.hanging.quadid - 
# Global IDs of the two small quads - small_quad_ids = offsets[1] .+ local_small_quad_ids - - # Just be sure before accessing is.full - @assert sides[2].is_hanging == false - large_quad_id = offsets[2] + sides[2].is.full.quadid - else # sides[2].is_hanging == true - # Right is small, left is large. - # init_mortar_node_indices! below expects side 1 to contain the small elements. - faces = (sides[2].face, sides[1].face) - - local_small_quad_ids = sides[2].is.hanging.quadid - # Global IDs of the two small quads - small_quad_ids = offsets[2] .+ local_small_quad_ids - - # Just be sure before accessing is.full - @assert sides[1].is_hanging == false - large_quad_id = offsets[1] + sides[1].is.full.quadid - end + @unpack mortars, mortar_id, mesh = user_data + user_data.mortar_id += 1 + + # Get Tuple of local trees, one-based indexing + trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), + unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) + # Quadrant numbering offsets of the quadrants at this interface + offsets = SVector(trees[1].quadrants_offset, + trees[2].quadrants_offset) + + if sides[1].is_hanging == true + # Left is small, right is large + faces = (sides[1].face, sides[2].face) + + local_small_quad_ids = sides[1].is.hanging.quadid + # Global IDs of the two small quads + small_quad_ids = offsets[1] .+ local_small_quad_ids + + # Just be sure before accessing is.full + @assert sides[2].is_hanging == false + large_quad_id = offsets[2] + sides[2].is.full.quadid + else # sides[2].is_hanging == true + # Right is small, left is large. + # init_mortar_node_indices! below expects side 1 to contain the small elements. + faces = (sides[2].face, sides[1].face) + + local_small_quad_ids = sides[2].is.hanging.quadid + # Global IDs of the two small quads + small_quad_ids = offsets[2] .+ local_small_quad_ids + + # Just be sure before accessing is.full + @assert sides[1].is_hanging == false + large_quad_id = offsets[1] + sides[1].is.full.quadid + end - # Write data to mortar container, 1 and 2 are the small elements - # `p4est` uses zero-based indexing; convert to one-based indexing - mortars.neighbor_ids[1:end-1, mortar_id] .= small_quad_ids[:] .+ 1 - # Last entry is the large element - mortars.neighbor_ids[end, mortar_id] = large_quad_id + 1 + # Write data to mortar container, 1 and 2 are the small elements + # `p4est` uses zero-based indexing; convert to one-based indexing + mortars.neighbor_ids[1:(end - 1), mortar_id] .= small_quad_ids[:] .+ 1 + # Last entry is the large element + mortars.neighbor_ids[end, mortar_id] = large_quad_id + 1 - init_mortar_node_indices!(mortars, faces, unsafe_load(info).orientation, mortar_id) + init_mortar_node_indices!(mortars, faces, unsafe_load(info).orientation, mortar_id) - return nothing + return nothing end - # Iterate over all interfaces and count # - (inner) interfaces # - mortars # - boundaries # and collect the numbers in `user_data` in this order. 
function count_surfaces_iter_face(info, user_data) - elem_count = unsafe_load(info).sides.elem_count - - if elem_count == 2 - # Two neighboring elements => Interface or mortar - - # Extract surface data - sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) - - if sides[1].is_hanging == false && sides[2].is_hanging == false - # No hanging nodes => normal interface - # Unpack user_data = [interface_count] and increment interface_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 1) - unsafe_store!(ptr, id + 1, 1) - else - # Hanging nodes => mortar - # Unpack user_data = [mortar_count] and increment mortar_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 2) - unsafe_store!(ptr, id + 1, 2) + elem_count = unsafe_load(info).sides.elem_count + + if elem_count == 2 + # Two neighboring elements => Interface or mortar + + # Extract surface data + sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) + + if sides[1].is_hanging == false && sides[2].is_hanging == false + # No hanging nodes => normal interface + # Unpack user_data = [interface_count] and increment interface_count + ptr = Ptr{Int}(user_data) + id = unsafe_load(ptr, 1) + unsafe_store!(ptr, id + 1, 1) + else + # Hanging nodes => mortar + # Unpack user_data = [mortar_count] and increment mortar_count + ptr = Ptr{Int}(user_data) + id = unsafe_load(ptr, 2) + unsafe_store!(ptr, id + 1, 2) + end + elseif elem_count == 1 + # One neighboring elements => boundary + + # Unpack user_data = [boundary_count] and increment boundary_count + ptr = Ptr{Int}(user_data) + id = unsafe_load(ptr, 3) + unsafe_store!(ptr, id + 1, 3) end - elseif elem_count == 1 - # One neighboring elements => boundary - - # Unpack user_data = [boundary_count] and increment boundary_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 3) - unsafe_store!(ptr, id + 1, 3) - end - return nothing + return nothing end # 2D -cfunction(::typeof(count_surfaces_iter_face), ::Val{2}) = @cfunction(count_surfaces_iter_face, Cvoid, (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(count_surfaces_iter_face), ::Val{2}) + @cfunction(count_surfaces_iter_face, Cvoid, + (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(count_surfaces_iter_face), ::Val{3}) = @cfunction(count_surfaces_iter_face, Cvoid, (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(count_surfaces_iter_face), ::Val{3}) + @cfunction(count_surfaces_iter_face, Cvoid, + (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +end function count_required_surfaces(mesh::P4estMesh) - # Let `p4est` iterate over all interfaces and call count_surfaces_iter_face - iter_face_c = cfunction(count_surfaces_iter_face, Val(ndims(mesh))) + # Let `p4est` iterate over all interfaces and call count_surfaces_iter_face + iter_face_c = cfunction(count_surfaces_iter_face, Val(ndims(mesh))) - # interfaces, mortars, boundaries - user_data = [0, 0, 0] + # interfaces, mortars, boundaries + user_data = [0, 0, 0] - iterate_p4est(mesh.p4est, user_data; iter_face_c=iter_face_c) + iterate_p4est(mesh.p4est, user_data; iter_face_c = iter_face_c) - # Return counters - return (interfaces = user_data[1], - mortars = user_data[2], - boundaries = user_data[3]) + # Return counters + return (interfaces = user_data[1], + mortars = user_data[2], + boundaries = user_data[3]) end - # Return direction of the face, which is indexed by node_indices @inline function indices2direction(indices) - if indices[1] === :begin - return 1 - elseif indices[1] === :end - return 2 - elseif indices[2] 
=== :begin - return 3 - elseif indices[2] === :end - return 4 - elseif indices[3] === :begin - return 5 - else # if indices[3] === :end - return 6 - end + if indices[1] === :begin + return 1 + elseif indices[1] === :end + return 2 + elseif indices[2] === :begin + return 3 + elseif indices[2] === :end + return 4 + elseif indices[3] === :begin + return 5 + else # if indices[3] === :end + return 6 + end end - - include("containers_2d.jl") include("containers_3d.jl") include("containers_parallel.jl") include("containers_parallel_2d.jl") include("containers_parallel_3d.jl") - end # @muladd diff --git a/src/solvers/dgsem_p4est/containers_2d.jl b/src/solvers/dgsem_p4est/containers_2d.jl index cf18e433ff7..4f7d903897a 100644 --- a/src/solvers/dgsem_p4est/containers_2d.jl +++ b/src/solvers/dgsem_p4est/containers_2d.jl @@ -3,174 +3,170 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Initialize data structures in element container function init_elements!(elements, mesh::P4estMesh{2}, basis::LobattoLegendreBasis) - @unpack node_coordinates, jacobian_matrix, - contravariant_vectors, inverse_jacobian = elements + @unpack node_coordinates, jacobian_matrix, + contravariant_vectors, inverse_jacobian = elements - calc_node_coordinates!(node_coordinates, mesh, basis) + calc_node_coordinates!(node_coordinates, mesh, basis) - for element in 1:ncells(mesh) - calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) + for element in 1:ncells(mesh) + calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) - calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix) + calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix) - calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix) - end + calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix) + end - return nothing + return nothing end - # Interpolate tree_node_coordinates to each quadrant at the nodes of the specified basis function calc_node_coordinates!(node_coordinates, mesh::P4estMesh{2}, basis::LobattoLegendreBasis) - # Hanging nodes will cause holes in the mesh if its polydeg is higher - # than the polydeg of the solver. - @assert length(basis.nodes) >= length(mesh.nodes) "The solver can't have a lower polydeg than the mesh" + # Hanging nodes will cause holes in the mesh if its polydeg is higher + # than the polydeg of the solver. + @assert length(basis.nodes)>=length(mesh.nodes) "The solver can't have a lower polydeg than the mesh" - calc_node_coordinates!(node_coordinates, mesh, basis.nodes) + calc_node_coordinates!(node_coordinates, mesh, basis.nodes) end # Interpolate tree_node_coordinates to each quadrant at the specified nodes function calc_node_coordinates!(node_coordinates, mesh::P4estMesh{2}, nodes::AbstractVector) - # We use `StrideArray`s here since these buffers are used in performance-critical - # places and the additional information passed to the compiler makes them faster - # than native `Array`s. 
- tmp1 = StrideArray(undef, real(mesh), - StaticInt(2), static_length(nodes), static_length(mesh.nodes)) - matrix1 = StrideArray(undef, real(mesh), - static_length(nodes), static_length(mesh.nodes)) - matrix2 = similar(matrix1) - baryweights_in = barycentric_weights(mesh.nodes) - - # Macros from `p4est` - p4est_root_len = 1 << P4EST_MAXLEVEL - p4est_quadrant_len(l) = 1 << (P4EST_MAXLEVEL - l) - - trees = unsafe_wrap_sc(p4est_tree_t, unsafe_load(mesh.p4est).trees) - - for tree in eachindex(trees) - offset = trees[tree].quadrants_offset - quadrants = unsafe_wrap_sc(p4est_quadrant_t, trees[tree].quadrants) - - for i in eachindex(quadrants) - element = offset + i - quad = quadrants[i] - - quad_length = p4est_quadrant_len(quad.level) / p4est_root_len - - nodes_out_x = 2 * (quad_length * 1/2 * (nodes .+ 1) .+ quad.x / p4est_root_len) .- 1 - nodes_out_y = 2 * (quad_length * 1/2 * (nodes .+ 1) .+ quad.y / p4est_root_len) .- 1 - polynomial_interpolation_matrix!(matrix1, mesh.nodes, nodes_out_x, baryweights_in) - polynomial_interpolation_matrix!(matrix2, mesh.nodes, nodes_out_y, baryweights_in) - - multiply_dimensionwise!( - view(node_coordinates, :, :, :, element), - matrix1, matrix2, - view(mesh.tree_node_coordinates, :, :, :, tree), - tmp1 - ) + # We use `StrideArray`s here since these buffers are used in performance-critical + # places and the additional information passed to the compiler makes them faster + # than native `Array`s. + tmp1 = StrideArray(undef, real(mesh), + StaticInt(2), static_length(nodes), static_length(mesh.nodes)) + matrix1 = StrideArray(undef, real(mesh), + static_length(nodes), static_length(mesh.nodes)) + matrix2 = similar(matrix1) + baryweights_in = barycentric_weights(mesh.nodes) + + # Macros from `p4est` + p4est_root_len = 1 << P4EST_MAXLEVEL + p4est_quadrant_len(l) = 1 << (P4EST_MAXLEVEL - l) + + trees = unsafe_wrap_sc(p4est_tree_t, unsafe_load(mesh.p4est).trees) + + for tree in eachindex(trees) + offset = trees[tree].quadrants_offset + quadrants = unsafe_wrap_sc(p4est_quadrant_t, trees[tree].quadrants) + + for i in eachindex(quadrants) + element = offset + i + quad = quadrants[i] + + quad_length = p4est_quadrant_len(quad.level) / p4est_root_len + + nodes_out_x = 2 * (quad_length * 1 / 2 * (nodes .+ 1) .+ + quad.x / p4est_root_len) .- 1 + nodes_out_y = 2 * (quad_length * 1 / 2 * (nodes .+ 1) .+ + quad.y / p4est_root_len) .- 1 + polynomial_interpolation_matrix!(matrix1, mesh.nodes, nodes_out_x, + baryweights_in) + polynomial_interpolation_matrix!(matrix2, mesh.nodes, nodes_out_y, + baryweights_in) + + multiply_dimensionwise!(view(node_coordinates, :, :, :, element), + matrix1, matrix2, + view(mesh.tree_node_coordinates, :, :, :, tree), + tmp1) + end end - end - return node_coordinates + return node_coordinates end - # Initialize node_indices of interface container @inline function init_interface_node_indices!(interfaces::P4estInterfaceContainer{2}, faces, orientation, interface_id) - # Iterate over primary and secondary element - for side in 1:2 - # Align interface in positive coordinate direction of primary element. - # For orientation == 1, the secondary element needs to be indexed backwards - # relative to the interface. - if side == 1 || orientation == 0 - # Forward indexing - i = :i_forward - else - # Backward indexing - i = :i_backward + # Iterate over primary and secondary element + for side in 1:2 + # Align interface in positive coordinate direction of primary element. 
+ # For orientation == 1, the secondary element needs to be indexed backwards + # relative to the interface. + if side == 1 || orientation == 0 + # Forward indexing + i = :i_forward + else + # Backward indexing + i = :i_backward + end + + if faces[side] == 0 + # Index face in negative x-direction + interfaces.node_indices[side, interface_id] = (:begin, i) + elseif faces[side] == 1 + # Index face in positive x-direction + interfaces.node_indices[side, interface_id] = (:end, i) + elseif faces[side] == 2 + # Index face in negative y-direction + interfaces.node_indices[side, interface_id] = (i, :begin) + else # faces[side] == 3 + # Index face in positive y-direction + interfaces.node_indices[side, interface_id] = (i, :end) + end end - if faces[side] == 0 - # Index face in negative x-direction - interfaces.node_indices[side, interface_id] = (:begin, i) - elseif faces[side] == 1 - # Index face in positive x-direction - interfaces.node_indices[side, interface_id] = (:end, i) - elseif faces[side] == 2 - # Index face in negative y-direction - interfaces.node_indices[side, interface_id] = (i, :begin) - else # faces[side] == 3 - # Index face in positive y-direction - interfaces.node_indices[side, interface_id] = (i, :end) - end - end - - return interfaces + return interfaces end - # Initialize node_indices of boundary container @inline function init_boundary_node_indices!(boundaries::P4estBoundaryContainer{2}, face, boundary_id) - if face == 0 - # Index face in negative x-direction - boundaries.node_indices[boundary_id] = (:begin, :i_forward) - elseif face == 1 - # Index face in positive x-direction - boundaries.node_indices[boundary_id] = (:end, :i_forward) - elseif face == 2 - # Index face in negative y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :begin) - else # face == 3 - # Index face in positive y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :end) - end - - return boundaries -end + if face == 0 + # Index face in negative x-direction + boundaries.node_indices[boundary_id] = (:begin, :i_forward) + elseif face == 1 + # Index face in positive x-direction + boundaries.node_indices[boundary_id] = (:end, :i_forward) + elseif face == 2 + # Index face in negative y-direction + boundaries.node_indices[boundary_id] = (:i_forward, :begin) + else # face == 3 + # Index face in positive y-direction + boundaries.node_indices[boundary_id] = (:i_forward, :end) + end + return boundaries +end # Initialize node_indices of mortar container # faces[1] is expected to be the face of the small side. @inline function init_mortar_node_indices!(mortars, faces, orientation, mortar_id) - for side in 1:2 - # Align mortar in positive coordinate direction of small side. - # For orientation == 1, the large side needs to be indexed backwards - # relative to the mortar. - if side == 1 || orientation == 0 - # Forward indexing for small side or orientation == 0 - i = :i_forward - else - # Backward indexing for large side with reversed orientation - i = :i_backward + for side in 1:2 + # Align mortar in positive coordinate direction of small side. + # For orientation == 1, the large side needs to be indexed backwards + # relative to the mortar. 
+ if side == 1 || orientation == 0 + # Forward indexing for small side or orientation == 0 + i = :i_forward + else + # Backward indexing for large side with reversed orientation + i = :i_backward + end + + if faces[side] == 0 + # Index face in negative x-direction + mortars.node_indices[side, mortar_id] = (:begin, i) + elseif faces[side] == 1 + # Index face in positive x-direction + mortars.node_indices[side, mortar_id] = (:end, i) + elseif faces[side] == 2 + # Index face in negative y-direction + mortars.node_indices[side, mortar_id] = (i, :begin) + else # faces[side] == 3 + # Index face in positive y-direction + mortars.node_indices[side, mortar_id] = (i, :end) + end end - if faces[side] == 0 - # Index face in negative x-direction - mortars.node_indices[side, mortar_id] = (:begin, i) - elseif faces[side] == 1 - # Index face in positive x-direction - mortars.node_indices[side, mortar_id] = (:end, i) - elseif faces[side] == 2 - # Index face in negative y-direction - mortars.node_indices[side, mortar_id] = (i, :begin) - else # faces[side] == 3 - # Index face in positive y-direction - mortars.node_indices[side, mortar_id] = (i, :end) - end - end - - return mortars + return mortars end - - end # @muladd diff --git a/src/solvers/dgsem_p4est/containers_3d.jl b/src/solvers/dgsem_p4est/containers_3d.jl index f5bffece227..6cdc2cf9611 100644 --- a/src/solvers/dgsem_p4est/containers_3d.jl +++ b/src/solvers/dgsem_p4est/containers_3d.jl @@ -3,315 +3,327 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Initialize data structures in element container function init_elements!(elements, mesh::P4estMesh{3}, basis::LobattoLegendreBasis) - @unpack node_coordinates, jacobian_matrix, - contravariant_vectors, inverse_jacobian = elements + @unpack node_coordinates, jacobian_matrix, + contravariant_vectors, inverse_jacobian = elements - calc_node_coordinates!(node_coordinates, mesh, basis) + calc_node_coordinates!(node_coordinates, mesh, basis) - for element in 1:ncells(mesh) - calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) + for element in 1:ncells(mesh) + calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) - calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix, - node_coordinates, basis) + calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix, + node_coordinates, basis) - calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix, basis) - end + calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix, basis) + end - return nothing + return nothing end - # Interpolate tree_node_coordinates to each quadrant at the nodes of the specified basis function calc_node_coordinates!(node_coordinates, mesh::P4estMesh{3}, basis::LobattoLegendreBasis) - # Hanging nodes will cause holes in the mesh if its polydeg is higher - # than the polydeg of the solver. - @assert length(basis.nodes) >= length(mesh.nodes) "The solver can't have a lower polydeg than the mesh" + # Hanging nodes will cause holes in the mesh if its polydeg is higher + # than the polydeg of the solver. 
+ @assert length(basis.nodes)>=length(mesh.nodes) "The solver can't have a lower polydeg than the mesh" - calc_node_coordinates!(node_coordinates, mesh, basis.nodes) + calc_node_coordinates!(node_coordinates, mesh, basis.nodes) end # Interpolate tree_node_coordinates to each quadrant at the specified nodes function calc_node_coordinates!(node_coordinates, mesh::P4estMesh{3}, nodes::AbstractVector) - # Macros from `p4est` - p4est_root_len = 1 << P4EST_MAXLEVEL - p4est_quadrant_len(l) = 1 << (P4EST_MAXLEVEL - l) + # Macros from `p4est` + p4est_root_len = 1 << P4EST_MAXLEVEL + p4est_quadrant_len(l) = 1 << (P4EST_MAXLEVEL - l) - trees = unsafe_wrap_sc(p8est_tree_t, unsafe_load(mesh.p4est).trees) + trees = unsafe_wrap_sc(p8est_tree_t, unsafe_load(mesh.p4est).trees) - for tree in eachindex(trees) - offset = trees[tree].quadrants_offset - quadrants = unsafe_wrap_sc(p8est_quadrant_t, trees[tree].quadrants) + for tree in eachindex(trees) + offset = trees[tree].quadrants_offset + quadrants = unsafe_wrap_sc(p8est_quadrant_t, trees[tree].quadrants) - for i in eachindex(quadrants) - element = offset + i - quad = quadrants[i] + for i in eachindex(quadrants) + element = offset + i + quad = quadrants[i] - quad_length = p4est_quadrant_len(quad.level) / p4est_root_len + quad_length = p4est_quadrant_len(quad.level) / p4est_root_len - nodes_out_x = 2 * (quad_length * 1/2 * (nodes .+ 1) .+ quad.x / p4est_root_len) .- 1 - nodes_out_y = 2 * (quad_length * 1/2 * (nodes .+ 1) .+ quad.y / p4est_root_len) .- 1 - nodes_out_z = 2 * (quad_length * 1/2 * (nodes .+ 1) .+ quad.z / p4est_root_len) .- 1 + nodes_out_x = 2 * (quad_length * 1 / 2 * (nodes .+ 1) .+ + quad.x / p4est_root_len) .- 1 + nodes_out_y = 2 * (quad_length * 1 / 2 * (nodes .+ 1) .+ + quad.y / p4est_root_len) .- 1 + nodes_out_z = 2 * (quad_length * 1 / 2 * (nodes .+ 1) .+ + quad.z / p4est_root_len) .- 1 - matrix1 = polynomial_interpolation_matrix(mesh.nodes, nodes_out_x) - matrix2 = polynomial_interpolation_matrix(mesh.nodes, nodes_out_y) - matrix3 = polynomial_interpolation_matrix(mesh.nodes, nodes_out_z) + matrix1 = polynomial_interpolation_matrix(mesh.nodes, nodes_out_x) + matrix2 = polynomial_interpolation_matrix(mesh.nodes, nodes_out_y) + matrix3 = polynomial_interpolation_matrix(mesh.nodes, nodes_out_z) - multiply_dimensionwise!( - view(node_coordinates, :, :, :, :, element), - matrix1, matrix2, matrix3, - view(mesh.tree_node_coordinates, :, :, :, :, tree) - ) + multiply_dimensionwise!(view(node_coordinates, :, :, :, :, element), + matrix1, matrix2, matrix3, + view(mesh.tree_node_coordinates, :, :, :, :, tree)) + end end - end - return node_coordinates + return node_coordinates end - # Initialize node_indices of interface container @inline function init_interface_node_indices!(interfaces::P4estInterfaceContainer{3}, faces, orientation, interface_id) - # Iterate over primary and secondary element - for side in 1:2 - # Align interface at the primary element (primary element has surface indices (:i_forward, :j_forward)). - # The secondary element needs to be indexed differently. - if side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward - else - surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], orientation) + # Iterate over primary and secondary element + for side in 1:2 + # Align interface at the primary element (primary element has surface indices (:i_forward, :j_forward)). + # The secondary element needs to be indexed differently. 
+ if side == 1 + surface_index1 = :i_forward + surface_index2 = :j_forward + else + surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], + faces[1], + orientation) + end + + if faces[side] == 0 + # Index face in negative x-direction + interfaces.node_indices[side, interface_id] = (:begin, surface_index1, + surface_index2) + elseif faces[side] == 1 + # Index face in positive x-direction + interfaces.node_indices[side, interface_id] = (:end, surface_index1, + surface_index2) + elseif faces[side] == 2 + # Index face in negative y-direction + interfaces.node_indices[side, interface_id] = (surface_index1, :begin, + surface_index2) + elseif faces[side] == 3 + # Index face in positive y-direction + interfaces.node_indices[side, interface_id] = (surface_index1, :end, + surface_index2) + elseif faces[side] == 4 + # Index face in negative z-direction + interfaces.node_indices[side, interface_id] = (surface_index1, + surface_index2, :begin) + else # faces[side] == 5 + # Index face in positive z-direction + interfaces.node_indices[side, interface_id] = (surface_index1, + surface_index2, :end) + end end - if faces[side] == 0 - # Index face in negative x-direction - interfaces.node_indices[side, interface_id] = (:begin, surface_index1, surface_index2) - elseif faces[side] == 1 - # Index face in positive x-direction - interfaces.node_indices[side, interface_id] = (:end, surface_index1, surface_index2) - elseif faces[side] == 2 - # Index face in negative y-direction - interfaces.node_indices[side, interface_id] = (surface_index1, :begin, surface_index2) - elseif faces[side] == 3 - # Index face in positive y-direction - interfaces.node_indices[side, interface_id] = (surface_index1, :end, surface_index2) - elseif faces[side] == 4 - # Index face in negative z-direction - interfaces.node_indices[side, interface_id] = (surface_index1, surface_index2, :begin) - else # faces[side] == 5 - # Index face in positive z-direction - interfaces.node_indices[side, interface_id] = (surface_index1, surface_index2, :end) - end - end - - return interfaces + return interfaces end - # Initialize node_indices of boundary container @inline function init_boundary_node_indices!(boundaries::P4estBoundaryContainer{3}, face, boundary_id) - if face == 0 - # Index face in negative x-direction - boundaries.node_indices[boundary_id] = (:begin, :i_forward, :j_forward) - elseif face == 1 - # Index face in positive x-direction - boundaries.node_indices[boundary_id] = (:end, :i_forward, :j_forward) - elseif face == 2 - # Index face in negative y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :begin, :j_forward) - elseif face == 3 - # Index face in positive y-direction - boundaries.node_indices[boundary_id] = (:i_forward, :end, :j_forward) - elseif face == 4 - # Index face in negative z-direction - boundaries.node_indices[boundary_id] = (:i_forward, :j_forward, :begin) - else # face == 5 - # Index face in positive z-direction - boundaries.node_indices[boundary_id] = (:i_forward, :j_forward, :end) - end + if face == 0 + # Index face in negative x-direction + boundaries.node_indices[boundary_id] = (:begin, :i_forward, :j_forward) + elseif face == 1 + # Index face in positive x-direction + boundaries.node_indices[boundary_id] = (:end, :i_forward, :j_forward) + elseif face == 2 + # Index face in negative y-direction + boundaries.node_indices[boundary_id] = (:i_forward, :begin, :j_forward) + elseif face == 3 + # Index face in positive y-direction + boundaries.node_indices[boundary_id] = (:i_forward, :end, 
:j_forward) + elseif face == 4 + # Index face in negative z-direction + boundaries.node_indices[boundary_id] = (:i_forward, :j_forward, :begin) + else # face == 5 + # Index face in positive z-direction + boundaries.node_indices[boundary_id] = (:i_forward, :j_forward, :end) + end - return boundaries + return boundaries end - # Initialize node_indices of mortar container # faces[1] is expected to be the face of the small side. @inline function init_mortar_node_indices!(mortars::P4estMortarContainer{3}, faces, orientation, mortar_id) - for side in 1:2 - # Align mortar at small side. - # The large side needs to be indexed differently. - if side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward - else - surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], orientation) + for side in 1:2 + # Align mortar at small side. + # The large side needs to be indexed differently. + if side == 1 + surface_index1 = :i_forward + surface_index2 = :j_forward + else + surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], + faces[1], + orientation) + end + + if faces[side] == 0 + # Index face in negative x-direction + mortars.node_indices[side, mortar_id] = (:begin, surface_index1, + surface_index2) + elseif faces[side] == 1 + # Index face in positive x-direction + mortars.node_indices[side, mortar_id] = (:end, surface_index1, + surface_index2) + elseif faces[side] == 2 + # Index face in negative y-direction + mortars.node_indices[side, mortar_id] = (surface_index1, :begin, + surface_index2) + elseif faces[side] == 3 + # Index face in positive y-direction + mortars.node_indices[side, mortar_id] = (surface_index1, :end, + surface_index2) + elseif faces[side] == 4 + # Index face in negative z-direction + mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, + :begin) + else # faces[side] == 5 + # Index face in positive z-direction + mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, + :end) + end end - if faces[side] == 0 - # Index face in negative x-direction - mortars.node_indices[side, mortar_id] = (:begin, surface_index1, surface_index2) - elseif faces[side] == 1 - # Index face in positive x-direction - mortars.node_indices[side, mortar_id] = (:end, surface_index1, surface_index2) - elseif faces[side] == 2 - # Index face in negative y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :begin, surface_index2) - elseif faces[side] == 3 - # Index face in positive y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :end, surface_index2) - elseif faces[side] == 4 - # Index face in negative z-direction - mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, :begin) - else # faces[side] == 5 - # Index face in positive z-direction - mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, :end) - end - end - - return mortars + return mortars end - # Convert `p4est` orientation code to node indices. # Return node indices that index "my side" wrt "other side", # i.e., i and j are indices of other side. function orientation_to_indices_p4est(my_face, other_face, orientation_code) - # my_face and other_face are the face directions (zero-based) - # of "my side" and "other side" respectively. - # Face corner 0 of the face with the lower face direction connects to a corner of the other face. - # The number of this corner is the orientation code in `p4est`. 
- lower = my_face <= other_face - - # x_pos, y_neg, and z_pos are the directions in which the face has right-handed coordinates - # when looked at from the outside. - my_right_handed = my_face in (1, 2, 5) - other_right_handed = other_face in (1, 2, 5) - - # If both or none are right-handed when looked at from the outside, they will have different - # orientations when looked at from the same side of the interface. - flipped = my_right_handed == other_right_handed - - # In the following illustrations, the face corner numbering of `p4est` is shown. - # ξ and η are the local coordinates of the respective face. - # We're looking at both faces from the same side of the interface, so that "other side" - # (in the illustrations on the left) has right-handed coordinates. - if !flipped - if orientation_code == 0 - # Corner 0 of other side matches corner 0 of my side - # 2┌──────┐3 2┌──────┐3 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 0└──────┘1 - # η η - # ↑ ↑ - # │ │ - # └───> ξ └───> ξ - surface_index1 = :i_forward - surface_index2 = :j_forward - elseif ((lower && orientation_code == 2) # Corner 0 of my side matches corner 2 of other side - || (!lower && orientation_code == 1)) # Corner 0 of other side matches corner 1 of my side - # 2┌──────┐3 0┌──────┐2 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 1└──────┘3 - # η ┌───> η - # ↑ │ - # │ ↓ - # └───> ξ ξ - surface_index1 = :j_backward - surface_index2 = :i_forward - elseif ((lower && orientation_code == 1) # Corner 0 of my side matches corner 1 of other side - || (!lower && orientation_code == 2)) # Corner 0 of other side matches corner 2 of my side - # 2┌──────┐3 3┌──────┐1 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 2└──────┘0 - # η ξ - # ↑ ↑ - # │ │ - # └───> ξ η <───┘ - surface_index1 = :j_forward - surface_index2 = :i_backward - else # orientation_code == 3 - # Corner 0 of my side matches corner 3 of other side and - # corner 0 of other side matches corner 3 of my side. - # 2┌──────┐3 1┌──────┐0 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 3└──────┘2 - # η ξ <───┐ - # ↑ │ - # │ ↓ - # └───> ξ η - surface_index1 = :i_backward - surface_index2 = :j_backward - end - else # flipped - if orientation_code == 0 - # Corner 0 of other side matches corner 0 of my side - # 2┌──────┐3 1┌──────┐3 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 0└──────┘2 - # η ξ - # ↑ ↑ - # │ │ - # └───> ξ └───> η - surface_index1 = :j_forward - surface_index2 = :i_forward - elseif orientation_code == 2 - # Corner 0 of my side matches corner 2 of other side and - # corner 0 of other side matches corner 2 of my side. - # 2┌──────┐3 0┌──────┐1 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 2└──────┘3 - # η ┌───> ξ - # ↑ │ - # │ ↓ - # └───> ξ η - surface_index1 = :i_forward - surface_index2 = :j_backward - elseif orientation_code == 1 - # Corner 0 of my side matches corner 1 of other side and - # corner 0 of other side matches corner 1 of my side. - # 2┌──────┐3 3┌──────┐2 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 1└──────┘0 - # η η - # ↑ ↑ - # │ │ - # └───> ξ ξ <───┘ - surface_index1 = :i_backward - surface_index2 = :j_forward - else # orientation_code == 3 - # Corner 0 of my side matches corner 3 of other side and - # corner 0 of other side matches corner 3 of my side. - # 2┌──────┐3 2┌──────┐0 - # │ │ │ │ - # │ │ │ │ - # 0└──────┘1 3└──────┘1 - # η η <───┐ - # ↑ │ - # │ ↓ - # └───> ξ ξ - surface_index1 = :j_backward - surface_index2 = :i_backward + # my_face and other_face are the face directions (zero-based) + # of "my side" and "other side" respectively. 
+ # Face corner 0 of the face with the lower face direction connects to a corner of the other face. + # The number of this corner is the orientation code in `p4est`. + lower = my_face <= other_face + + # x_pos, y_neg, and z_pos are the directions in which the face has right-handed coordinates + # when looked at from the outside. + my_right_handed = my_face in (1, 2, 5) + other_right_handed = other_face in (1, 2, 5) + + # If both or none are right-handed when looked at from the outside, they will have different + # orientations when looked at from the same side of the interface. + flipped = my_right_handed == other_right_handed + + # In the following illustrations, the face corner numbering of `p4est` is shown. + # ξ and η are the local coordinates of the respective face. + # We're looking at both faces from the same side of the interface, so that "other side" + # (in the illustrations on the left) has right-handed coordinates. + if !flipped + if orientation_code == 0 + # Corner 0 of other side matches corner 0 of my side + # 2┌──────┐3 2┌──────┐3 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 0└──────┘1 + # η η + # ↑ ↑ + # │ │ + # └───> ξ └───> ξ + surface_index1 = :i_forward + surface_index2 = :j_forward + elseif ((lower && orientation_code == 2) # Corner 0 of my side matches corner 2 of other side + || + (!lower && orientation_code == 1)) # Corner 0 of other side matches corner 1 of my side + # 2┌──────┐3 0┌──────┐2 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 1└──────┘3 + # η ┌───> η + # ↑ │ + # │ ↓ + # └───> ξ ξ + surface_index1 = :j_backward + surface_index2 = :i_forward + elseif ((lower && orientation_code == 1) # Corner 0 of my side matches corner 1 of other side + || + (!lower && orientation_code == 2)) # Corner 0 of other side matches corner 2 of my side + # 2┌──────┐3 3┌──────┐1 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 2└──────┘0 + # η ξ + # ↑ ↑ + # │ │ + # └───> ξ η <───┘ + surface_index1 = :j_forward + surface_index2 = :i_backward + else # orientation_code == 3 + # Corner 0 of my side matches corner 3 of other side and + # corner 0 of other side matches corner 3 of my side. + # 2┌──────┐3 1┌──────┐0 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 3└──────┘2 + # η ξ <───┐ + # ↑ │ + # │ ↓ + # └───> ξ η + surface_index1 = :i_backward + surface_index2 = :j_backward + end + else # flipped + if orientation_code == 0 + # Corner 0 of other side matches corner 0 of my side + # 2┌──────┐3 1┌──────┐3 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 0└──────┘2 + # η ξ + # ↑ ↑ + # │ │ + # └───> ξ └───> η + surface_index1 = :j_forward + surface_index2 = :i_forward + elseif orientation_code == 2 + # Corner 0 of my side matches corner 2 of other side and + # corner 0 of other side matches corner 2 of my side. + # 2┌──────┐3 0┌──────┐1 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 2└──────┘3 + # η ┌───> ξ + # ↑ │ + # │ ↓ + # └───> ξ η + surface_index1 = :i_forward + surface_index2 = :j_backward + elseif orientation_code == 1 + # Corner 0 of my side matches corner 1 of other side and + # corner 0 of other side matches corner 1 of my side. + # 2┌──────┐3 3┌──────┐2 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 1└──────┘0 + # η η + # ↑ ↑ + # │ │ + # └───> ξ ξ <───┘ + surface_index1 = :i_backward + surface_index2 = :j_forward + else # orientation_code == 3 + # Corner 0 of my side matches corner 3 of other side and + # corner 0 of other side matches corner 3 of my side. 
+ # 2┌──────┐3 2┌──────┐0 + # │ │ │ │ + # │ │ │ │ + # 0└──────┘1 3└──────┘1 + # η η <───┐ + # ↑ │ + # │ ↓ + # └───> ξ ξ + surface_index1 = :j_backward + surface_index2 = :i_backward + end end - end - return surface_index1, surface_index2 + return surface_index1, surface_index2 end - - end # @muladd diff --git a/src/solvers/dgsem_p4est/containers_parallel.jl b/src/solvers/dgsem_p4est/containers_parallel.jl index d8283594a22..42d6ea44c5e 100644 --- a/src/solvers/dgsem_p4est/containers_parallel.jl +++ b/src/solvers/dgsem_p4est/containers_parallel.jl @@ -3,438 +3,479 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent +mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype <: Real, NDIMSP2} <: + AbstractContainer + u::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] + local_neighbor_ids::Vector{Int} # [interface] + node_indices::Vector{NTuple{NDIMS, Symbol}} # [interface] + local_sides::Vector{Int} # [interface] -mutable struct P4estMPIInterfaceContainer{NDIMS, uEltype<:Real, NDIMSP2} <: AbstractContainer - u ::Array{uEltype, NDIMSP2} # [primary/secondary, variable, i, j, interface] - local_neighbor_ids::Vector{Int} # [interface] - node_indices ::Vector{NTuple{NDIMS, Symbol}} # [interface] - local_sides ::Vector{Int} # [interface] - - # internal `resize!`able storage - _u ::Vector{uEltype} + # internal `resize!`able storage + _u::Vector{uEltype} end -@inline nmpiinterfaces(interfaces::P4estMPIInterfaceContainer) = length(interfaces.local_sides) -@inline Base.ndims(::P4estMPIInterfaceContainer{NDIMS}) where NDIMS = NDIMS +@inline function nmpiinterfaces(interfaces::P4estMPIInterfaceContainer) + length(interfaces.local_sides) +end +@inline Base.ndims(::P4estMPIInterfaceContainer{NDIMS}) where {NDIMS} = NDIMS function Base.resize!(mpi_interfaces::P4estMPIInterfaceContainer, capacity) - @unpack _u, local_neighbor_ids, node_indices, local_sides = mpi_interfaces + @unpack _u, local_neighbor_ids, node_indices, local_sides = mpi_interfaces - n_dims = ndims(mpi_interfaces) - n_nodes = size(mpi_interfaces.u, 3) - n_variables = size(mpi_interfaces.u, 2) + n_dims = ndims(mpi_interfaces) + n_nodes = size(mpi_interfaces.u, 3) + n_variables = size(mpi_interfaces.u, 2) - resize!(_u, 2 * n_variables * n_nodes^(n_dims-1) * capacity) - mpi_interfaces.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, ntuple(_ -> n_nodes, n_dims-1)..., capacity)) + resize!(_u, 2 * n_variables * n_nodes^(n_dims - 1) * capacity) + mpi_interfaces.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, ntuple(_ -> n_nodes, n_dims - 1)..., + capacity)) - resize!(local_neighbor_ids, capacity) + resize!(local_neighbor_ids, capacity) - resize!(node_indices, capacity) + resize!(node_indices, capacity) - resize!(local_sides, capacity) + resize!(local_sides, capacity) - return nothing + return nothing end - # Create MPI interface container and initialize interface data function init_mpi_interfaces(mesh::ParallelP4estMesh, equations, basis, elements) - NDIMS = ndims(elements) - uEltype = eltype(elements) + NDIMS = ndims(elements) + uEltype = eltype(elements) - # Initialize container - n_mpi_interfaces = count_required_surfaces(mesh).mpi_interfaces + # Initialize container + n_mpi_interfaces = count_required_surfaces(mesh).mpi_interfaces - _u = Vector{uEltype}(undef, 2 * nvariables(equations) * nnodes(basis)^(NDIMS-1) * n_mpi_interfaces) - u = unsafe_wrap(Array, pointer(_u), - (2, nvariables(equations), ntuple(_ -> 
nnodes(basis), NDIMS-1)..., n_mpi_interfaces)) + _u = Vector{uEltype}(undef, + 2 * nvariables(equations) * nnodes(basis)^(NDIMS - 1) * + n_mpi_interfaces) + u = unsafe_wrap(Array, pointer(_u), + (2, nvariables(equations), ntuple(_ -> nnodes(basis), NDIMS - 1)..., + n_mpi_interfaces)) - local_neighbor_ids = Vector{Int}(undef, n_mpi_interfaces) + local_neighbor_ids = Vector{Int}(undef, n_mpi_interfaces) - node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_mpi_interfaces) + node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, n_mpi_interfaces) - local_sides = Vector{Int}(undef, n_mpi_interfaces) + local_sides = Vector{Int}(undef, n_mpi_interfaces) - mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS+2}( - u, local_neighbor_ids, node_indices, local_sides, _u) + mpi_interfaces = P4estMPIInterfaceContainer{NDIMS, uEltype, NDIMS + 2}(u, + local_neighbor_ids, + node_indices, + local_sides, + _u) - init_mpi_interfaces!(mpi_interfaces, mesh) + init_mpi_interfaces!(mpi_interfaces, mesh) - return mpi_interfaces + return mpi_interfaces end function init_mpi_interfaces!(mpi_interfaces, mesh::ParallelP4estMesh) - init_surfaces!(nothing, nothing, nothing, mpi_interfaces, nothing, mesh) + init_surfaces!(nothing, nothing, nothing, mpi_interfaces, nothing, mesh) - return mpi_interfaces + return mpi_interfaces end - # Container data structure (structure-of-arrays style) for DG L2 mortars # # Similar to `P4estMortarContainer`. The field `neighbor_ids` has been split up into # `local_neighbor_ids` and `local_neighbor_positions` to describe the ids and positions of the locally # available elements belonging to a particular MPI mortar. Furthermore, `normal_directions` holds # the normal vectors on the surface of the small elements for each mortar. -mutable struct P4estMPIMortarContainer{NDIMS, uEltype<:Real, RealT<:Real, NDIMSP1, NDIMSP2, NDIMSP3} <: AbstractContainer - u ::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] - local_neighbor_ids ::Vector{Vector{Int}} # [mortar] - local_neighbor_positions::Vector{Vector{Int}} # [mortar] - node_indices ::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] - normal_directions ::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] - # internal `resize!`able storage - _u ::Vector{uEltype} - _node_indices ::Vector{NTuple{NDIMS, Symbol}} - _normal_directions ::Vector{RealT} +mutable struct P4estMPIMortarContainer{NDIMS, uEltype <: Real, RealT <: Real, NDIMSP1, + NDIMSP2, NDIMSP3} <: AbstractContainer + u::Array{uEltype, NDIMSP3} # [small/large side, variable, position, i, j, mortar] + local_neighbor_ids::Vector{Vector{Int}} # [mortar] + local_neighbor_positions::Vector{Vector{Int}} # [mortar] + node_indices::Matrix{NTuple{NDIMS, Symbol}} # [small/large, mortar] + normal_directions::Array{RealT, NDIMSP2} # [dimension, i, j, position, mortar] + # internal `resize!`able storage + _u::Vector{uEltype} + _node_indices::Vector{NTuple{NDIMS, Symbol}} + _normal_directions::Vector{RealT} end -@inline nmpimortars(mpi_mortars::P4estMPIMortarContainer) = length(mpi_mortars.local_neighbor_ids) -@inline Base.ndims(::P4estMPIMortarContainer{NDIMS}) where NDIMS = NDIMS +@inline function nmpimortars(mpi_mortars::P4estMPIMortarContainer) + length(mpi_mortars.local_neighbor_ids) +end +@inline Base.ndims(::P4estMPIMortarContainer{NDIMS}) where {NDIMS} = NDIMS function Base.resize!(mpi_mortars::P4estMPIMortarContainer, capacity) - @unpack _u, _node_indices, _normal_directions = mpi_mortars + @unpack _u, _node_indices, _normal_directions = 
mpi_mortars - n_dims = ndims(mpi_mortars) - n_nodes = size(mpi_mortars.u, 4) - n_variables = size(mpi_mortars.u, 2) + n_dims = ndims(mpi_mortars) + n_nodes = size(mpi_mortars.u, 4) + n_variables = size(mpi_mortars.u, 2) - resize!(_u, 2 * n_variables * 2^(n_dims-1) * n_nodes^(n_dims-1) * capacity) - mpi_mortars.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, 2^(n_dims-1), ntuple(_ -> n_nodes, n_dims-1)..., capacity)) + resize!(_u, 2 * n_variables * 2^(n_dims - 1) * n_nodes^(n_dims - 1) * capacity) + mpi_mortars.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, 2^(n_dims - 1), + ntuple(_ -> n_nodes, n_dims - 1)..., capacity)) - resize!(mpi_mortars.local_neighbor_ids, capacity) - resize!(mpi_mortars.local_neighbor_positions, capacity) + resize!(mpi_mortars.local_neighbor_ids, capacity) + resize!(mpi_mortars.local_neighbor_positions, capacity) - resize!(_node_indices, 2 * capacity) - mpi_mortars.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) + resize!(_node_indices, 2 * capacity) + mpi_mortars.node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, capacity)) - resize!(_normal_directions, n_dims * n_nodes^(n_dims-1) * 2^(n_dims-1) * capacity) - mpi_mortars.normal_directions = unsafe_wrap(Array, pointer(_normal_directions), - (n_dims, ntuple(_ -> n_nodes, n_dims-1)..., 2^(n_dims-1), capacity)) + resize!(_normal_directions, + n_dims * n_nodes^(n_dims - 1) * 2^(n_dims - 1) * capacity) + mpi_mortars.normal_directions = unsafe_wrap(Array, pointer(_normal_directions), + (n_dims, + ntuple(_ -> n_nodes, n_dims - 1)..., + 2^(n_dims - 1), capacity)) - return nothing + return nothing end - # Create MPI mortar container and initialize MPI mortar data function init_mpi_mortars(mesh::ParallelP4estMesh, equations, basis, elements) - NDIMS = ndims(mesh) - RealT = real(mesh) - uEltype = eltype(elements) - - # Initialize container - n_mpi_mortars = count_required_surfaces(mesh).mpi_mortars - - _u = Vector{uEltype}(undef, - 2 * nvariables(equations) * 2^(NDIMS-1) * nnodes(basis)^(NDIMS-1) * n_mpi_mortars) - u = unsafe_wrap(Array, pointer(_u), - (2, nvariables(equations), 2^(NDIMS-1), ntuple(_ -> nnodes(basis), NDIMS-1)..., n_mpi_mortars)) - - local_neighbor_ids = fill(Vector{Int}(), n_mpi_mortars) - local_neighbor_positions = fill(Vector{Int}(), n_mpi_mortars) - - _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mpi_mortars) - node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mpi_mortars)) - - _normal_directions = Vector{RealT}(undef, NDIMS * nnodes(basis)^(NDIMS-1) * 2^(NDIMS-1) * n_mpi_mortars) - normal_directions = unsafe_wrap(Array, pointer(_normal_directions), - (NDIMS, ntuple(_ -> nnodes(basis), NDIMS-1)..., 2^(NDIMS-1), n_mpi_mortars)) - - mpi_mortars = P4estMPIMortarContainer{NDIMS, uEltype, RealT, NDIMS+1, NDIMS+2, NDIMS+3}( - u, local_neighbor_ids, local_neighbor_positions, node_indices, normal_directions, - _u, _node_indices, _normal_directions) - - if n_mpi_mortars > 0 - init_mpi_mortars!(mpi_mortars, mesh, basis, elements) - end + NDIMS = ndims(mesh) + RealT = real(mesh) + uEltype = eltype(elements) + + # Initialize container + n_mpi_mortars = count_required_surfaces(mesh).mpi_mortars + + _u = Vector{uEltype}(undef, + 2 * nvariables(equations) * 2^(NDIMS - 1) * + nnodes(basis)^(NDIMS - 1) * n_mpi_mortars) + u = unsafe_wrap(Array, pointer(_u), + (2, nvariables(equations), 2^(NDIMS - 1), + ntuple(_ -> nnodes(basis), NDIMS - 1)..., n_mpi_mortars)) + + local_neighbor_ids = fill(Vector{Int}(), n_mpi_mortars) + local_neighbor_positions = 
fill(Vector{Int}(), n_mpi_mortars) + + _node_indices = Vector{NTuple{NDIMS, Symbol}}(undef, 2 * n_mpi_mortars) + node_indices = unsafe_wrap(Array, pointer(_node_indices), (2, n_mpi_mortars)) + + _normal_directions = Vector{RealT}(undef, + NDIMS * nnodes(basis)^(NDIMS - 1) * + 2^(NDIMS - 1) * n_mpi_mortars) + normal_directions = unsafe_wrap(Array, pointer(_normal_directions), + (NDIMS, ntuple(_ -> nnodes(basis), NDIMS - 1)..., + 2^(NDIMS - 1), n_mpi_mortars)) + + mpi_mortars = P4estMPIMortarContainer{NDIMS, uEltype, RealT, NDIMS + 1, NDIMS + 2, + NDIMS + 3}(u, local_neighbor_ids, + local_neighbor_positions, + node_indices, normal_directions, + _u, _node_indices, + _normal_directions) + + if n_mpi_mortars > 0 + init_mpi_mortars!(mpi_mortars, mesh, basis, elements) + end - return mpi_mortars + return mpi_mortars end function init_mpi_mortars!(mpi_mortars, mesh::ParallelP4estMesh, basis, elements) - init_surfaces!(nothing, nothing, nothing, nothing, mpi_mortars, mesh) - init_normal_directions!(mpi_mortars, basis, elements) + init_surfaces!(nothing, nothing, nothing, nothing, mpi_mortars, mesh) + init_normal_directions!(mpi_mortars, basis, elements) - return mpi_mortars + return mpi_mortars end - # Overload init! function for regular interfaces, regular mortars and boundaries since they must # call the appropriate init_surfaces! function for parallel p4est meshes function init_interfaces!(interfaces, mesh::ParallelP4estMesh) - init_surfaces!(interfaces, nothing, nothing, nothing, nothing, mesh) + init_surfaces!(interfaces, nothing, nothing, nothing, nothing, mesh) - return interfaces + return interfaces end function init_mortars!(mortars, mesh::ParallelP4estMesh) - init_surfaces!(nothing, mortars, nothing, nothing, nothing, mesh) + init_surfaces!(nothing, mortars, nothing, nothing, nothing, mesh) - return mortars + return mortars end function init_boundaries!(boundaries, mesh::ParallelP4estMesh) - init_surfaces!(nothing, nothing, boundaries, nothing, nothing, mesh) + init_surfaces!(nothing, nothing, boundaries, nothing, nothing, mesh) - return boundaries + return boundaries end - function reinitialize_containers!(mesh::ParallelP4estMesh, equations, dg::DGSEM, cache) - # Make sure to re-create ghost layer before reinitializing MPI-related containers - update_ghost_layer!(mesh) + # Make sure to re-create ghost layer before reinitializing MPI-related containers + update_ghost_layer!(mesh) - # Re-initialize elements container - @unpack elements = cache - resize!(elements, ncells(mesh)) - init_elements!(elements, mesh, dg.basis) + # Re-initialize elements container + @unpack elements = cache + resize!(elements, ncells(mesh)) + init_elements!(elements, mesh, dg.basis) - required = count_required_surfaces(mesh) + required = count_required_surfaces(mesh) - # resize interfaces container - @unpack interfaces = cache - resize!(interfaces, required.interfaces) + # resize interfaces container + @unpack interfaces = cache + resize!(interfaces, required.interfaces) - # resize boundaries container - @unpack boundaries = cache - resize!(boundaries, required.boundaries) + # resize boundaries container + @unpack boundaries = cache + resize!(boundaries, required.boundaries) - # resize mortars container - @unpack mortars = cache - resize!(mortars, required.mortars) + # resize mortars container + @unpack mortars = cache + resize!(mortars, required.mortars) - # resize mpi_interfaces container - @unpack mpi_interfaces = cache - resize!(mpi_interfaces, required.mpi_interfaces) + # resize mpi_interfaces container + 
@unpack mpi_interfaces = cache + resize!(mpi_interfaces, required.mpi_interfaces) - # resize mpi_mortars container - @unpack mpi_mortars = cache - resize!(mpi_mortars, required.mpi_mortars) + # resize mpi_mortars container + @unpack mpi_mortars = cache + resize!(mpi_mortars, required.mpi_mortars) - # re-initialize containers together to reduce - # the number of iterations over the mesh in p4est - init_surfaces!(interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars, mesh) + # re-initialize containers together to reduce + # the number of iterations over the mesh in p4est + init_surfaces!(interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars, mesh) - # re-initialize MPI cache - @unpack mpi_cache = cache - init_mpi_cache!(mpi_cache, mesh, mpi_interfaces, mpi_mortars, - nvariables(equations), nnodes(dg), eltype(elements)) + # re-initialize MPI cache + @unpack mpi_cache = cache + init_mpi_cache!(mpi_cache, mesh, mpi_interfaces, mpi_mortars, + nvariables(equations), nnodes(dg), eltype(elements)) - # re-initialize and distribute normal directions of MPI mortars; requires MPI communication, so - # the MPI cache must be re-initialized before - init_normal_directions!(mpi_mortars, dg.basis, elements) - exchange_normal_directions!(mpi_mortars, mpi_cache, mesh, nnodes(dg)) + # re-initialize and distribute normal directions of MPI mortars; requires MPI communication, so + # the MPI cache must be re-initialized before + init_normal_directions!(mpi_mortars, dg.basis, elements) + exchange_normal_directions!(mpi_mortars, mpi_cache, mesh, nnodes(dg)) end - # A helper struct used in initialization methods below -mutable struct ParallelInitSurfacesIterFaceUserData{Interfaces, Mortars, Boundaries, MPIInterfaces, MPIMortars, Mesh} - interfaces ::Interfaces - interface_id ::Int - mortars ::Mortars - mortar_id ::Int - boundaries ::Boundaries - boundary_id ::Int - mpi_interfaces ::MPIInterfaces - mpi_interface_id::Int - mpi_mortars ::MPIMortars - mpi_mortar_id ::Int - mesh ::Mesh +mutable struct ParallelInitSurfacesIterFaceUserData{Interfaces, Mortars, Boundaries, + MPIInterfaces, MPIMortars, Mesh} + interfaces::Interfaces + interface_id::Int + mortars::Mortars + mortar_id::Int + boundaries::Boundaries + boundary_id::Int + mpi_interfaces::MPIInterfaces + mpi_interface_id::Int + mpi_mortars::MPIMortars + mpi_mortar_id::Int + mesh::Mesh end function ParallelInitSurfacesIterFaceUserData(interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars, mesh) - return ParallelInitSurfacesIterFaceUserData{ - typeof(interfaces), typeof(mortars), typeof(boundaries), typeof(mpi_interfaces), typeof(mpi_mortars), typeof(mesh)}( - interfaces, 1, mortars, 1, boundaries, 1, mpi_interfaces, 1, mpi_mortars, 1, mesh) + return ParallelInitSurfacesIterFaceUserData{ + typeof(interfaces), typeof(mortars), + typeof(boundaries), + typeof(mpi_interfaces), + typeof(mpi_mortars), typeof(mesh)}(interfaces, + 1, + mortars, + 1, + boundaries, + 1, + mpi_interfaces, + 1, + mpi_mortars, + 1, + mesh) end - function init_surfaces_iter_face_parallel(info, user_data) - # Unpack user_data - data = unsafe_pointer_to_objref(Ptr{ParallelInitSurfacesIterFaceUserData}(user_data)) + # Unpack user_data + data = unsafe_pointer_to_objref(Ptr{ParallelInitSurfacesIterFaceUserData}(user_data)) - # Function barrier because the unpacked user_data above is type-unstable - init_surfaces_iter_face_inner(info, data) + # Function barrier because the unpacked user_data above is type-unstable + init_surfaces_iter_face_inner(info, data) end # 2D 
-cfunction(::typeof(init_surfaces_iter_face_parallel), ::Val{2}) = @cfunction(init_surfaces_iter_face_parallel, Cvoid, (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(init_surfaces_iter_face_parallel), ::Val{2}) + @cfunction(init_surfaces_iter_face_parallel, Cvoid, + (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(init_surfaces_iter_face_parallel), ::Val{3}) = @cfunction(init_surfaces_iter_face_parallel, Cvoid, (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(init_surfaces_iter_face_parallel), ::Val{3}) + @cfunction(init_surfaces_iter_face_parallel, Cvoid, + (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +end # Function barrier for type stability, overload for parallel P4estMesh -function init_surfaces_iter_face_inner(info, user_data::ParallelInitSurfacesIterFaceUserData) - @unpack interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars = user_data - # This function is called during `init_surfaces!`, more precisely it is called for each face - # while p4est iterates over the forest. Since `init_surfaces!` can be used to initialize all - # surfaces at once or any subset of them, some of the unpacked values above may be `nothing` if - # they're not supposed to be initialized during this call. That is why we need additional - # `!== nothing` checks below before initializing individual faces. - if unsafe_load(info).sides.elem_count == 2 - # Two neighboring elements => Interface or mortar - - # Extract surface data - sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) - - if sides[1].is_hanging == false && sides[2].is_hanging == false - # No hanging nodes => normal interface or MPI interface - if sides[1].is.full.is_ghost == true || sides[2].is.full.is_ghost == true # remote side => MPI interface - if mpi_interfaces !== nothing - init_mpi_interfaces_iter_face_inner(info, sides, user_data) - end - else - if interfaces !== nothing - init_interfaces_iter_face_inner(info, sides, user_data) - end - end - else - # Hanging nodes => mortar or MPI mortar - # First, we check which side is hanging, i.e., on which side we have the refined cells. - # Then we check if any of the refined cells or the coarse cell are "ghost" cells, i.e., they - # belong to another rank. That way we can determine if this is a regular mortar or MPI mortar - if sides[1].is_hanging == true - @assert sides[2].is_hanging == false - if any(sides[1].is.hanging.is_ghost .== true) || sides[2].is.full.is_ghost == true - face_has_ghost_side = true +function init_surfaces_iter_face_inner(info, + user_data::ParallelInitSurfacesIterFaceUserData) + @unpack interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars = user_data + # This function is called during `init_surfaces!`, more precisely it is called for each face + # while p4est iterates over the forest. Since `init_surfaces!` can be used to initialize all + # surfaces at once or any subset of them, some of the unpacked values above may be `nothing` if + # they're not supposed to be initialized during this call. That is why we need additional + # `!== nothing` checks below before initializing individual faces. 
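+    # As a concrete example (a minimal sketch): `init_interfaces!` above calls
+    #   init_surfaces!(interfaces, nothing, nothing, nothing, nothing, mesh)
+    # so only the regular-interface branch does any work; all other containers
+    # unpacked here are `nothing` and their branches are skipped.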
+ if unsafe_load(info).sides.elem_count == 2 + # Two neighboring elements => Interface or mortar + + # Extract surface data + sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) + + if sides[1].is_hanging == false && sides[2].is_hanging == false + # No hanging nodes => normal interface or MPI interface + if sides[1].is.full.is_ghost == true || sides[2].is.full.is_ghost == true # remote side => MPI interface + if mpi_interfaces !== nothing + init_mpi_interfaces_iter_face_inner(info, sides, user_data) + end + else + if interfaces !== nothing + init_interfaces_iter_face_inner(info, sides, user_data) + end + end else - face_has_ghost_side = false + # Hanging nodes => mortar or MPI mortar + # First, we check which side is hanging, i.e., on which side we have the refined cells. + # Then we check if any of the refined cells or the coarse cell are "ghost" cells, i.e., they + # belong to another rank. That way we can determine if this is a regular mortar or MPI mortar + if sides[1].is_hanging == true + @assert sides[2].is_hanging == false + if any(sides[1].is.hanging.is_ghost .== true) || + sides[2].is.full.is_ghost == true + face_has_ghost_side = true + else + face_has_ghost_side = false + end + else # sides[2].is_hanging == true + @assert sides[1].is_hanging == false + if sides[1].is.full.is_ghost == true || + any(sides[2].is.hanging.is_ghost .== true) + face_has_ghost_side = true + else + face_has_ghost_side = false + end + end + # Initialize mortar or MPI mortar + if face_has_ghost_side && mpi_mortars !== nothing + init_mpi_mortars_iter_face_inner(info, sides, user_data) + elseif !face_has_ghost_side && mortars !== nothing + init_mortars_iter_face_inner(info, sides, user_data) + end end - else # sides[2].is_hanging == true - @assert sides[1].is_hanging == false - if sides[1].is.full.is_ghost == true || any(sides[2].is.hanging.is_ghost .== true) - face_has_ghost_side = true - else - face_has_ghost_side = false + elseif unsafe_load(info).sides.elem_count == 1 + # One neighboring elements => boundary + if boundaries !== nothing + init_boundaries_iter_face_inner(info, user_data) end - end - # Initialize mortar or MPI mortar - if face_has_ghost_side && mpi_mortars !== nothing - init_mpi_mortars_iter_face_inner(info, sides, user_data) - elseif !face_has_ghost_side && mortars !== nothing - init_mortars_iter_face_inner(info, sides, user_data) - end end - elseif unsafe_load(info).sides.elem_count == 1 - # One neighboring elements => boundary - if boundaries !== nothing - init_boundaries_iter_face_inner(info, user_data) - end - end - return nothing + return nothing end function init_surfaces!(interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars, mesh::ParallelP4estMesh) - # Let p4est iterate over all interfaces and call init_surfaces_iter_face - iter_face_c = cfunction(init_surfaces_iter_face_parallel, Val(ndims(mesh))) - user_data = ParallelInitSurfacesIterFaceUserData(interfaces, mortars, boundaries, - mpi_interfaces, mpi_mortars, mesh) + # Let p4est iterate over all interfaces and call init_surfaces_iter_face + iter_face_c = cfunction(init_surfaces_iter_face_parallel, Val(ndims(mesh))) + user_data = ParallelInitSurfacesIterFaceUserData(interfaces, mortars, boundaries, + mpi_interfaces, mpi_mortars, mesh) - iterate_p4est(mesh.p4est, user_data; ghost_layer=mesh.ghost, iter_face_c=iter_face_c) + iterate_p4est(mesh.p4est, user_data; ghost_layer = mesh.ghost, + iter_face_c = iter_face_c) - return nothing + return nothing end - # Initialization of MPI interfaces after the function barrier 
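# (On an MPI interface exactly one of the two sides is a ghost, i.e., owned by a
# remote rank; the non-ghost side becomes `local_side` below. A minimal sketch:
# `sides[1].is.full.is_ghost == true` implies `local_side = 2`.)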
function init_mpi_interfaces_iter_face_inner(info, sides, user_data) - @unpack mpi_interfaces, mpi_interface_id, mesh = user_data - user_data.mpi_interface_id += 1 - - if sides[1].is.full.is_ghost == true - local_side = 2 - elseif sides[2].is.full.is_ghost == true - local_side = 1 - else - error("should not happen") - end - - # Get local tree, one-based indexing - tree = unsafe_load_tree(mesh.p4est, sides[local_side].treeid + 1) - # Quadrant numbering offset of the local quadrant at this interface - offset = tree.quadrants_offset - tree_quad_id = sides[local_side].is.full.quadid # quadid in the local tree - # ID of the local neighboring quad, cumulative over local trees - local_quad_id = offset + tree_quad_id - - # p4est uses zero-based indexing, convert to one-based indexing - mpi_interfaces.local_neighbor_ids[mpi_interface_id] = local_quad_id + 1 - mpi_interfaces.local_sides[mpi_interface_id] = local_side - - # Face at which the interface lies - faces = (sides[1].face, sides[2].face) - - # Save mpi_interfaces.node_indices dimension specific in containers_[23]d_parallel.jl - init_mpi_interface_node_indices!(mpi_interfaces, faces, local_side, - unsafe_load(info).orientation, - mpi_interface_id) - - return nothing -end + @unpack mpi_interfaces, mpi_interface_id, mesh = user_data + user_data.mpi_interface_id += 1 + + if sides[1].is.full.is_ghost == true + local_side = 2 + elseif sides[2].is.full.is_ghost == true + local_side = 1 + else + error("should not happen") + end + + # Get local tree, one-based indexing + tree = unsafe_load_tree(mesh.p4est, sides[local_side].treeid + 1) + # Quadrant numbering offset of the local quadrant at this interface + offset = tree.quadrants_offset + tree_quad_id = sides[local_side].is.full.quadid # quadid in the local tree + # ID of the local neighboring quad, cumulative over local trees + local_quad_id = offset + tree_quad_id + + # p4est uses zero-based indexing, convert to one-based indexing + mpi_interfaces.local_neighbor_ids[mpi_interface_id] = local_quad_id + 1 + mpi_interfaces.local_sides[mpi_interface_id] = local_side + + # Face at which the interface lies + faces = (sides[1].face, sides[2].face) + # Save mpi_interfaces.node_indices dimension specific in containers_[23]d_parallel.jl + init_mpi_interface_node_indices!(mpi_interfaces, faces, local_side, + unsafe_load(info).orientation, + mpi_interface_id) + + return nothing +end # Initialization of MPI mortars after the function barrier function init_mpi_mortars_iter_face_inner(info, sides, user_data) - @unpack mpi_mortars, mpi_mortar_id, mesh = user_data - user_data.mpi_mortar_id += 1 - - # Get Tuple of adjacent trees, one-based indexing - trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), - unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) - # Quadrant numbering offsets of the quadrants at this mortar - offsets = SVector(trees[1].quadrants_offset, - trees[2].quadrants_offset) - - if sides[1].is_hanging == true - hanging_side = 1 - full_side = 2 - else # sides[2].is_hanging == true - hanging_side = 2 - full_side = 1 - end - # Just be sure before accessing is.full or is.hanging later - @assert sides[full_side].is_hanging == false - @assert sides[hanging_side].is_hanging == true - - # Find small quads that are locally available - local_small_quad_positions = findall(sides[hanging_side].is.hanging.is_ghost .== false) - - # Get id of local small quadrants within their tree - # Indexing CBinding.Caccessor via a Vector does not work here -> use map instead - tree_small_quad_ids = 
map(p->sides[hanging_side].is.hanging.quadid[p], local_small_quad_positions) - local_small_quad_ids = offsets[hanging_side] .+ tree_small_quad_ids # ids cumulative over local trees - - # Determine if large quadrant is available and if yes, determine its id - if sides[full_side].is.full.is_ghost == false - local_large_quad_id = offsets[full_side] + sides[full_side].is.full.quadid - else - local_large_quad_id = -1 # large quad is ghost - end - - # Write data to mortar container, convert to 1-based indexing - # Start with small elements - local_neighbor_ids = local_small_quad_ids .+ 1 - local_neighbor_positions = local_small_quad_positions - # Add large element information if it is locally available - if local_large_quad_id > -1 - push!(local_neighbor_ids, local_large_quad_id + 1) # convert to 1-based index - push!(local_neighbor_positions, 2^(ndims(mesh)-1) + 1) - end - - mpi_mortars.local_neighbor_ids[mpi_mortar_id] = local_neighbor_ids - mpi_mortars.local_neighbor_positions[mpi_mortar_id] = local_neighbor_positions - - # init_mortar_node_indices! expects side 1 to contain small elements - faces = (sides[hanging_side].face, sides[full_side].face) - init_mortar_node_indices!(mpi_mortars, faces, unsafe_load(info).orientation, mpi_mortar_id) - - return nothing -end + @unpack mpi_mortars, mpi_mortar_id, mesh = user_data + user_data.mpi_mortar_id += 1 + + # Get Tuple of adjacent trees, one-based indexing + trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), + unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) + # Quadrant numbering offsets of the quadrants at this mortar + offsets = SVector(trees[1].quadrants_offset, + trees[2].quadrants_offset) + + if sides[1].is_hanging == true + hanging_side = 1 + full_side = 2 + else # sides[2].is_hanging == true + hanging_side = 2 + full_side = 1 + end + # Just be sure before accessing is.full or is.hanging later + @assert sides[full_side].is_hanging == false + @assert sides[hanging_side].is_hanging == true + + # Find small quads that are locally available + local_small_quad_positions = findall(sides[hanging_side].is.hanging.is_ghost .== + false) + + # Get id of local small quadrants within their tree + # Indexing CBinding.Caccessor via a Vector does not work here -> use map instead + tree_small_quad_ids = map(p -> sides[hanging_side].is.hanging.quadid[p], + local_small_quad_positions) + local_small_quad_ids = offsets[hanging_side] .+ tree_small_quad_ids # ids cumulative over local trees + + # Determine if large quadrant is available and if yes, determine its id + if sides[full_side].is.full.is_ghost == false + local_large_quad_id = offsets[full_side] + sides[full_side].is.full.quadid + else + local_large_quad_id = -1 # large quad is ghost + end + + # Write data to mortar container, convert to 1-based indexing + # Start with small elements + local_neighbor_ids = local_small_quad_ids .+ 1 + local_neighbor_positions = local_small_quad_positions + # Add large element information if it is locally available + if local_large_quad_id > -1 + push!(local_neighbor_ids, local_large_quad_id + 1) # convert to 1-based index + push!(local_neighbor_positions, 2^(ndims(mesh) - 1) + 1) + end + mpi_mortars.local_neighbor_ids[mpi_mortar_id] = local_neighbor_ids + mpi_mortars.local_neighbor_positions[mpi_mortar_id] = local_neighbor_positions + + # init_mortar_node_indices! 
expects side 1 to contain small elements + faces = (sides[hanging_side].face, sides[full_side].face) + init_mortar_node_indices!(mpi_mortars, faces, unsafe_load(info).orientation, + mpi_mortar_id) + + return nothing +end # Iterate over all interfaces and count # - (inner) interfaces @@ -444,90 +485,97 @@ end # - (MPI) mortars at subdomain boundaries # and collect the numbers in `user_data` in this order. function count_surfaces_iter_face_parallel(info, user_data) - if unsafe_load(info).sides.elem_count == 2 - # Two neighboring elements => Interface or mortar - - # Extract surface data - sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) - - if sides[1].is_hanging == false && sides[2].is_hanging == false - # No hanging nodes => normal interface or MPI interface - if sides[1].is.full.is_ghost == true || sides[2].is.full.is_ghost == true # remote side => MPI interface - # Unpack user_data = [mpi_interface_count] and increment mpi_interface_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 4) - unsafe_store!(ptr, id + 1, 4) - else - # Unpack user_data = [interface_count] and increment interface_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 1) - unsafe_store!(ptr, id + 1, 1) - end - else - # Hanging nodes => mortar or MPI mortar - # First, we check which side is hanging, i.e., on which side we have the refined cells. - # Then we check if any of the refined cells or the coarse cell are "ghost" cells, i.e., they - # belong to another rank. That way we can determine if this is a regular mortar or MPI mortar - if sides[1].is_hanging == true - @assert sides[2].is_hanging == false - if any(sides[1].is.hanging.is_ghost .== true) || sides[2].is.full.is_ghost == true - face_has_ghost_side = true - else - face_has_ghost_side = false - end - else # sides[2].is_hanging == true - @assert sides[1].is_hanging == false - if sides[1].is.full.is_ghost == true || any(sides[2].is.hanging.is_ghost .== true) - face_has_ghost_side = true + if unsafe_load(info).sides.elem_count == 2 + # Two neighboring elements => Interface or mortar + + # Extract surface data + sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) + + if sides[1].is_hanging == false && sides[2].is_hanging == false + # No hanging nodes => normal interface or MPI interface + if sides[1].is.full.is_ghost == true || sides[2].is.full.is_ghost == true # remote side => MPI interface + # Unpack user_data = [mpi_interface_count] and increment mpi_interface_count + ptr = Ptr{Int}(user_data) + id = unsafe_load(ptr, 4) + unsafe_store!(ptr, id + 1, 4) + else + # Unpack user_data = [interface_count] and increment interface_count + ptr = Ptr{Int}(user_data) + id = unsafe_load(ptr, 1) + unsafe_store!(ptr, id + 1, 1) + end else - face_has_ghost_side = false + # Hanging nodes => mortar or MPI mortar + # First, we check which side is hanging, i.e., on which side we have the refined cells. + # Then we check if any of the refined cells or the coarse cell are "ghost" cells, i.e., they + # belong to another rank. 
That way we can determine if this is a regular mortar or MPI mortar + if sides[1].is_hanging == true + @assert sides[2].is_hanging == false + if any(sides[1].is.hanging.is_ghost .== true) || + sides[2].is.full.is_ghost == true + face_has_ghost_side = true + else + face_has_ghost_side = false + end + else # sides[2].is_hanging == true + @assert sides[1].is_hanging == false + if sides[1].is.full.is_ghost == true || + any(sides[2].is.hanging.is_ghost .== true) + face_has_ghost_side = true + else + face_has_ghost_side = false + end + end + if face_has_ghost_side + # Unpack user_data = [mpi_mortar_count] and increment mpi_mortar_count + ptr = Ptr{Int}(user_data) + id = unsafe_load(ptr, 5) + unsafe_store!(ptr, id + 1, 5) + else + # Unpack user_data = [mortar_count] and increment mortar_count + ptr = Ptr{Int}(user_data) + id = unsafe_load(ptr, 2) + unsafe_store!(ptr, id + 1, 2) + end end - end - if face_has_ghost_side - # Unpack user_data = [mpi_mortar_count] and increment mpi_mortar_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 5) - unsafe_store!(ptr, id + 1, 5) - else - # Unpack user_data = [mortar_count] and increment mortar_count + elseif unsafe_load(info).sides.elem_count == 1 + # One neighboring elements => boundary + + # Unpack user_data = [boundary_count] and increment boundary_count ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 2) - unsafe_store!(ptr, id + 1, 2) - end + id = unsafe_load(ptr, 3) + unsafe_store!(ptr, id + 1, 3) end - elseif unsafe_load(info).sides.elem_count == 1 - # One neighboring elements => boundary - # Unpack user_data = [boundary_count] and increment boundary_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 3) - unsafe_store!(ptr, id + 1, 3) - end - - return nothing + return nothing end # 2D -cfunction(::typeof(count_surfaces_iter_face_parallel), ::Val{2}) = @cfunction(count_surfaces_iter_face_parallel, Cvoid, (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(count_surfaces_iter_face_parallel), ::Val{2}) + @cfunction(count_surfaces_iter_face_parallel, Cvoid, + (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(count_surfaces_iter_face_parallel), ::Val{3}) = @cfunction(count_surfaces_iter_face_parallel, Cvoid, (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(count_surfaces_iter_face_parallel), ::Val{3}) + @cfunction(count_surfaces_iter_face_parallel, Cvoid, + (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +end function count_required_surfaces(mesh::ParallelP4estMesh) - # Let p4est iterate over all interfaces and call count_surfaces_iter_face_parallel - iter_face_c = cfunction(count_surfaces_iter_face_parallel, Val(ndims(mesh))) + # Let p4est iterate over all interfaces and call count_surfaces_iter_face_parallel + iter_face_c = cfunction(count_surfaces_iter_face_parallel, Val(ndims(mesh))) - # interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars - user_data = [0, 0, 0, 0, 0] + # interfaces, mortars, boundaries, mpi_interfaces, mpi_mortars + user_data = [0, 0, 0, 0, 0] - iterate_p4est(mesh.p4est, user_data; ghost_layer=mesh.ghost, iter_face_c=iter_face_c) + iterate_p4est(mesh.p4est, user_data; ghost_layer = mesh.ghost, + iter_face_c = iter_face_c) - # Return counters - return (interfaces = user_data[1], - mortars = user_data[2], - boundaries = user_data[3], - mpi_interfaces = user_data[4], - mpi_mortars = user_data[5]) + # Return counters + return (interfaces = user_data[1], + mortars = user_data[2], + boundaries = user_data[3], + mpi_interfaces = user_data[4], + mpi_mortars 
= user_data[5]) end - - -end # @muladd \ No newline at end of file +end # @muladd diff --git a/src/solvers/dgsem_p4est/containers_parallel_2d.jl b/src/solvers/dgsem_p4est/containers_parallel_2d.jl index 8510b4a50c1..8c39e4a69c8 100644 --- a/src/solvers/dgsem_p4est/containers_parallel_2d.jl +++ b/src/solvers/dgsem_p4est/containers_parallel_2d.jl @@ -3,77 +3,81 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Initialize node_indices of MPI interface container -@inline function init_mpi_interface_node_indices!(mpi_interfaces::P4estMPIInterfaceContainer{2}, +@inline function init_mpi_interface_node_indices!(mpi_interfaces::P4estMPIInterfaceContainer{ + 2 + }, faces, local_side, orientation, mpi_interface_id) - # Align interface in positive coordinate direction of primary element. - # For orientation == 1, the secondary element needs to be indexed backwards - # relative to the interface. - if local_side == 1 || orientation == 0 - # Forward indexing - i = :i_forward - else - # Backward indexing - i = :i_backward - end + # Align interface in positive coordinate direction of primary element. + # For orientation == 1, the secondary element needs to be indexed backwards + # relative to the interface. + if local_side == 1 || orientation == 0 + # Forward indexing + i = :i_forward + else + # Backward indexing + i = :i_backward + end - if faces[local_side] == 0 - # Index face in negative x-direction - mpi_interfaces.node_indices[mpi_interface_id] = (:begin, i) - elseif faces[local_side] == 1 - # Index face in positive x-direction - mpi_interfaces.node_indices[mpi_interface_id] = (:end, i) - elseif faces[local_side] == 2 - # Index face in negative y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (i, :begin) - else # faces[local_side] == 3 - # Index face in positive y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (i, :end) - end + if faces[local_side] == 0 + # Index face in negative x-direction + mpi_interfaces.node_indices[mpi_interface_id] = (:begin, i) + elseif faces[local_side] == 1 + # Index face in positive x-direction + mpi_interfaces.node_indices[mpi_interface_id] = (:end, i) + elseif faces[local_side] == 2 + # Index face in negative y-direction + mpi_interfaces.node_indices[mpi_interface_id] = (i, :begin) + else # faces[local_side] == 3 + # Index face in positive y-direction + mpi_interfaces.node_indices[mpi_interface_id] = (i, :end) + end - return mpi_interfaces + return mpi_interfaces end - # Normal directions of small element surfaces are needed to calculate the mortar fluxes. Initialize # them for locally available small elements. 
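# (In 2D the underlying container created in `containers_parallel.jl` is indexed
# as `normal_directions[:, node, position, mortar]`, with `position in 1:2` for
# the two small elements; the large element, position 3, is skipped below.)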
-function init_normal_directions!(mpi_mortars::P4estMPIMortarContainer{2}, basis, elements) - @unpack local_neighbor_ids, local_neighbor_positions, node_indices = mpi_mortars - @unpack contravariant_vectors = elements - index_range = eachnode(basis) +function init_normal_directions!(mpi_mortars::P4estMPIMortarContainer{2}, basis, + elements) + @unpack local_neighbor_ids, local_neighbor_positions, node_indices = mpi_mortars + @unpack contravariant_vectors = elements + index_range = eachnode(basis) - @threaded for mortar in 1:nmpimortars(mpi_mortars) - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) + @threaded for mortar in 1:nmpimortars(mpi_mortars) + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) - i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], index_range) - j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], index_range) + i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], + index_range) + j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], + index_range) - for (element, position) in zip(local_neighbor_ids[mortar], local_neighbor_positions[mortar]) - # ignore large elements - if position == 3 - continue - end + for (element, position) in zip(local_neighbor_ids[mortar], + local_neighbor_positions[mortar]) + # ignore large elements + if position == 3 + continue + end - i_small = i_small_start - j_small = j_small_start - for node in eachnode(basis) - # Get the normal direction on the small element. - # Note, contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. - normal_direction = get_normal_direction(small_direction, contravariant_vectors, - i_small, j_small, element) - @views mpi_mortars.normal_directions[:, node, position, mortar] .= normal_direction + i_small = i_small_start + j_small = j_small_start + for node in eachnode(basis) + # Get the normal direction on the small element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(small_direction, + contravariant_vectors, + i_small, j_small, element) + @views mpi_mortars.normal_directions[:, node, position, mortar] .= normal_direction - i_small += i_small_step - j_small += j_small_step - end + i_small += i_small_step + j_small += j_small_step + end + end end - end end - - -end # muladd \ No newline at end of file +end # muladd diff --git a/src/solvers/dgsem_p4est/containers_parallel_3d.jl b/src/solvers/dgsem_p4est/containers_parallel_3d.jl index 3a9fe90a8fb..be4e2bfbfc9 100644 --- a/src/solvers/dgsem_p4est/containers_parallel_3d.jl +++ b/src/solvers/dgsem_p4est/containers_parallel_3d.jl @@ -3,128 +3,149 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Initialize node_indices of MPI interface container -@inline function init_mpi_interface_node_indices!(mpi_interfaces::P4estMPIInterfaceContainer{3}, +@inline function init_mpi_interface_node_indices!(mpi_interfaces::P4estMPIInterfaceContainer{ + 3 + }, faces, local_side, orientation, mpi_interface_id) - # Align interface at the primary element (primary element has surface indices (:i_forward, :j_forward)). - # The secondary element needs to be indexed differently. 
- if local_side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward - else # local_side == 2 - surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], orientation) - end + # Align interface at the primary element (primary element has surface indices (:i_forward, :j_forward)). + # The secondary element needs to be indexed differently. + if local_side == 1 + surface_index1 = :i_forward + surface_index2 = :j_forward + else # local_side == 2 + surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], + faces[1], + orientation) + end - if faces[local_side] == 0 - # Index face in negative x-direction - mpi_interfaces.node_indices[mpi_interface_id] = (:begin, surface_index1, surface_index2) - elseif faces[local_side] == 1 - # Index face in positive x-direction - mpi_interfaces.node_indices[mpi_interface_id] = (:end, surface_index1, surface_index2) - elseif faces[local_side] == 2 - # Index face in negative y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, :begin, surface_index2) - elseif faces[local_side] == 3 - # Index face in positive y-direction - mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, :end, surface_index2) - elseif faces[local_side] == 4 - # Index face in negative z-direction - mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, surface_index2, :begin) - else # faces[local_side] == 5 - # Index face in positive z-direction - mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, surface_index2, :end) - end + if faces[local_side] == 0 + # Index face in negative x-direction + mpi_interfaces.node_indices[mpi_interface_id] = (:begin, surface_index1, + surface_index2) + elseif faces[local_side] == 1 + # Index face in positive x-direction + mpi_interfaces.node_indices[mpi_interface_id] = (:end, surface_index1, + surface_index2) + elseif faces[local_side] == 2 + # Index face in negative y-direction + mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, :begin, + surface_index2) + elseif faces[local_side] == 3 + # Index face in positive y-direction + mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, :end, + surface_index2) + elseif faces[local_side] == 4 + # Index face in negative z-direction + mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, surface_index2, + :begin) + else # faces[local_side] == 5 + # Index face in positive z-direction + mpi_interfaces.node_indices[mpi_interface_id] = (surface_index1, surface_index2, + :end) + end - return mpi_interfaces + return mpi_interfaces end - # Initialize node_indices of MPI mortar container. Works the same as for its serial counterpart. # faces[1] is expected to be the face of the small side. @inline function init_mortar_node_indices!(mortars::P4estMPIMortarContainer{3}, faces, orientation, mortar_id) - for side in 1:2 - # Align mortar at small side. - # The large side needs to be indexed differently. - if side == 1 - surface_index1 = :i_forward - surface_index2 = :j_forward - else - surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], faces[1], orientation) - end + for side in 1:2 + # Align mortar at small side. + # The large side needs to be indexed differently. 
+ if side == 1 + surface_index1 = :i_forward + surface_index2 = :j_forward + else + surface_index1, surface_index2 = orientation_to_indices_p4est(faces[2], + faces[1], + orientation) + end - if faces[side] == 0 - # Index face in negative x-direction - mortars.node_indices[side, mortar_id] = (:begin, surface_index1, surface_index2) - elseif faces[side] == 1 - # Index face in positive x-direction - mortars.node_indices[side, mortar_id] = (:end, surface_index1, surface_index2) - elseif faces[side] == 2 - # Index face in negative y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :begin, surface_index2) - elseif faces[side] == 3 - # Index face in positive y-direction - mortars.node_indices[side, mortar_id] = (surface_index1, :end, surface_index2) - elseif faces[side] == 4 - # Index face in negative z-direction - mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, :begin) - else # faces[side] == 5 - # Index face in positive z-direction - mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, :end) + if faces[side] == 0 + # Index face in negative x-direction + mortars.node_indices[side, mortar_id] = (:begin, surface_index1, + surface_index2) + elseif faces[side] == 1 + # Index face in positive x-direction + mortars.node_indices[side, mortar_id] = (:end, surface_index1, + surface_index2) + elseif faces[side] == 2 + # Index face in negative y-direction + mortars.node_indices[side, mortar_id] = (surface_index1, :begin, + surface_index2) + elseif faces[side] == 3 + # Index face in positive y-direction + mortars.node_indices[side, mortar_id] = (surface_index1, :end, + surface_index2) + elseif faces[side] == 4 + # Index face in negative z-direction + mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, + :begin) + else # faces[side] == 5 + # Index face in positive z-direction + mortars.node_indices[side, mortar_id] = (surface_index1, surface_index2, + :end) + end end - end - return mortars + return mortars end - # Normal directions of small element surfaces are needed to calculate the mortar fluxes. Initialize # them for locally available small elements. -function init_normal_directions!(mpi_mortars::P4estMPIMortarContainer{3}, basis, elements) - @unpack local_neighbor_ids, local_neighbor_positions, node_indices = mpi_mortars - @unpack contravariant_vectors = elements - index_range = eachnode(basis) - - @threaded for mortar in 1:nmpimortars(mpi_mortars) - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) - - i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], index_range) - j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], index_range) - k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], index_range) - - for (element, position) in zip(local_neighbor_ids[mortar], local_neighbor_positions[mortar]) - # ignore large elements - if position == 5 - continue - end - - i_small = i_small_start - j_small = j_small_start - k_small = k_small_start - for j in eachnode(basis) - for i in eachnode(basis) - # Get the normal direction on the small element. - # Note, contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. 
- normal_direction = get_normal_direction(small_direction, contravariant_vectors, - i_small, j_small, k_small, element) - @views mpi_mortars.normal_directions[:, i, j, position, mortar] .= normal_direction - - i_small += i_small_step_i - j_small += j_small_step_i - k_small += k_small_step_i +function init_normal_directions!(mpi_mortars::P4estMPIMortarContainer{3}, basis, + elements) + @unpack local_neighbor_ids, local_neighbor_positions, node_indices = mpi_mortars + @unpack contravariant_vectors = elements + index_range = eachnode(basis) + + @threaded for mortar in 1:nmpimortars(mpi_mortars) + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) + + i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], + index_range) + j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], + index_range) + k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], + index_range) + + for (element, position) in zip(local_neighbor_ids[mortar], + local_neighbor_positions[mortar]) + # ignore large elements + if position == 5 + continue + end + + i_small = i_small_start + j_small = j_small_start + k_small = k_small_start + for j in eachnode(basis) + for i in eachnode(basis) + # Get the normal direction on the small element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(small_direction, + contravariant_vectors, + i_small, j_small, k_small, + element) + @views mpi_mortars.normal_directions[:, i, j, position, mortar] .= normal_direction + + i_small += i_small_step_i + j_small += j_small_step_i + k_small += k_small_step_i + end + i_small += i_small_step_j + j_small += j_small_step_j + k_small += k_small_step_j + end end - i_small += i_small_step_j - j_small += j_small_step_j - k_small += k_small_step_j - end end - end end - - -end # muladd \ No newline at end of file +end # muladd diff --git a/src/solvers/dgsem_p4est/dg.jl b/src/solvers/dgsem_p4est/dg.jl index dabaa896fbf..a7cc1eee04d 100644 --- a/src/solvers/dgsem_p4est/dg.jl +++ b/src/solvers/dgsem_p4est/dg.jl @@ -3,48 +3,47 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # This method is called when a SemidiscretizationHyperbolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. 
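# (The specialized parts are merged by splatting named tuples; a minimal sketch,
# where `extra` is a hypothetical field used only for illustration:
#   cache = (; elements, interfaces)
#   cache = (; cache..., (; extra = nothing)...)
# yields a named tuple containing all three fields.)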
-function create_cache(mesh::P4estMesh, equations::AbstractEquations, dg::DG, ::Any, ::Type{uEltype}) where {uEltype<:Real} - # Make sure to balance the `p4est` before creating any containers - # in case someone has tampered with the `p4est` after creating the mesh - balance!(mesh) +function create_cache(mesh::P4estMesh, equations::AbstractEquations, dg::DG, ::Any, + ::Type{uEltype}) where {uEltype <: Real} + # Make sure to balance the `p4est` before creating any containers + # in case someone has tampered with the `p4est` after creating the mesh + balance!(mesh) - elements = init_elements(mesh, equations, dg.basis, uEltype) - interfaces = init_interfaces(mesh, equations, dg.basis, elements) - boundaries = init_boundaries(mesh, equations, dg.basis, elements) - mortars = init_mortars(mesh, equations, dg.basis, elements) + elements = init_elements(mesh, equations, dg.basis, uEltype) + interfaces = init_interfaces(mesh, equations, dg.basis, elements) + boundaries = init_boundaries(mesh, equations, dg.basis, elements) + mortars = init_mortars(mesh, equations, dg.basis, elements) - cache = (; elements, interfaces, boundaries, mortars) + cache = (; elements, interfaces, boundaries, mortars) - # Add specialized parts of the cache required to compute the volume integral etc. - cache = (;cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) - cache = (;cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + cache = (; cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) - return cache + return cache end - # Extract outward-pointing normal direction # (contravariant vector ±Ja^i, i = index) # Note that this vector is not normalized @inline function get_normal_direction(direction, contravariant_vectors, indices...) - - orientation = (direction + 1) >> 1 - normal = get_contravariant_vector(orientation, contravariant_vectors, indices...) - - # Contravariant vectors at interfaces in negative coordinate direction are pointing inwards - if isodd(direction) - return -normal - else - return normal - end + orientation = (direction + 1) >> 1 + normal = get_contravariant_vector(orientation, contravariant_vectors, indices...) + + # Contravariant vectors at interfaces in negative coordinate direction are pointing inwards + if isodd(direction) + return -normal + else + return normal + end end - include("containers.jl") include("dg_2d.jl") @@ -52,6 +51,4 @@ include("dg_2d_parabolic.jl") include("dg_3d.jl") include("dg_parallel.jl") - - end # @muladd diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index a6d3d6abaeb..bc7d9edb6ef 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -3,23 +3,23 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # The methods below are specialized on the mortar type # and called from the basic `create_cache` method at the top. 
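# (Each `*_threaded` field below holds one preallocated `MArray` buffer per
# thread; a minimal sketch of the intended use inside a `@threaded` loop is
#   fstar = fstar_upper_threaded[Threads.threadid()]
# which avoids heap allocations in the hot mortar-flux loops.)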
-function create_cache(mesh::P4estMesh{2}, equations, mortar_l2::LobattoLegendreMortarL2, uEltype) - # TODO: Taal performance using different types - MA2d = MArray{Tuple{nvariables(equations), nnodes(mortar_l2)}, - uEltype, 2, - nvariables(equations) * nnodes(mortar_l2)} - fstar_upper_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] - fstar_lower_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] - u_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] - - (; fstar_upper_threaded, fstar_lower_threaded, u_threaded) +function create_cache(mesh::P4estMesh{2}, equations, mortar_l2::LobattoLegendreMortarL2, + uEltype) + # TODO: Taal performance using different types + MA2d = MArray{Tuple{nvariables(equations), nnodes(mortar_l2)}, + uEltype, 2, + nvariables(equations) * nnodes(mortar_l2)} + fstar_upper_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] + fstar_lower_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] + u_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] + + (; fstar_upper_threaded, fstar_lower_threaded, u_threaded) end - # index_to_start_step_2d(index::Symbol, index_range) # # Given a symbolic `index` and an `indexrange` (usually `eachnode(dg)`), @@ -42,154 +42,166 @@ end # j_volume += j_volume_step # end @inline function index_to_start_step_2d(index::Symbol, index_range) - index_begin = first(index_range) - index_end = last(index_range) - - if index === :begin - return index_begin, 0 - elseif index === :end - return index_end, 0 - elseif index === :i_forward - return index_begin, 1 - else # if index === :i_backward - return index_end, -1 - end + index_begin = first(index_range) + index_end = last(index_range) + + if index === :begin + return index_begin, 0 + elseif index === :end + return index_end, 0 + elseif index === :i_forward + return index_begin, 1 + else # if index === :i_backward + return index_end, -1 + end end # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache, u, mesh::P4estMesh{2}, equations, surface_integral, dg::DG) - @unpack interfaces = cache - index_range = eachnode(dg) - - @threaded for interface in eachinterface(dg, cache) - # Copy solution data from the primary element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - # Note that in the current implementation, the interface will be - # "aligned at the primary element", i.e., the index of the primary side - # will always run forwards. - primary_element = interfaces.neighbor_ids[1, interface] - primary_indices = interfaces.node_indices[1, interface] - - i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], index_range) - j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, i_primary, j_primary, primary_element] - end - i_primary += i_primary_step - j_primary += j_primary_step - end + @unpack interfaces = cache + index_range = eachnode(dg) + + @threaded for interface in eachinterface(dg, cache) + # Copy solution data from the primary element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the index of the primary side + # will always run forwards. 
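+        # For example (a sketch), `primary_indices == (:end, :i_forward)` selects
+        # the face i = nnodes(dg) of the primary element and walks it with
+        # increasing j, via the start/step values computed below.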
+ primary_element = interfaces.neighbor_ids[1, interface] + primary_indices = interfaces.node_indices[1, interface] + + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + index_range) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + for i in eachnode(dg) + for v in eachvariable(equations) + interfaces.u[1, v, i, interface] = u[v, i_primary, j_primary, + primary_element] + end + i_primary += i_primary_step + j_primary += j_primary_step + end - # Copy solution data from the secondary element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - secondary_element = interfaces.neighbor_ids[2, interface] - secondary_indices = interfaces.node_indices[2, interface] - - i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1], index_range) - j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2], index_range) - - i_secondary = i_secondary_start - j_secondary = j_secondary_start - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[2, v, i, interface] = u[v, i_secondary, j_secondary, secondary_element] - end - i_secondary += i_secondary_step - j_secondary += j_secondary_step + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + secondary_element = interfaces.neighbor_ids[2, interface] + secondary_indices = interfaces.node_indices[2, interface] + + i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1], + index_range) + j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2], + index_range) + + i_secondary = i_secondary_start + j_secondary = j_secondary_start + for i in eachnode(dg) + for v in eachvariable(equations) + interfaces.u[2, v, i, interface] = u[v, i_secondary, j_secondary, + secondary_element] + end + i_secondary += i_secondary_step + j_secondary += j_secondary_step + end end - end - return nothing + return nothing end - function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{2}, nonconservative_terms, equations, surface_integral, dg::DG, cache) - @unpack neighbor_ids, node_indices = cache.interfaces - @unpack contravariant_vectors = cache.elements - index_range = eachnode(dg) - index_end = last(index_range) - - @threaded for interface in eachinterface(dg, cache) - # Get element and side index information on the primary element - primary_element = neighbor_ids[1, interface] - primary_indices = node_indices[1, interface] - primary_direction = indices2direction(primary_indices) - - # Create the local i,j indexing on the primary element used to pull normal direction information - i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], index_range) - j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - - # Get element and side index information on the secondary element - secondary_element = neighbor_ids[2, interface] - secondary_indices = node_indices[2, interface] - secondary_direction = indices2direction(secondary_indices) - - # Initiate the secondary index to be used in the surface for loop. - # This index on the primary side will always run forward but - # the secondary index might need to run backwards for flipped sides. 
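+        # For example, if `secondary_indices == (:i_backward, :end)`, the
+        # secondary node index starts at `nnodes(dg)` and decreases by one
+        # per surface node.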
- if :i_backward in secondary_indices - node_secondary = index_end - node_secondary_step = -1 - else - node_secondary = 1 - node_secondary_step = 1 - end + @unpack neighbor_ids, node_indices = cache.interfaces + @unpack contravariant_vectors = cache.elements + index_range = eachnode(dg) + index_end = last(index_range) + + @threaded for interface in eachinterface(dg, cache) + # Get element and side index information on the primary element + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + primary_direction = indices2direction(primary_indices) + + # Create the local i,j indexing on the primary element used to pull normal direction information + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + index_range) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + + # Get element and side index information on the secondary element + secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + secondary_direction = indices2direction(secondary_indices) + + # Initiate the secondary index to be used in the surface for loop. + # This index on the primary side will always run forward but + # the secondary index might need to run backwards for flipped sides. + if :i_backward in secondary_indices + node_secondary = index_end + node_secondary_step = -1 + else + node_secondary = 1 + node_secondary_step = 1 + end - for node in eachnode(dg) - # Get the normal direction on the primary element. - # Contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. - normal_direction = get_normal_direction(primary_direction, contravariant_vectors, - i_primary, j_primary, primary_element) - - calc_interface_flux!(surface_flux_values, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - interface, normal_direction, - node, primary_direction, primary_element, - node_secondary, secondary_direction, secondary_element) - - # Increment primary element indices to pull the normal direction - i_primary += i_primary_step - j_primary += j_primary_step - # Increment the surface node index along the secondary element - node_secondary += node_secondary_step + for node in eachnode(dg) + # Get the normal direction on the primary element. + # Contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. 
+ normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, + primary_element) + + calc_interface_flux!(surface_flux_values, mesh, nonconservative_terms, + equations, + surface_integral, dg, cache, + interface, normal_direction, + node, primary_direction, primary_element, + node_secondary, secondary_direction, secondary_element) + + # Increment primary element indices to pull the normal direction + i_primary += i_primary_step + j_primary += j_primary_step + # Increment the surface node index along the secondary element + node_secondary += node_secondary_step + end end - end - return nothing + return nothing end - # Inlined version of the interface flux computation for conservation laws @inline function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{2}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache, interface_index, normal_direction, - primary_node_index, primary_direction_index, primary_element_index, - secondary_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces - @unpack surface_flux = surface_integral + primary_node_index, primary_direction_index, + primary_element_index, + secondary_node_index, secondary_direction_index, + secondary_element_index) + @unpack u = cache.interfaces + @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, interface_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, + interface_index) - flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) + flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) - for v in eachvariable(equations) - surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = flux_[v] - surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = -flux_[v] - end + for v in eachvariable(equations) + surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = flux_[v] + surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = -flux_[v] + end end # Inlined version of the interface flux computation for equations with conservative and nonconservative terms @@ -198,129 +210,135 @@ end nonconservative_terms::True, equations, surface_integral, dg::DG, cache, interface_index, normal_direction, - primary_node_index, primary_direction_index, primary_element_index, - secondary_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces - surface_flux, nonconservative_flux = surface_integral.surface_flux - - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, interface_index) - - flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) - - # Compute both nonconservative fluxes - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # However, both are the same at watertight interfaces, so we pass the - # `normal_direction` twice. 
- noncons_primary = nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - noncons_secondary = nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - # Store the flux with nonconservative terms on the primary and secondary elements - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = ( - flux_[v] + 0.5 * noncons_primary[v]) - surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = -( - flux_[v] + 0.5 * noncons_secondary[v]) - end + primary_node_index, primary_direction_index, + primary_element_index, + secondary_node_index, secondary_direction_index, + secondary_element_index) + @unpack u = cache.interfaces + surface_flux, nonconservative_flux = surface_integral.surface_flux + + u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, + interface_index) + + flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) + + # Compute both nonconservative fluxes + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # However, both are the same at watertight interfaces, so we pass the + # `normal_direction` twice. + noncons_primary = nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + noncons_secondary = nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + # Store the flux with nonconservative terms on the primary and secondary elements + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = (flux_[v] + + 0.5 * + noncons_primary[v]) + surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = -(flux_[v] + + 0.5 * + noncons_secondary[v]) + end end - function prolong2boundaries!(cache, u, mesh::P4estMesh{2}, equations, surface_integral, dg::DG) - @unpack boundaries = cache - index_range = eachnode(dg) - - @threaded for boundary in eachboundary(dg, cache) - # Copy solution data from the element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - element = boundaries.neighbor_ids[boundary] - node_indices = boundaries.node_indices[boundary] - - i_node_start, i_node_step = index_to_start_step_2d(node_indices[1], index_range) - j_node_start, j_node_step = index_to_start_step_2d(node_indices[2], index_range) - - i_node = i_node_start - j_node = j_node_start - for i in eachnode(dg) - for v in eachvariable(equations) - boundaries.u[v, i, boundary] = u[v, i_node, j_node, element] - end - i_node += i_node_step - j_node += j_node_step + @unpack boundaries = cache + index_range = eachnode(dg) + + @threaded for boundary in eachboundary(dg, cache) + # Copy solution data from the element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. 
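+        # For example, `node_indices == (:i_forward, :begin)` selects the face
+        # j = 1 of the element and walks it with increasing i.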
+ element = boundaries.neighbor_ids[boundary] + node_indices = boundaries.node_indices[boundary] + + i_node_start, i_node_step = index_to_start_step_2d(node_indices[1], index_range) + j_node_start, j_node_step = index_to_start_step_2d(node_indices[2], index_range) + + i_node = i_node_start + j_node = j_node_start + for i in eachnode(dg) + for v in eachvariable(equations) + boundaries.u[v, i, boundary] = u[v, i_node, j_node, element] + end + i_node += i_node_step + j_node += j_node_step + end end - end - return nothing + return nothing end - function calc_boundary_flux!(cache, t, boundary_condition, boundary_indexing, mesh::P4estMesh{2}, equations, surface_integral, dg::DG) - @unpack boundaries = cache - @unpack surface_flux_values = cache.elements - index_range = eachnode(dg) - - @threaded for local_index in eachindex(boundary_indexing) - # Use the local index to get the global boundary index from the pre-sorted list - boundary = boundary_indexing[local_index] - - # Get information on the adjacent element, compute the surface fluxes, - # and store them - element = boundaries.neighbor_ids[boundary] - node_indices = boundaries.node_indices[boundary] - direction = indices2direction(node_indices) - - i_node_start, i_node_step = index_to_start_step_2d(node_indices[1], index_range) - j_node_start, j_node_step = index_to_start_step_2d(node_indices[2], index_range) - - i_node = i_node_start - j_node = j_node_start - for node in eachnode(dg) - calc_boundary_flux!(surface_flux_values, t, boundary_condition, - mesh, have_nonconservative_terms(equations), - equations, surface_integral, dg, cache, - i_node, j_node, - node, direction, element, boundary) - - i_node += i_node_step - j_node += j_node_step + @unpack boundaries = cache + @unpack surface_flux_values = cache.elements + index_range = eachnode(dg) + + @threaded for local_index in eachindex(boundary_indexing) + # Use the local index to get the global boundary index from the pre-sorted list + boundary = boundary_indexing[local_index] + + # Get information on the adjacent element, compute the surface fluxes, + # and store them + element = boundaries.neighbor_ids[boundary] + node_indices = boundaries.node_indices[boundary] + direction = indices2direction(node_indices) + + i_node_start, i_node_step = index_to_start_step_2d(node_indices[1], index_range) + j_node_start, j_node_step = index_to_start_step_2d(node_indices[2], index_range) + + i_node = i_node_start + j_node = j_node_start + for node in eachnode(dg) + calc_boundary_flux!(surface_flux_values, t, boundary_condition, + mesh, have_nonconservative_terms(equations), + equations, surface_integral, dg, cache, + i_node, j_node, + node, direction, element, boundary) + + i_node += i_node_step + j_node += j_node_step + end end - end end - # inlined version of the boundary flux calculation along a physical interface @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, mesh::P4estMesh{2}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache, i_index, j_index, - node_index, direction_index, element_index, boundary_index) - @unpack boundaries = cache - @unpack node_coordinates, contravariant_vectors = cache.elements - @unpack surface_flux = surface_integral + node_index, direction_index, element_index, + boundary_index) + @unpack boundaries = cache + @unpack node_coordinates, contravariant_vectors = cache.elements + @unpack surface_flux = surface_integral - # Extract solution data from boundary container - u_inner = get_node_vars(boundaries.u, equations, dg, 
node_index, boundary_index) + # Extract solution data from boundary container + u_inner = get_node_vars(boundaries.u, equations, dg, node_index, boundary_index) - # Outward-pointing normal direction (not normalized) - normal_direction = get_normal_direction(direction_index, contravariant_vectors, - i_index, j_index, element_index) + # Outward-pointing normal direction (not normalized) + normal_direction = get_normal_direction(direction_index, contravariant_vectors, + i_index, j_index, element_index) - # Coordinates at boundary node - x = get_node_coords(node_coordinates, equations, dg, i_index, j_index, element_index) + # Coordinates at boundary node + x = get_node_coords(node_coordinates, equations, dg, i_index, j_index, + element_index) - flux_ = boundary_condition(u_inner, normal_direction, x, t, surface_flux, equations) + flux_ = boundary_condition(u_inner, normal_direction, x, t, surface_flux, equations) - # Copy flux to element storage in the correct orientation - for v in eachvariable(equations) - surface_flux_values[v, node_index, direction_index, element_index] = flux_[v] - end + # Copy flux to element storage in the correct orientation + for v in eachvariable(equations) + surface_flux_values[v, node_index, direction_index, element_index] = flux_[v] + end end # inlined version of the boundary flux with nonconservative terms calculation along a physical interface @@ -329,159 +347,168 @@ end nonconservative_terms::True, equations, surface_integral, dg::DG, cache, i_index, j_index, - node_index, direction_index, element_index, boundary_index) - @unpack boundaries = cache - @unpack node_coordinates, contravariant_vectors = cache.elements - surface_flux, nonconservative_flux = surface_integral.surface_flux - - # Extract solution data from boundary container - u_inner = get_node_vars(boundaries.u, equations, dg, node_index, boundary_index) - - # Outward-pointing normal direction (not normalized) - normal_direction = get_normal_direction(direction_index, contravariant_vectors, - i_index, j_index, element_index) - - # Coordinates at boundary node - x = get_node_coords(node_coordinates, equations, dg, i_index, j_index, element_index) - - # Call pointwise numerical flux function for the conservative part - # in the normal direction on the boundary - flux_ = boundary_condition(u_inner, normal_direction, x, t, surface_flux, equations) - - # Compute pointwise nonconservative numerical flux at the boundary. 
- # Note: This does not set any type of boundary condition for the nonconservative term - noncons_ = nonconservative_flux(u_inner, u_inner, normal_direction, normal_direction, equations) - - # Copy flux to element storage in the correct orientation - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, node_index, direction_index, element_index] = flux_[v] + 0.5 * noncons_[v] - end + node_index, direction_index, element_index, + boundary_index) + @unpack boundaries = cache + @unpack node_coordinates, contravariant_vectors = cache.elements + surface_flux, nonconservative_flux = surface_integral.surface_flux + + # Extract solution data from boundary container + u_inner = get_node_vars(boundaries.u, equations, dg, node_index, boundary_index) + + # Outward-pointing normal direction (not normalized) + normal_direction = get_normal_direction(direction_index, contravariant_vectors, + i_index, j_index, element_index) + + # Coordinates at boundary node + x = get_node_coords(node_coordinates, equations, dg, i_index, j_index, + element_index) + + # Call pointwise numerical flux function for the conservative part + # in the normal direction on the boundary + flux_ = boundary_condition(u_inner, normal_direction, x, t, surface_flux, equations) + + # Compute pointwise nonconservative numerical flux at the boundary. + # Note: This does not set any type of boundary condition for the nonconservative term + noncons_ = nonconservative_flux(u_inner, u_inner, normal_direction, + normal_direction, equations) + + # Copy flux to element storage in the correct orientation + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, node_index, direction_index, element_index] = flux_[v] + + 0.5 * + noncons_[v] + end end - function prolong2mortars!(cache, u, mesh::P4estMesh{2}, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DGSEM) - @unpack neighbor_ids, node_indices = cache.mortars - index_range = eachnode(dg) - - @threaded for mortar in eachmortar(dg, cache) - # Copy solution data from the small elements using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - small_indices = node_indices[1, mortar] - - i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], index_range) - j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], index_range) - - for position in 1:2 - i_small = i_small_start - j_small = j_small_start - element = neighbor_ids[position, mortar] - for i in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u[1, v, position, i, mortar] = u[v, i_small, j_small, element] + @unpack neighbor_ids, node_indices = cache.mortars + index_range = eachnode(dg) + + @threaded for mortar in eachmortar(dg, cache) + # Copy solution data from the small elements using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. 
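+        # As a concrete illustration of this "delayed indexing" (a sketch based on
+        # the usual behavior of `index_to_start_step_2d`): for polynomial degree 3,
+        # `index_range = 1:4`, so one would expect
+        #   index_to_start_step_2d(:i_forward,  1:4) == (1,  1)  # visit nodes 1, 2, 3, 4
+        #   index_to_start_step_2d(:i_backward, 1:4) == (4, -1)  # visit nodes 4, 3, 2, 1
+        #   index_to_start_step_2d(:end,        1:4) == (4,  0)  # stay fixed at node 4
+        # i.e., a face is traversed by starting at `*_start` and adding `*_step`
+        # once per node, which selects both the face and its orientation.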
+ small_indices = node_indices[1, mortar] + + i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], + index_range) + j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], + index_range) + + for position in 1:2 + i_small = i_small_start + j_small = j_small_start + element = neighbor_ids[position, mortar] + for i in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u[1, v, position, i, mortar] = u[v, i_small, j_small, + element] + end + i_small += i_small_step + j_small += j_small_step + end end - i_small += i_small_step - j_small += j_small_step - end - end - - # Buffer to copy solution values of the large element in the correct orientation - # before interpolating - u_buffer = cache.u_threaded[Threads.threadid()] - - # Copy solution of large element face to buffer in the - # correct orientation - large_indices = node_indices[2, mortar] + # Buffer to copy solution values of the large element in the correct orientation + # before interpolating + u_buffer = cache.u_threaded[Threads.threadid()] + + # Copy solution of large element face to buffer in the + # correct orientation + large_indices = node_indices[2, mortar] + + i_large_start, i_large_step = index_to_start_step_2d(large_indices[1], + index_range) + j_large_start, j_large_step = index_to_start_step_2d(large_indices[2], + index_range) + + i_large = i_large_start + j_large = j_large_start + element = neighbor_ids[3, mortar] + for i in eachnode(dg) + for v in eachvariable(equations) + u_buffer[v, i] = u[v, i_large, j_large, element] + end + i_large += i_large_step + j_large += j_large_step + end - i_large_start, i_large_step = index_to_start_step_2d(large_indices[1], index_range) - j_large_start, j_large_step = index_to_start_step_2d(large_indices[2], index_range) - - i_large = i_large_start - j_large = j_large_start - element = neighbor_ids[3, mortar] - for i in eachnode(dg) - for v in eachvariable(equations) - u_buffer[v, i] = u[v, i_large, j_large, element] - end - i_large += i_large_step - j_large += j_large_step + # Interpolate large element face data from buffer to small face locations + multiply_dimensionwise!(view(cache.mortars.u, 2, :, 1, :, mortar), + mortar_l2.forward_lower, + u_buffer) + multiply_dimensionwise!(view(cache.mortars.u, 2, :, 2, :, mortar), + mortar_l2.forward_upper, + u_buffer) end - # Interpolate large element face data from buffer to small face locations - multiply_dimensionwise!(view(cache.mortars.u, 2, :, 1, :, mortar), - mortar_l2.forward_lower, - u_buffer) - multiply_dimensionwise!(view(cache.mortars.u, 2, :, 2, :, mortar), - mortar_l2.forward_upper, - u_buffer) - end - - return nothing + return nothing end - function calc_mortar_flux!(surface_flux_values, mesh::P4estMesh{2}, nonconservative_terms, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - @unpack neighbor_ids, node_indices = cache.mortars - @unpack contravariant_vectors = cache.elements - @unpack fstar_upper_threaded, fstar_lower_threaded = cache - index_range = eachnode(dg) - - @threaded for mortar in eachmortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar = (fstar_lower_threaded[Threads.threadid()], - fstar_upper_threaded[Threads.threadid()]) - - # Get index information on the small elements - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) + @unpack neighbor_ids, node_indices = cache.mortars + @unpack contravariant_vectors = cache.elements + @unpack fstar_upper_threaded, fstar_lower_threaded = 
cache + index_range = eachnode(dg) + + @threaded for mortar in eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar = (fstar_lower_threaded[Threads.threadid()], + fstar_upper_threaded[Threads.threadid()]) + + # Get index information on the small elements + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) + + i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], + index_range) + j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], + index_range) + + for position in 1:2 + i_small = i_small_start + j_small = j_small_start + element = neighbor_ids[position, mortar] + for node in eachnode(dg) + # Get the normal direction on the small element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(small_direction, + contravariant_vectors, + i_small, j_small, element) + + calc_mortar_flux!(fstar, mesh, nonconservative_terms, equations, + surface_integral, dg, cache, + mortar, position, normal_direction, + node) + + i_small += i_small_step + j_small += j_small_step + end + end - i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], index_range) - j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], index_range) + # Buffer to interpolate flux values of the large element to before + # copying in the correct orientation + u_buffer = cache.u_threaded[Threads.threadid()] - for position in 1:2 - i_small = i_small_start - j_small = j_small_start - element = neighbor_ids[position, mortar] - for node in eachnode(dg) - # Get the normal direction on the small element. - # Note, contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. 
- normal_direction = get_normal_direction(small_direction, contravariant_vectors, - i_small, j_small, element) - - calc_mortar_flux!(fstar, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - mortar, position, normal_direction, - node) - - i_small += i_small_step - j_small += j_small_step - end + mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar, u_buffer) end - # Buffer to interpolate flux values of the large element to before - # copying in the correct orientation - u_buffer = cache.u_threaded[Threads.threadid()] - - mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, - mortar, fstar, u_buffer) - end - - return nothing + return nothing end - # Inlined version of the mortar flux computation on small elements for conservation laws @inline function calc_mortar_flux!(fstar, mesh::P4estMesh{2}, @@ -489,15 +516,16 @@ end surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, node_index) - @unpack u = cache.mortars - @unpack surface_flux = surface_integral + @unpack u = cache.mortars + @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, node_index, mortar_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, node_index, + mortar_index) - flux = surface_flux(u_ll, u_rr, normal_direction, equations) + flux = surface_flux(u_ll, u_rr, normal_direction, equations) - # Copy flux to buffer - set_node_vars!(fstar[position_index], flux, equations, dg, node_index) + # Copy flux to buffer + set_node_vars!(fstar[position_index], flux, equations, dg, node_index) end # Inlined version of the mortar flux computation on small elements for equations with conservative and @@ -508,124 +536,129 @@ end surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, node_index) - @unpack u = cache.mortars - surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u = cache.mortars + surface_flux, nonconservative_flux = surface_integral.surface_flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, node_index, mortar_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, node_index, + mortar_index) - # Compute conservative flux - flux = surface_flux(u_ll, u_rr, normal_direction, equations) + # Compute conservative flux + flux = surface_flux(u_ll, u_rr, normal_direction, equations) - # Compute nonconservative flux and add it to the conservative flux. - # The nonconservative flux is scaled by a factor of 0.5 based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - noncons = nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) + # Compute nonconservative flux and add it to the conservative flux. 
+ # The nonconservative flux is scaled by a factor of 0.5 based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + noncons = nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, + equations) - flux_plus_noncons = flux + 0.5 * noncons + flux_plus_noncons = flux + 0.5 * noncons - # Copy to buffer - set_node_vars!(fstar[position_index], flux_plus_noncons, equations, dg, node_index) + # Copy to buffer + set_node_vars!(fstar[position_index], flux_plus_noncons, equations, dg, node_index) end - @inline function mortar_fluxes_to_elements!(surface_flux_values, mesh::P4estMesh{2}, equations, mortar_l2::LobattoLegendreMortarL2, dg::DGSEM, cache, mortar, fstar, u_buffer) - @unpack neighbor_ids, node_indices = cache.mortars - - # Copy solution small to small - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) - - for position in 1:2 - element = neighbor_ids[position, mortar] - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i, small_direction, element] = fstar[position][v, i] - end - end - end - - # Project small fluxes to large element. - multiply_dimensionwise!(u_buffer, - mortar_l2.reverse_upper, fstar[2], - mortar_l2.reverse_lower, fstar[1]) - - # The flux is calculated in the outward direction of the small elements, - # so the sign must be switched to get the flux in outward direction - # of the large element. - # The contravariant vectors of the large element (and therefore the normal - # vectors of the large element as well) are twice as large as the - # contravariant vectors of the small elements. Therefore, the flux needs - # to be scaled by a factor of 2 to obtain the flux of the large element. - u_buffer .*= -2 - - # Copy interpolated flux values from buffer to large element face in the - # correct orientation. - # Note that the index of the small sides will always run forward but - # the index of the large side might need to run backwards for flipped sides. - large_element = neighbor_ids[3, mortar] - large_indices = node_indices[2, mortar] - large_direction = indices2direction(large_indices) - - if :i_backward in large_indices - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, end + 1 - i, large_direction, large_element] = u_buffer[v, i] - end + @unpack neighbor_ids, node_indices = cache.mortars + + # Copy solution small to small + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) + + for position in 1:2 + element = neighbor_ids[position, mortar] + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i, small_direction, element] = fstar[position][v, + i] + end + end end - else - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i, large_direction, large_element] = u_buffer[v, i] - end + + # Project small fluxes to large element. + multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_upper, fstar[2], + mortar_l2.reverse_lower, fstar[1]) + + # The flux is calculated in the outward direction of the small elements, + # so the sign must be switched to get the flux in outward direction + # of the large element. + # The contravariant vectors of the large element (and therefore the normal + # vectors of the large element as well) are twice as large as the + # contravariant vectors of the small elements. Therefore, the flux needs + # to be scaled by a factor of 2 to obtain the flux of the large element. 
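+    # Worked illustration of the comment above (no additional logic): if the
+    # projected small-side flux at some node is 1.5, the large element receives
+    # (-1) * 2 * 1.5 = -3.0 there; the factor -1 flips the normal to point
+    # outward from the large element, and the factor 2 accounts for its
+    # contravariant vectors being twice as large as those of the small elements.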
+ u_buffer .*= -2 + + # Copy interpolated flux values from buffer to large element face in the + # correct orientation. + # Note that the index of the small sides will always run forward but + # the index of the large side might need to run backwards for flipped sides. + large_element = neighbor_ids[3, mortar] + large_indices = node_indices[2, mortar] + large_direction = indices2direction(large_indices) + + if :i_backward in large_indices + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, end + 1 - i, large_direction, large_element] = u_buffer[v, + i] + end + end + else + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i, large_direction, large_element] = u_buffer[v, + i] + end + end end - end - return nothing + return nothing end - function calc_surface_integral!(du, u, mesh::P4estMesh{2}, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, cache) - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache.elements - - # Note that all fluxes have been computed with outward-pointing normal vectors. - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). - factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, element] = ( - du[v, 1, l, element] + surface_flux_values[v, l, 1, element] * factor_1) - - # surface at +x - du[v, nnodes(dg), l, element] = ( - du[v, nnodes(dg), l, element] + surface_flux_values[v, l, 2, element] * factor_2) - - # surface at -y - du[v, l, 1, element] = ( - du[v, l, 1, element] + surface_flux_values[v, l, 3, element] * factor_1) - - # surface at +y - du[v, l, nnodes(dg), element] = ( - du[v, l, nnodes(dg), element] + surface_flux_values[v, l, 4, element] * factor_2) - end + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). 
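+    # Sketch of the pattern this enables (illustration only):
+    #   du[v, 1, l, element] = du[v, 1, l, element] + surface_flux_values[v, l, 1, element] * factor_1
+    # can be rewritten by `@muladd` into a fused multiply-add,
+    #   du[v, 1, l, element] = muladd(surface_flux_values[v, l, 1, element], factor_1, du[v, 1, l, element])
+    # whereas, per the comment above, the `+=` form would not be transformed.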
+ factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for l in eachnode(dg) + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, element] = (du[v, 1, l, element] + + surface_flux_values[v, l, 1, element] * + factor_1) + + # surface at +x + du[v, nnodes(dg), l, element] = (du[v, nnodes(dg), l, element] + + surface_flux_values[v, l, 2, element] * + factor_2) + + # surface at -y + du[v, l, 1, element] = (du[v, l, 1, element] + + surface_flux_values[v, l, 3, element] * + factor_1) + + # surface at +y + du[v, l, nnodes(dg), element] = (du[v, l, nnodes(dg), element] + + surface_flux_values[v, l, 4, element] * + factor_2) + end + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl index 7ddb83f97db..e73a8cda9b8 100644 --- a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl @@ -4,408 +4,472 @@ function create_cache_parabolic(mesh::P4estMesh, equations_hyperbolic::AbstractEquations, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, RealT, uEltype) + balance!(mesh) - balance!(mesh) + elements = init_elements(mesh, equations_hyperbolic, dg.basis, uEltype) + interfaces = init_interfaces(mesh, equations_hyperbolic, dg.basis, elements) + boundaries = init_boundaries(mesh, equations_hyperbolic, dg.basis, elements) - elements = init_elements(mesh, equations_hyperbolic, dg.basis, uEltype) - interfaces = init_interfaces(mesh, equations_hyperbolic, dg.basis, elements) - boundaries = init_boundaries(mesh, equations_hyperbolic, dg.basis, elements) - - n_vars = nvariables(equations_hyperbolic) - n_elements = nelements(elements) - n_nodes = nnodes(dg.basis) # nodes in one direction - u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_nodes, n_elements) - gradients = ntuple(_ -> similar(u_transformed), ndims(mesh)) - flux_viscous = ntuple(_ -> similar(u_transformed), ndims(mesh)) + n_vars = nvariables(equations_hyperbolic) + n_elements = nelements(elements) + n_nodes = nnodes(dg.basis) # nodes in one direction + u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_nodes, n_elements) + gradients = ntuple(_ -> similar(u_transformed), ndims(mesh)) + flux_viscous = ntuple(_ -> similar(u_transformed), ndims(mesh)) - cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) + cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) - return cache + return cache end function calc_gradient!(gradients, u_transformed, t, mesh::P4estMesh{2}, equations_parabolic, - boundary_conditions_parabolic, dg::DG, + boundary_conditions_parabolic, dg::DG, cache, cache_parabolic) + gradients_x, gradients_y = gradients - gradients_x, gradients_y = gradients - - # Reset du - @trixi_timeit timer() "reset gradients" begin - reset_du!(gradients_x, dg, cache) - reset_du!(gradients_y, dg, cache) - end - - # Calculate volume integral - @trixi_timeit timer() "volume integral" begin - (; derivative_dhat) = dg.basis - (; contravariant_vectors) = cache.elements - - @threaded for element in eachelement(dg, cache) - - # Calculate gradients with respect to reference coordinates in one element - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, element) - - for ii in eachnode(dg) - multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, 
i], u_node, equations_parabolic, dg, ii, j, element) - end + # Reset du + @trixi_timeit timer() "reset gradients" begin + reset_du!(gradients_x, dg, cache) + reset_du!(gradients_y, dg, cache) + end - for jj in eachnode(dg) - multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j], u_node, equations_parabolic, dg, i, jj, element) + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + (; derivative_dhat) = dg.basis + (; contravariant_vectors) = cache.elements + + @threaded for element in eachelement(dg, cache) + + # Calculate gradients with respect to reference coordinates in one element + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, + element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i], u_node, + equations_parabolic, dg, ii, j, element) + end + + for jj in eachnode(dg) + multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j], u_node, + equations_parabolic, dg, i, jj, element) + end + end + + # now that the reference coordinate gradients are computed, transform them node-by-node to physical gradients + # using the contravariant vectors + for j in eachnode(dg), i in eachnode(dg) + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, + element) + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, + element) + + gradients_reference_1 = get_node_vars(gradients_x, equations_parabolic, dg, + i, j, element) + gradients_reference_2 = get_node_vars(gradients_y, equations_parabolic, dg, + i, j, element) + + # note that the contravariant vectors are transposed compared with computations of flux + # divergences in `calc_volume_integral!`. See + # https://github.com/trixi-framework/Trixi.jl/pull/1490#discussion_r1213345190 + # for a more detailed discussion. + gradient_x_node = Ja11 * gradients_reference_1 + + Ja21 * gradients_reference_2 + gradient_y_node = Ja12 * gradients_reference_1 + + Ja22 * gradients_reference_2 + + set_node_vars!(gradients_x, gradient_x_node, equations_parabolic, dg, i, j, + element) + set_node_vars!(gradients_y, gradient_y_node, equations_parabolic, dg, i, j, + element) + end end - end - - # now that the reference coordinate gradients are computed, transform them node-by-node to physical gradients - # using the contravariant vectors - for j in eachnode(dg), i in eachnode(dg) - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) + end - gradients_reference_1 = get_node_vars(gradients_x, equations_parabolic, dg, i, j, element) - gradients_reference_2 = get_node_vars(gradients_y, equations_parabolic, dg, i, j, element) + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache_parabolic, u_transformed, mesh, + equations_parabolic, dg.surface_integral, dg) + end - # note that the contravariant vectors are transposed compared with computations of flux - # divergences in `calc_volume_integral!`. See - # https://github.com/trixi-framework/Trixi.jl/pull/1490#discussion_r1213345190 - # for a more detailed discussion. - gradient_x_node = Ja11 * gradients_reference_1 + Ja21 * gradients_reference_2 - gradient_y_node = Ja12 * gradients_reference_1 + Ja22 * gradients_reference_2 + # Calculate interface fluxes for the gradient. This reuses P4est `calc_interface_flux!` along with a + # specialization for AbstractEquationsParabolic. 
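+    # Dispatch note: since `equations_parabolic isa AbstractEquationsParabolic`,
+    # Julia selects the parabolic method of `calc_interface_flux!` defined further
+    # below, which simply averages the two interface states (a central flux)
+    # instead of evaluating a hyperbolic Riemann solver.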
+ @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache_parabolic.elements.surface_flux_values, + mesh, False(), # False() = no nonconservative terms + equations_parabolic, dg.surface_integral, dg, cache_parabolic) + end - set_node_vars!(gradients_x, gradient_x_node, equations_parabolic, dg, i, j, element) - set_node_vars!(gradients_y, gradient_y_node, equations_parabolic, dg, i, j, element) - end + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache_parabolic, u_transformed, mesh, + equations_parabolic, dg.surface_integral, dg) + end + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_gradients!(cache_parabolic, t, boundary_conditions_parabolic, + mesh, equations_parabolic, dg.surface_integral, dg) end - end - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) - - # Calculate interface fluxes for the gradient. This reuses P4est `calc_interface_flux!` along with a - # specialization for AbstractEquationsParabolic. - @trixi_timeit timer() "interface flux" calc_interface_flux!(cache_parabolic.elements.surface_flux_values, - mesh, False(), # False() = no nonconservative terms - equations_parabolic, dg.surface_integral, dg, - cache_parabolic) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux_gradients!( - cache_parabolic, t, boundary_conditions_parabolic, mesh, equations_parabolic, - dg.surface_integral, dg) - - # TODO: parabolic; mortars - @assert nmortars(dg, cache) == 0 - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" begin - (; boundary_interpolation) = dg.basis - (; surface_flux_values) = cache_parabolic.elements - (; contravariant_vectors) = cache.elements - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). 
- factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for l in eachnode(dg) - for v in eachvariable(equations_parabolic) - - # Compute x-component of gradients - - # surface at -x - normal_direction_x, _ = get_normal_direction(1, contravariant_vectors, 1, l, element) - gradients_x[v, 1, l, element] = ( - gradients_x[v, 1, l, element] + surface_flux_values[v, l, 1, element] * factor_1 * normal_direction_x) - - # surface at +x - normal_direction_x, _ = get_normal_direction(2, contravariant_vectors, nnodes(dg), l, element) - gradients_x[v, nnodes(dg), l, element] = ( - gradients_x[v, nnodes(dg), l, element] + surface_flux_values[v, l, 2, element] * factor_2 * normal_direction_x) - - # surface at -y - normal_direction_x, _ = get_normal_direction(3, contravariant_vectors, l, 1, element) - gradients_x[v, l, 1, element] = ( - gradients_x[v, l, 1, element] + surface_flux_values[v, l, 3, element] * factor_1 * normal_direction_x) - - # surface at +y - normal_direction_x, _ = get_normal_direction(4, contravariant_vectors, l, nnodes(dg), element) - gradients_x[v, l, nnodes(dg), element] = ( - gradients_x[v, l, nnodes(dg), element] + surface_flux_values[v, l, 4, element] * factor_2 * normal_direction_x) - - # Compute y-component of gradients - - # surface at -x - _, normal_direction_y = get_normal_direction(1, contravariant_vectors, 1, l, element) - gradients_y[v, 1, l, element] = ( - gradients_y[v, 1, l, element] + surface_flux_values[v, l, 1, element] * factor_1 * normal_direction_y) - - # surface at +x - _, normal_direction_y = get_normal_direction(2, contravariant_vectors, nnodes(dg), l, element) - gradients_y[v, nnodes(dg), l, element] = ( - gradients_y[v, nnodes(dg), l, element] + surface_flux_values[v, l, 2, element] * factor_2 * normal_direction_y) - - # surface at -y - _, normal_direction_y = get_normal_direction(3, contravariant_vectors, l, 1, element) - gradients_y[v, l, 1, element] = ( - gradients_y[v, l, 1, element] + surface_flux_values[v, l, 3, element] * factor_1 * normal_direction_y) - - # surface at +y - _, normal_direction_y = get_normal_direction(4, contravariant_vectors, l, nnodes(dg), element) - gradients_y[v, l, nnodes(dg), element] = ( - gradients_y[v, l, nnodes(dg), element] + surface_flux_values[v, l, 4, element] * factor_2 * normal_direction_y) + # TODO: parabolic; mortars + @assert nmortars(dg, cache) == 0 + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + (; boundary_interpolation) = dg.basis + (; surface_flux_values) = cache_parabolic.elements + (; contravariant_vectors) = cache.elements + + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). 
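+        # Pointwise, each face node contributes to both gradient components
+        # (a schematic of the updates below, not additional logic):
+        #   gradients_x[v, ...] += u_face * factor * normal_direction_x
+        #   gradients_y[v, ...] += u_face * factor * normal_direction_y
+        # where `u_face` is the central interface value stored in
+        # `surface_flux_values`, i.e., the scalar surface value is lifted into
+        # the gradient weighted by the outward normal components.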
+ factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for l in eachnode(dg) + for v in eachvariable(equations_parabolic) + + # Compute x-component of gradients + + # surface at -x + normal_direction_x, _ = get_normal_direction(1, contravariant_vectors, + 1, l, element) + gradients_x[v, 1, l, element] = (gradients_x[v, 1, l, element] + + surface_flux_values[v, l, 1, element] * + factor_1 * normal_direction_x) + + # surface at +x + normal_direction_x, _ = get_normal_direction(2, contravariant_vectors, + nnodes(dg), l, element) + gradients_x[v, nnodes(dg), l, element] = (gradients_x[v, nnodes(dg), l, + element] + + surface_flux_values[v, l, 2, + element] * + factor_2 * normal_direction_x) + + # surface at -y + normal_direction_x, _ = get_normal_direction(3, contravariant_vectors, + l, 1, element) + gradients_x[v, l, 1, element] = (gradients_x[v, l, 1, element] + + surface_flux_values[v, l, 3, element] * + factor_1 * normal_direction_x) + + # surface at +y + normal_direction_x, _ = get_normal_direction(4, contravariant_vectors, + l, nnodes(dg), element) + gradients_x[v, l, nnodes(dg), element] = (gradients_x[v, l, nnodes(dg), + element] + + surface_flux_values[v, l, 4, + element] * + factor_2 * normal_direction_x) + + # Compute y-component of gradients + + # surface at -x + _, normal_direction_y = get_normal_direction(1, contravariant_vectors, + 1, l, element) + gradients_y[v, 1, l, element] = (gradients_y[v, 1, l, element] + + surface_flux_values[v, l, 1, element] * + factor_1 * normal_direction_y) + + # surface at +x + _, normal_direction_y = get_normal_direction(2, contravariant_vectors, + nnodes(dg), l, element) + gradients_y[v, nnodes(dg), l, element] = (gradients_y[v, nnodes(dg), l, + element] + + surface_flux_values[v, l, 2, + element] * + factor_2 * normal_direction_y) + + # surface at -y + _, normal_direction_y = get_normal_direction(3, contravariant_vectors, + l, 1, element) + gradients_y[v, l, 1, element] = (gradients_y[v, l, 1, element] + + surface_flux_values[v, l, 3, element] * + factor_1 * normal_direction_y) + + # surface at +y + _, normal_direction_y = get_normal_direction(4, contravariant_vectors, + l, nnodes(dg), element) + gradients_y[v, l, nnodes(dg), element] = (gradients_y[v, l, nnodes(dg), + element] + + surface_flux_values[v, l, 4, + element] * + factor_2 * normal_direction_y) + end + end end - end end - end - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" begin - apply_jacobian_parabolic!(gradients_x, mesh, equations_parabolic, dg, cache_parabolic) - apply_jacobian_parabolic!(gradients_y, mesh, equations_parabolic, dg, cache_parabolic) - end + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(gradients_x, mesh, equations_parabolic, dg, + cache_parabolic) + apply_jacobian_parabolic!(gradients_y, mesh, equations_parabolic, dg, + cache_parabolic) + end - return nothing + return nothing end # This version is used for parabolic gradient computations @inline function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{2}, - nonconservative_terms::False, + nonconservative_terms::False, equations::AbstractEquationsParabolic, surface_integral, dg::DG, cache, interface_index, normal_direction, - primary_node_index, primary_direction_index, primary_element_index, - secondary_node_index, secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces - 
@unpack surface_flux = surface_integral
-
-  u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index, interface_index)
-
-  flux_ = 0.5 * (u_ll + u_rr) # we assume that the gradient computations utilize a central flux
-
-  # Note that we don't flip the sign on the secondondary flux. This is because for parabolic terms,
-  # the normals are not embedded in `flux_` for the parabolic gradient computations.
-  for v in eachvariable(equations)
-    surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = flux_[v]
-    surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = flux_[v]
-  end
+                                      primary_node_index, primary_direction_index,
+                                      primary_element_index,
+                                      secondary_node_index, secondary_direction_index,
+                                      secondary_element_index)
+    @unpack u = cache.interfaces
+    @unpack surface_flux = surface_integral
+
+    u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_node_index,
+                                       interface_index)
+
+    flux_ = 0.5 * (u_ll + u_rr) # we assume that the gradient computations utilize a central flux
+
+    # Note that we don't flip the sign on the secondary flux. This is because for parabolic terms,
+    # the normals are not embedded in `flux_` for the parabolic gradient computations.
+    for v in eachvariable(equations)
+        surface_flux_values[v, primary_node_index, primary_direction_index, primary_element_index] = flux_[v]
+        surface_flux_values[v, secondary_node_index, secondary_direction_index, secondary_element_index] = flux_[v]
+    end
 end

 # This is the version used when calculating the divergence of the viscous fluxes
 function calc_volume_integral!(du, flux_viscous,
-                               mesh::P4estMesh{2}, equations_parabolic::AbstractEquationsParabolic,
+                               mesh::P4estMesh{2},
+                               equations_parabolic::AbstractEquationsParabolic,
                                dg::DGSEM, cache)
-  (; derivative_dhat) = dg.basis
-  (; contravariant_vectors) = cache.elements
-  flux_viscous_x, flux_viscous_y = flux_viscous
-
-  @threaded for element in eachelement(dg, cache)
-    # Calculate volume terms in one element
-    for j in eachnode(dg), i in eachnode(dg)
-      flux1 = get_node_vars(flux_viscous_x, equations_parabolic, dg, i, j, element)
-      flux2 = get_node_vars(flux_viscous_y, equations_parabolic, dg, i, j, element)
-
-      # Compute the contravariant flux by taking the scalar product of the
-      # first contravariant vector Ja^1 and the flux vector
-      Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element)
-      contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2
-      for ii in eachnode(dg)
-        multiply_add_to_node_vars!(du, derivative_dhat[ii, i], contravariant_flux1, equations_parabolic, dg, ii, j, element)
-      end
-
-      # Compute the contravariant flux by taking the scalar product of the
-      # second contravariant vector Ja^2 and the flux vector
-      Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element)
-      contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2
-      for jj in eachnode(dg)
-        multiply_add_to_node_vars!(du, derivative_dhat[jj, j], contravariant_flux2, equations_parabolic, dg, i, jj, element)
-      end
+    (; derivative_dhat) = dg.basis
+    (; contravariant_vectors) = cache.elements
+    flux_viscous_x, flux_viscous_y = flux_viscous
+
+    @threaded for element in eachelement(dg, cache)
+        # Calculate volume terms in one element
+        for j in eachnode(dg), i in eachnode(dg)
+            flux1 = get_node_vars(flux_viscous_x, equations_parabolic, dg, i, j, element)
+            flux2 = get_node_vars(flux_viscous_y, equations_parabolic, dg, i, j, element)
+
+            # Compute the contravariant flux by taking the 
scalar product of the + # first contravariant vector Ja^1 and the flux vector + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) + contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, derivative_dhat[ii, i], contravariant_flux1, + equations_parabolic, dg, ii, j, element) + end + + # Compute the contravariant flux by taking the scalar product of the + # second contravariant vector Ja^2 and the flux vector + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) + contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2 + for jj in eachnode(dg) + multiply_add_to_node_vars!(du, derivative_dhat[jj, j], contravariant_flux2, + equations_parabolic, dg, i, jj, element) + end + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache_parabolic, flux_viscous, - mesh::P4estMesh{2}, equations_parabolic::AbstractEquationsParabolic, + mesh::P4estMesh{2}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) - (; interfaces) = cache_parabolic - (; contravariant_vectors) = cache_parabolic.elements - index_range = eachnode(dg) - flux_viscous_x, flux_viscous_y = flux_viscous - - @threaded for interface in eachinterface(dg, cache) - # Copy solution data from the primary element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - # Note that in the current implementation, the interface will be - # "aligned at the primary element", i.e., the index of the primary side - # will always run forwards. - primary_element = interfaces.neighbor_ids[1, interface] - primary_indices = interfaces.node_indices[1, interface] - primary_direction = indices2direction(primary_indices) - - i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], index_range) - j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - for i in eachnode(dg) - - # this is the outward normal direction on the primary element - normal_direction = get_normal_direction(primary_direction, contravariant_vectors, - i_primary, j_primary, primary_element) - - for v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - flux_viscous = SVector(flux_viscous_x[v, i_primary, j_primary, primary_element], - flux_viscous_y[v, i_primary, j_primary, primary_element]) - - interfaces.u[1, v, i, interface] = dot(flux_viscous, normal_direction) - end - i_primary += i_primary_step - j_primary += j_primary_step - end + (; interfaces) = cache_parabolic + (; contravariant_vectors) = cache_parabolic.elements + index_range = eachnode(dg) + flux_viscous_x, flux_viscous_y = flux_viscous + + @threaded for interface in eachinterface(dg, cache) + # Copy solution data from the primary element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the index of the primary side + # will always run forwards. 
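+        # Note: in this divergence pass, the quantity copied to the interface is
+        # not the solution but the viscous flux projected onto the outward
+        # normal, i.e., schematically (see the `OBS!` comment below)
+        #   interfaces.u[1, v, i, interface] = f_x * n_x + f_y * n_y
+        # with (f_x, f_y) the viscous flux components and (n_x, n_y) the outward
+        # normal direction on the primary element.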
+ primary_element = interfaces.neighbor_ids[1, interface] + primary_indices = interfaces.node_indices[1, interface] + primary_direction = indices2direction(primary_indices) + + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + index_range) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + for i in eachnode(dg) + + # this is the outward normal direction on the primary element + normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, primary_element) + + for v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! + flux_viscous = SVector(flux_viscous_x[v, i_primary, j_primary, + primary_element], + flux_viscous_y[v, i_primary, j_primary, + primary_element]) + + interfaces.u[1, v, i, interface] = dot(flux_viscous, normal_direction) + end + i_primary += i_primary_step + j_primary += j_primary_step + end - # Copy solution data from the secondary element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - secondary_element = interfaces.neighbor_ids[2, interface] - secondary_indices = interfaces.node_indices[2, interface] - secondary_direction = indices2direction(secondary_indices) - - i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1], index_range) - j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2], index_range) - - i_secondary = i_secondary_start - j_secondary = j_secondary_start - for i in eachnode(dg) - # This is the outward normal direction on the secondary element. - # Here, we assume that normal_direction on the secondary element is - # the negative of normal_direction on the primary element. - normal_direction = get_normal_direction(secondary_direction, contravariant_vectors, - i_secondary, j_secondary, secondary_element) - - for v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - flux_viscous = SVector(flux_viscous_x[v, i_secondary, j_secondary, secondary_element], - flux_viscous_y[v, i_secondary, j_secondary, secondary_element]) - # store the normal flux with respect to the primary normal direction - interfaces.u[2, v, i, interface] = -dot(flux_viscous, normal_direction) - end - i_secondary += i_secondary_step - j_secondary += j_secondary_step + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + secondary_element = interfaces.neighbor_ids[2, interface] + secondary_indices = interfaces.node_indices[2, interface] + secondary_direction = indices2direction(secondary_indices) + + i_secondary_start, i_secondary_step = index_to_start_step_2d(secondary_indices[1], + index_range) + j_secondary_start, j_secondary_step = index_to_start_step_2d(secondary_indices[2], + index_range) + + i_secondary = i_secondary_start + j_secondary = j_secondary_start + for i in eachnode(dg) + # This is the outward normal direction on the secondary element. + # Here, we assume that normal_direction on the secondary element is + # the negative of normal_direction on the primary element. + normal_direction = get_normal_direction(secondary_direction, + contravariant_vectors, + i_secondary, j_secondary, + secondary_element) + + for v in eachvariable(equations_parabolic) + # OBS! 
`interfaces.u` stores the interpolated *fluxes* and *not the solution*! + flux_viscous = SVector(flux_viscous_x[v, i_secondary, j_secondary, + secondary_element], + flux_viscous_y[v, i_secondary, j_secondary, + secondary_element]) + # store the normal flux with respect to the primary normal direction + interfaces.u[2, v, i, interface] = -dot(flux_viscous, normal_direction) + end + i_secondary += i_secondary_step + j_secondary += j_secondary_step + end end - end - return nothing + return nothing end function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{2}, equations_parabolic, dg::DG, cache_parabolic) + (; neighbor_ids, node_indices) = cache_parabolic.interfaces + (; contravariant_vectors) = cache_parabolic.elements + index_range = eachnode(dg) + index_end = last(index_range) + + @threaded for interface in eachinterface(dg, cache_parabolic) + # Get element and side index information on the primary element + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + primary_direction_index = indices2direction(primary_indices) + + # Create the local i,j indexing on the primary element used to pull normal direction information + i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], + index_range) + j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + + # Get element and side index information on the secondary element + secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + secondary_direction_index = indices2direction(secondary_indices) + + # Initiate the secondary index to be used in the surface for loop. + # This index on the primary side will always run forward but + # the secondary index might need to run backwards for flipped sides. + if :i_backward in secondary_indices + node_secondary = index_end + node_secondary_step = -1 + else + node_secondary = 1 + node_secondary_step = 1 + end - (; neighbor_ids, node_indices) = cache_parabolic.interfaces - (; contravariant_vectors) = cache_parabolic.elements - index_range = eachnode(dg) - index_end = last(index_range) - - @threaded for interface in eachinterface(dg, cache_parabolic) - # Get element and side index information on the primary element - primary_element = neighbor_ids[1, interface] - primary_indices = node_indices[1, interface] - primary_direction_index = indices2direction(primary_indices) - - # Create the local i,j indexing on the primary element used to pull normal direction information - i_primary_start, i_primary_step = index_to_start_step_2d(primary_indices[1], index_range) - j_primary_start, j_primary_step = index_to_start_step_2d(primary_indices[2], index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - - # Get element and side index information on the secondary element - secondary_element = neighbor_ids[2, interface] - secondary_indices = node_indices[2, interface] - secondary_direction_index = indices2direction(secondary_indices) - - # Initiate the secondary index to be used in the surface for loop. - # This index on the primary side will always run forward but - # the secondary index might need to run backwards for flipped sides. 
-    if :i_backward in secondary_indices
-      node_secondary = index_end
-      node_secondary_step = -1
-    else
-      node_secondary = 1
-      node_secondary_step = 1
-    end
-
-    for node in eachnode(dg)
-      # We prolong the viscous flux dotted with respect the outward normal on the
-      # primary element. We assume a BR-1 type of flux.
-      viscous_flux_normal_ll, viscous_flux_normal_rr =
-        get_surface_node_vars(cache_parabolic.interfaces.u, equations_parabolic, dg, node, interface)
-
-      flux = 0.5 * (viscous_flux_normal_ll + viscous_flux_normal_rr)
-
-      for v in eachvariable(equations_parabolic)
-        surface_flux_values[v, node, primary_direction_index, primary_element] = flux[v]
-        surface_flux_values[v, node_secondary, secondary_direction_index, secondary_element] = -flux[v]
-      end
-
-      # Increment primary element indices to pull the normal direction
-      i_primary += i_primary_step
-      j_primary += j_primary_step
-      # Increment the surface node index along the secondary element
-      node_secondary += node_secondary_step
+        for node in eachnode(dg)
+            # We prolong the viscous flux dotted with respect to the outward normal on the
+            # primary element. We assume a BR-1 type of flux.
+            viscous_flux_normal_ll, viscous_flux_normal_rr = get_surface_node_vars(cache_parabolic.interfaces.u,
+                                                                                   equations_parabolic,
+                                                                                   dg, node,
+                                                                                   interface)
+
+            flux = 0.5 * (viscous_flux_normal_ll + viscous_flux_normal_rr)
+
+            for v in eachvariable(equations_parabolic)
+                surface_flux_values[v, node, primary_direction_index, primary_element] = flux[v]
+                surface_flux_values[v, node_secondary, secondary_direction_index, secondary_element] = -flux[v]
+            end
+
+            # Increment primary element indices to pull the normal direction
+            i_primary += i_primary_step
+            j_primary += j_primary_step
+            # Increment the surface node index along the secondary element
+            node_secondary += node_secondary_step
+        end
+    end
-  end
-
-  return nothing
+    return nothing
 end

 # TODO: parabolic, finish implementing `calc_boundary_flux_gradients!` and `calc_boundary_flux_divergence!`
 function prolong2boundaries!(cache_parabolic, flux_viscous,
-                             mesh::P4estMesh{2}, equations_parabolic::AbstractEquationsParabolic,
+                             mesh::P4estMesh{2},
+                             equations_parabolic::AbstractEquationsParabolic,
                              surface_integral, dg::DG, cache)
-  (; boundaries) = cache_parabolic
-  (; contravariant_vectors) = cache_parabolic.elements
-  index_range = eachnode(dg)
-
-  flux_viscous_x, flux_viscous_y = flux_viscous
-
-  @threaded for boundary in eachboundary(dg, cache_parabolic)
-    # Copy solution data from the element using "delayed indexing" with
-    # a start value and a step size to get the correct face and orientation. 
-    element = boundaries.neighbor_ids[boundary]
-    node_indices = boundaries.node_indices[boundary]
-
-    i_node_start, i_node_step = index_to_start_step_2d(node_indices[1], index_range)
-    j_node_start, j_node_step = index_to_start_step_2d(node_indices[2], index_range)
-
-    i_node = i_node_start
-    j_node = j_node_start
-    for i in eachnode(dg)
-      # this is the outward normal direction on the primary element
-      normal_direction = get_normal_direction(primary_direction, contravariant_vectors,
-                                              i_node, j_node, primary_element)
-
-      for v in eachvariable(equations_parabolic)
-        flux_viscous = SVector(flux_viscous_x[v, i_primary, j_primary, primary_element],
-                               flux_viscous_y[v, i_primary, j_primary, primary_element])
-
-        boundaries.u[v, i, boundary] = dot(flux_viscous, normal_direction)
-      end
-      i_node += i_node_step
-      j_node += j_node_step
+    (; boundaries) = cache_parabolic
+    (; contravariant_vectors) = cache_parabolic.elements
+    index_range = eachnode(dg)
+
+    flux_viscous_x, flux_viscous_y = flux_viscous
+
+    @threaded for boundary in eachboundary(dg, cache_parabolic)
+        # Copy solution data from the element using "delayed indexing" with
+        # a start value and a step size to get the correct face and orientation.
+        element = boundaries.neighbor_ids[boundary]
+        node_indices = boundaries.node_indices[boundary]
+        direction = indices2direction(node_indices)
+
+        i_node_start, i_node_step = index_to_start_step_2d(node_indices[1], index_range)
+        j_node_start, j_node_step = index_to_start_step_2d(node_indices[2], index_range)
+
+        i_node = i_node_start
+        j_node = j_node_start
+        for i in eachnode(dg)
+            # this is the outward normal direction on the boundary element
+            normal_direction = get_normal_direction(direction, contravariant_vectors,
+                                                    i_node, j_node, element)
+
+            for v in eachvariable(equations_parabolic)
+                flux_viscous = SVector(flux_viscous_x[v, i_node, j_node, element],
+                                       flux_viscous_y[v, i_node, j_node, element])
+
+                boundaries.u[v, i, boundary] = dot(flux_viscous, normal_direction)
+            end
+            i_node += i_node_step
+            j_node += j_node_step
+        end
+    end
-  end
-
-  return nothing
+    return nothing
 end
diff --git a/src/solvers/dgsem_p4est/dg_2d_parallel.jl b/src/solvers/dgsem_p4est/dg_2d_parallel.jl
index fdfce1deaca..a8887351c46 100644
--- a/src/solvers/dgsem_p4est/dg_2d_parallel.jl
+++ b/src/solvers/dgsem_p4est/dg_2d_parallel.jl
@@ -3,98 +3,105 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#! format: noindent

 function prolong2mpiinterfaces!(cache, u, mesh::ParallelP4estMesh{2}, equations,
                                 surface_integral, dg::DG)
-  @unpack mpi_interfaces = cache
-  index_range = eachnode(dg)
-
-  @threaded for interface in eachmpiinterface(dg, cache)
-    # Copy solution data from the local element using "delayed indexing" with
-    # a start value and a step size to get the correct face and orientation.
-    # Note that in the current implementation, the interface will be
-    # "aligned at the primary element", i.e., the index of the primary side
-    # will always run forwards. 
- local_side = mpi_interfaces.local_sides[interface] - local_element = mpi_interfaces.local_neighbor_ids[interface] - local_indices = mpi_interfaces.node_indices[interface] - - i_element_start, i_element_step = index_to_start_step_2d(local_indices[1], index_range) - j_element_start, j_element_step = index_to_start_step_2d(local_indices[2], index_range) - - i_element = i_element_start - j_element = j_element_start - for i in eachnode(dg) - for v in eachvariable(equations) - mpi_interfaces.u[local_side, v, i, interface] = u[v, i_element, j_element, local_element] - end - i_element += i_element_step - j_element += j_element_step + @unpack mpi_interfaces = cache + index_range = eachnode(dg) + + @threaded for interface in eachmpiinterface(dg, cache) + # Copy solution data from the local element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the index of the primary side + # will always run forwards. + local_side = mpi_interfaces.local_sides[interface] + local_element = mpi_interfaces.local_neighbor_ids[interface] + local_indices = mpi_interfaces.node_indices[interface] + + i_element_start, i_element_step = index_to_start_step_2d(local_indices[1], + index_range) + j_element_start, j_element_step = index_to_start_step_2d(local_indices[2], + index_range) + + i_element = i_element_start + j_element = j_element_start + for i in eachnode(dg) + for v in eachvariable(equations) + mpi_interfaces.u[local_side, v, i, interface] = u[v, i_element, + j_element, + local_element] + end + i_element += i_element_step + j_element += j_element_step + end end - end - return nothing + return nothing end - function calc_mpi_interface_flux!(surface_flux_values, mesh::ParallelP4estMesh{2}, nonconservative_terms, equations, surface_integral, dg::DG, cache) - @unpack local_neighbor_ids, node_indices, local_sides = cache.mpi_interfaces - @unpack contravariant_vectors = cache.elements - index_range = eachnode(dg) - index_end = last(index_range) - - @threaded for interface in eachmpiinterface(dg, cache) - # Get element and side index information on the local element - local_element = local_neighbor_ids[interface] - local_indices = node_indices[interface] - local_direction = indices2direction(local_indices) - local_side = local_sides[interface] - - # Create the local i,j indexing on the local element used to pull normal direction information - i_element_start, i_element_step = index_to_start_step_2d(local_indices[1], index_range) - j_element_start, j_element_step = index_to_start_step_2d(local_indices[2], index_range) - - i_element = i_element_start - j_element = j_element_start - - # Initiate the node index to be used in the surface for loop, - # the surface flux storage must be indexed in alignment with the local element indexing - if :i_backward in local_indices - surface_node = index_end - surface_node_step = -1 - else - surface_node = 1 - surface_node_step = 1 - end + @unpack local_neighbor_ids, node_indices, local_sides = cache.mpi_interfaces + @unpack contravariant_vectors = cache.elements + index_range = eachnode(dg) + index_end = last(index_range) + + @threaded for interface in eachmpiinterface(dg, cache) + # Get element and side index information on the local element + local_element = local_neighbor_ids[interface] + local_indices = node_indices[interface] + local_direction = indices2direction(local_indices) + local_side = local_sides[interface] + + 
# Create the local i,j indexing on the local element used to pull normal direction information + i_element_start, i_element_step = index_to_start_step_2d(local_indices[1], + index_range) + j_element_start, j_element_step = index_to_start_step_2d(local_indices[2], + index_range) + + i_element = i_element_start + j_element = j_element_start + + # Initiate the node index to be used in the surface for loop, + # the surface flux storage must be indexed in alignment with the local element indexing + if :i_backward in local_indices + surface_node = index_end + surface_node_step = -1 + else + surface_node = 1 + surface_node_step = 1 + end - for node in eachnode(dg) - # Get the normal direction on the local element - # Contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. - normal_direction = get_normal_direction(local_direction, contravariant_vectors, - i_element, j_element, local_element) - - calc_mpi_interface_flux!(surface_flux_values, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - interface, normal_direction, - node, local_side, - surface_node, local_direction, local_element) - - # Increment local element indices to pull the normal direction - i_element += i_element_step - j_element += j_element_step - - # Increment the surface node index along the local element - surface_node += surface_node_step + for node in eachnode(dg) + # Get the normal direction on the local element + # Contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(local_direction, + contravariant_vectors, + i_element, j_element, local_element) + + calc_mpi_interface_flux!(surface_flux_values, mesh, nonconservative_terms, + equations, + surface_integral, dg, cache, + interface, normal_direction, + node, local_side, + surface_node, local_direction, local_element) + + # Increment local element indices to pull the normal direction + i_element += i_element_step + j_element += j_element_step + + # Increment the surface node index along the local element + surface_node += surface_node_step + end end - end - return nothing + return nothing end # Inlined version of the interface flux computation for conservation laws @@ -104,138 +111,146 @@ end surface_integral, dg::DG, cache, interface_index, normal_direction, interface_node_index, local_side, - surface_node_index, local_direction_index, local_element_index) - @unpack u = cache.mpi_interfaces - @unpack surface_flux = surface_integral - - u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface_node_index, interface_index) - - if local_side == 1 - flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) - else # local_side == 2 - flux_ = -surface_flux(u_ll, u_rr, -normal_direction, equations) - end + surface_node_index, local_direction_index, + local_element_index) + @unpack u = cache.mpi_interfaces + @unpack surface_flux = surface_integral + + u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface_node_index, + interface_index) + + if local_side == 1 + flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) + else # local_side == 2 + flux_ = -surface_flux(u_ll, u_rr, -normal_direction, equations) + end - for v in eachvariable(equations) - surface_flux_values[v, surface_node_index, local_direction_index, local_element_index] = flux_[v] - end + for v in eachvariable(equations) + surface_flux_values[v, surface_node_index, 
local_direction_index, local_element_index] = flux_[v] + end end - function prolong2mpimortars!(cache, u, mesh::ParallelP4estMesh{2}, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DGSEM) - @unpack node_indices = cache.mpi_mortars - index_range = eachnode(dg) - - @threaded for mortar in eachmpimortar(dg, cache) - local_neighbor_ids = cache.mpi_mortars.local_neighbor_ids[mortar] - local_neighbor_positions = cache.mpi_mortars.local_neighbor_positions[mortar] - - # Get start value and step size for indices on both sides to get the correct face - # and orientation - small_indices = node_indices[1, mortar] - i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], index_range) - j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], index_range) - - large_indices = node_indices[2, mortar] - i_large_start, i_large_step = index_to_start_step_2d(large_indices[1], index_range) - j_large_start, j_large_step = index_to_start_step_2d(large_indices[2], index_range) - - for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) - if position == 3 # -> large element - # Buffer to copy solution values of the large element in the correct orientation - # before interpolating - u_buffer = cache.u_threaded[Threads.threadid()] - i_large = i_large_start - j_large = j_large_start - for i in eachnode(dg) - for v in eachvariable(equations) - u_buffer[v, i] = u[v, i_large, j_large, element] - end - - i_large += i_large_step - j_large += j_large_step + @unpack node_indices = cache.mpi_mortars + index_range = eachnode(dg) + + @threaded for mortar in eachmpimortar(dg, cache) + local_neighbor_ids = cache.mpi_mortars.local_neighbor_ids[mortar] + local_neighbor_positions = cache.mpi_mortars.local_neighbor_positions[mortar] + + # Get start value and step size for indices on both sides to get the correct face + # and orientation + small_indices = node_indices[1, mortar] + i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], + index_range) + j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], + index_range) + + large_indices = node_indices[2, mortar] + i_large_start, i_large_step = index_to_start_step_2d(large_indices[1], + index_range) + j_large_start, j_large_step = index_to_start_step_2d(large_indices[2], + index_range) + + for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) + if position == 3 # -> large element + # Buffer to copy solution values of the large element in the correct orientation + # before interpolating + u_buffer = cache.u_threaded[Threads.threadid()] + i_large = i_large_start + j_large = j_large_start + for i in eachnode(dg) + for v in eachvariable(equations) + u_buffer[v, i] = u[v, i_large, j_large, element] + end + + i_large += i_large_step + j_large += j_large_step + end + + # Interpolate large element face data from buffer to small face locations + multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 1, :, mortar), + mortar_l2.forward_lower, + u_buffer) + multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 2, :, mortar), + mortar_l2.forward_upper, + u_buffer) + else # position in (1, 2) -> small element + # Copy solution data from the small elements + i_small = i_small_start + j_small = j_small_start + for i in eachnode(dg) + for v in eachvariable(equations) + cache.mpi_mortars.u[1, v, position, i, mortar] = u[v, i_small, + j_small, + element] + end + i_small += i_small_step + j_small += j_small_step + end + end end - - # Interpolate large element face data from 
buffer to small face locations - multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 1, :, mortar), - mortar_l2.forward_lower, - u_buffer) - multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 2, :, mortar), - mortar_l2.forward_upper, - u_buffer) - else # position in (1, 2) -> small element - # Copy solution data from the small elements - i_small = i_small_start - j_small = j_small_start - for i in eachnode(dg) - for v in eachvariable(equations) - cache.mpi_mortars.u[1, v, position, i, mortar] = u[v, i_small, j_small, element] - end - i_small += i_small_step - j_small += j_small_step - end - end end - end - return nothing + return nothing end - function calc_mpi_mortar_flux!(surface_flux_values, mesh::ParallelP4estMesh{2}, nonconservative_terms, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars - @unpack contravariant_vectors = cache.elements - @unpack fstar_upper_threaded, fstar_lower_threaded = cache - index_range = eachnode(dg) - - @threaded for mortar in eachmpimortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar = (fstar_lower_threaded[Threads.threadid()], - fstar_upper_threaded[Threads.threadid()]) - - # Get index information on the small elements - small_indices = node_indices[1, mortar] - - i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], index_range) - j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], index_range) - - for position in 1:2 - i_small = i_small_start - j_small = j_small_start - for node in eachnode(dg) - # Get the normal direction on the small element. - normal_direction = get_normal_direction(cache.mpi_mortars, node, position, mortar) + @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars + @unpack contravariant_vectors = cache.elements + @unpack fstar_upper_threaded, fstar_lower_threaded = cache + index_range = eachnode(dg) + + @threaded for mortar in eachmpimortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar = (fstar_lower_threaded[Threads.threadid()], + fstar_upper_threaded[Threads.threadid()]) + + # Get index information on the small elements + small_indices = node_indices[1, mortar] + + i_small_start, i_small_step = index_to_start_step_2d(small_indices[1], + index_range) + j_small_start, j_small_step = index_to_start_step_2d(small_indices[2], + index_range) + + for position in 1:2 + i_small = i_small_start + j_small = j_small_start + for node in eachnode(dg) + # Get the normal direction on the small element. 
+ normal_direction = get_normal_direction(cache.mpi_mortars, node, + position, mortar) + + calc_mpi_mortar_flux!(fstar, mesh, nonconservative_terms, equations, + surface_integral, dg, cache, + mortar, position, normal_direction, + node) + + i_small += i_small_step + j_small += j_small_step + end + end - calc_mpi_mortar_flux!(fstar, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - mortar, position, normal_direction, - node) + # Buffer to interpolate flux values of the large element to before + # copying in the correct orientation + u_buffer = cache.u_threaded[Threads.threadid()] - i_small += i_small_step - j_small += j_small_step - end + mpi_mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar, u_buffer) end - # Buffer to interpolate flux values of the large element to before - # copying in the correct orientation - u_buffer = cache.u_threaded[Threads.threadid()] - - mpi_mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, - mortar, fstar, u_buffer) - end - - return nothing + return nothing end - # Inlined version of the mortar flux computation on small elements for conservation laws @inline function calc_mpi_mortar_flux!(fstar, mesh::ParallelP4estMesh{2}, @@ -243,72 +258,75 @@ end surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, node_index) - @unpack u = cache.mpi_mortars - @unpack surface_flux = surface_integral + @unpack u = cache.mpi_mortars + @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, node_index, mortar_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, node_index, + mortar_index) - flux = surface_flux(u_ll, u_rr, normal_direction, equations) + flux = surface_flux(u_ll, u_rr, normal_direction, equations) - # Copy flux to buffer - set_node_vars!(fstar[position_index], flux, equations, dg, node_index) + # Copy flux to buffer + set_node_vars!(fstar[position_index], flux, equations, dg, node_index) end - @inline function mpi_mortar_fluxes_to_elements!(surface_flux_values, mesh::ParallelP4estMesh{2}, equations, mortar_l2::LobattoLegendreMortarL2, - dg::DGSEM, cache, mortar, fstar, u_buffer) - @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars - - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) - large_indices = node_indices[2, mortar] - large_direction = indices2direction(large_indices) - - for (element, position) in zip(local_neighbor_ids[mortar], local_neighbor_positions[mortar]) - if position == 3 # -> large element - # Project small fluxes to large element. - multiply_dimensionwise!(u_buffer, - mortar_l2.reverse_upper, fstar[2], - mortar_l2.reverse_lower, fstar[1]) - # The flux is calculated in the outward direction of the small elements, - # so the sign must be switched to get the flux in outward direction - # of the large element. - # The contravariant vectors of the large element (and therefore the normal - # vectors of the large element as well) are twice as large as the - # contravariant vectors of the small elements. Therefore, the flux needs - # to be scaled by a factor of 2 to obtain the flux of the large element. - u_buffer .*= -2 - # Copy interpolated flux values from buffer to large element face in the - # correct orientation. 
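# A quick standalone check of the scaling used for `u_buffer .*= -2` above:
# the non-normalized normal of a face scales with its length, and halving an
# edge (one refinement) halves each small-face normal. Plain 2D vectors only;
# nothing here touches the solver code.
edge = [2.0, 0.0]                      # tangent vector along the large face
n_large = [edge[2], -edge[1]]          # rotate by -90 degrees -> length-weighted normal
n_small = [edge[2] / 2, -edge[1] / 2]  # each small face has half the length
@assert n_large == 2 .* n_small        # hence the factor of 2; the sign flip is separate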
- # Note that the index of the small sides will always run forward but - # the index of the large side might need to run backwards for flipped sides. - if :i_backward in large_indices - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, end + 1 - i, large_direction, element] = u_buffer[v, i] - end - end - else - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i, large_direction, element] = u_buffer[v, i] - end - end - end - else # position in (1, 2) -> small element - # Copy solution small to small - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i, small_direction, element] = fstar[position][v, i] + dg::DGSEM, cache, mortar, fstar, + u_buffer) + @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars + + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) + large_indices = node_indices[2, mortar] + large_direction = indices2direction(large_indices) + + for (element, position) in zip(local_neighbor_ids[mortar], + local_neighbor_positions[mortar]) + if position == 3 # -> large element + # Project small fluxes to large element. + multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_upper, fstar[2], + mortar_l2.reverse_lower, fstar[1]) + # The flux is calculated in the outward direction of the small elements, + # so the sign must be switched to get the flux in outward direction + # of the large element. + # The contravariant vectors of the large element (and therefore the normal + # vectors of the large element as well) are twice as large as the + # contravariant vectors of the small elements. Therefore, the flux needs + # to be scaled by a factor of 2 to obtain the flux of the large element. + u_buffer .*= -2 + # Copy interpolated flux values from buffer to large element face in the + # correct orientation. + # Note that the index of the small sides will always run forward but + # the index of the large side might need to run backwards for flipped sides. + if :i_backward in large_indices + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, end + 1 - i, large_direction, element] = u_buffer[v, + i] + end + end + else + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i, large_direction, element] = u_buffer[v, + i] + end + end + end + else # position in (1, 2) -> small element + # Copy solution small to small + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i, small_direction, element] = fstar[position][v, + i] + end + end end - end end - end - return nothing + return nothing end - - -end # muladd \ No newline at end of file +end # muladd diff --git a/src/solvers/dgsem_p4est/dg_3d.jl b/src/solvers/dgsem_p4est/dg_3d.jl index d5e32ca64e4..dc69329474f 100644 --- a/src/solvers/dgsem_p4est/dg_3d.jl +++ b/src/solvers/dgsem_p4est/dg_3d.jl @@ -3,24 +3,27 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # The methods below are specialized on the mortar type # and called from the basic `create_cache` method at the top. 
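# The `*_threaded` containers constructed by `create_cache` below follow a
# common pattern: one pre-allocated scratch array per thread, so that the
# `@threaded` loops can write into `buffer_threaded[Threads.threadid()]`
# without allocating. A minimal, self-contained sketch of the idea
# (illustrative names; assumes the loop body does not yield, so the thread
# id stays fixed for the duration of each iteration):
buffer_threaded = [zeros(2, 2) for _ in 1:Threads.nthreads()]
Threads.@threads for k in 1:8
    buffer = buffer_threaded[Threads.threadid()]
    buffer .= k   # reuse this thread's scratch storage instead of allocating
end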
-function create_cache(mesh::P4estMesh{3}, equations, mortar_l2::LobattoLegendreMortarL2, uEltype) - # TODO: Taal compare performance of different types - fstar_threaded = [Array{uEltype, 4}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2), 4) - for _ in 1:Threads.nthreads()] - - fstar_tmp_threaded = [Array{uEltype, 3}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] - u_threaded = [Array{uEltype, 3}(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] - - (; fstar_threaded, fstar_tmp_threaded, u_threaded) +function create_cache(mesh::P4estMesh{3}, equations, mortar_l2::LobattoLegendreMortarL2, + uEltype) + # TODO: Taal compare performance of different types + fstar_threaded = [Array{uEltype, 4}(undef, nvariables(equations), nnodes(mortar_l2), + nnodes(mortar_l2), 4) + for _ in 1:Threads.nthreads()] + + fstar_tmp_threaded = [Array{uEltype, 3}(undef, nvariables(equations), + nnodes(mortar_l2), nnodes(mortar_l2)) + for _ in 1:Threads.nthreads()] + u_threaded = [Array{uEltype, 3}(undef, nvariables(equations), nnodes(mortar_l2), + nnodes(mortar_l2)) + for _ in 1:Threads.nthreads()] + + (; fstar_threaded, fstar_tmp_threaded, u_threaded) end - # index_to_start_step_3d(index::Symbol, index_range) # # Given a symbolic `index` and an `indexrange` (usually `eachnode(dg)`), @@ -51,179 +54,194 @@ end # k_volume += k_volume_step_j # end @inline function index_to_start_step_3d(index::Symbol, index_range) - index_begin = first(index_range) - index_end = last(index_range) - - if index === :begin - return index_begin, 0, 0 - elseif index === :end - return index_end, 0, 0 - elseif index === :i_forward - return index_begin, 1, index_begin - index_end - 1 - elseif index === :i_backward - return index_end, -1, index_end + 1 - index_begin - elseif index === :j_forward - return index_begin, 0, 1 - else # if index === :j_backward - return index_end, 0, -1 - end + index_begin = first(index_range) + index_end = last(index_range) + + if index === :begin + return index_begin, 0, 0 + elseif index === :end + return index_end, 0, 0 + elseif index === :i_forward + return index_begin, 1, index_begin - index_end - 1 + elseif index === :i_backward + return index_end, -1, index_end + 1 - index_begin + elseif index === :j_forward + return index_begin, 0, 1 + else # if index === :j_backward + return index_end, 0, -1 + end end # Extract the two varying indices from a symbolic index tuple. # For example, `surface_indices((:i_forward, :end, :j_forward)) == (:i_forward, :j_forward)`. @inline function surface_indices(indices::NTuple{3, Symbol}) - i1, i2, i3 = indices - index = i1 - (index === :begin || index === :end) && return (i2, i3) + i1, i2, i3 = indices + index = i1 + (index === :begin || index === :end) && return (i2, i3) - index = i2 - (index === :begin || index === :end) && return (i1, i3) + index = i2 + (index === :begin || index === :end) && return (i1, i3) - # i3 in (:begin, :end) - return (i1, i2) + # i3 in (:begin, :end) + return (i1, i2) end # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache, u, mesh::P4estMesh{3}, equations, surface_integral, dg::DG) - @unpack interfaces = cache - index_range = eachnode(dg) - - @threaded for interface in eachinterface(dg, cache) - # Copy solution data from the primary element using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. 
- # Note that in the current implementation, the interface will be - # "aligned at the primary element", i.e., the indices of the primary side - # will always run forwards. - primary_element = interfaces.neighbor_ids[1, interface] - primary_indices = interfaces.node_indices[1, interface] - - i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], index_range) - j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], index_range) - k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - k_primary = k_primary_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[1, v, i, j, interface] = u[v, i_primary, j_primary, k_primary, primary_element] + @unpack interfaces = cache + index_range = eachnode(dg) + + @threaded for interface in eachinterface(dg, cache) + # Copy solution data from the primary element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the indices of the primary side + # will always run forwards. + primary_element = interfaces.neighbor_ids[1, interface] + primary_indices = interfaces.node_indices[1, interface] + + i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], + index_range) + j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], + index_range) + k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + k_primary = k_primary_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + interfaces.u[1, v, i, j, interface] = u[v, i_primary, j_primary, + k_primary, primary_element] + end + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i + end + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j end - i_primary += i_primary_step_i - j_primary += j_primary_step_i - k_primary += k_primary_step_i - end - i_primary += i_primary_step_j - j_primary += j_primary_step_j - k_primary += k_primary_step_j - end - - # Copy solution data from the secondary element using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. 
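# A toy, self-contained demo of the traversal pattern used in these loops:
# with the start/step values returned by `index_to_start_step_3d` (defined
# earlier in this file and hard-coded here for a 4-node basis), a flipped
# face is visited backwards along `i` while `j` runs forward.
order = let
    i, i_step_i, i_step_j = 4, -1, 4   # index_to_start_step_3d(:i_backward, 1:4)
    j, j_step_i, j_step_j = 1, 0, 1    # index_to_start_step_3d(:j_forward, 1:4)
    visited = Tuple{Int, Int}[]
    for _ in 1:4
        for _ in 1:4
            push!(visited, (i, j))
            i += i_step_i
            j += j_step_i
        end
        i += i_step_j
        j += j_step_j
    end
    visited
end
@assert order[1:5] == [(4, 1), (3, 1), (2, 1), (1, 1), (4, 2)]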
- secondary_element = interfaces.neighbor_ids[2, interface] - secondary_indices = interfaces.node_indices[2, interface] - - i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1], index_range) - j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2], index_range) - k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3], index_range) - i_secondary = i_secondary_start - j_secondary = j_secondary_start - k_secondary = k_secondary_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - interfaces.u[2, v, i, j, interface] = u[v, i_secondary, j_secondary, k_secondary, secondary_element] + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. + secondary_element = interfaces.neighbor_ids[2, interface] + secondary_indices = interfaces.node_indices[2, interface] + + i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1], + index_range) + j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2], + index_range) + k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3], + index_range) + + i_secondary = i_secondary_start + j_secondary = j_secondary_start + k_secondary = k_secondary_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + interfaces.u[2, v, i, j, interface] = u[v, i_secondary, j_secondary, + k_secondary, + secondary_element] + end + i_secondary += i_secondary_step_i + j_secondary += j_secondary_step_i + k_secondary += k_secondary_step_i + end + i_secondary += i_secondary_step_j + j_secondary += j_secondary_step_j + k_secondary += k_secondary_step_j end - i_secondary += i_secondary_step_i - j_secondary += j_secondary_step_i - k_secondary += k_secondary_step_i - end - i_secondary += i_secondary_step_j - j_secondary += j_secondary_step_j - k_secondary += k_secondary_step_j end - end - return nothing + return nothing end - function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{3}, nonconservative_terms, equations, surface_integral, dg::DG, cache) - @unpack neighbor_ids, node_indices = cache.interfaces - @unpack contravariant_vectors = cache.elements - index_range = eachnode(dg) - - @threaded for interface in eachinterface(dg, cache) - # Get element and side information on the primary element - primary_element = neighbor_ids[1, interface] - primary_indices = node_indices[1, interface] - primary_direction = indices2direction(primary_indices) - - i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], index_range) - j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], index_range) - k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], index_range) - - i_primary = i_primary_start - j_primary = j_primary_start - k_primary = k_primary_start - - # Get element and side information on the secondary element - secondary_element = neighbor_ids[2, interface] - secondary_indices = node_indices[2, interface] - secondary_direction = indices2direction(secondary_indices) - secondary_surface_indices = surface_indices(secondary_indices) - - # Get the surface indexing on the secondary element. 
- # Note that the indices of the primary side will always run forward but - # the secondary indices might need to run backwards for flipped sides. - i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[1], index_range) - j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[2], index_range) - i_secondary = i_secondary_start - j_secondary = j_secondary_start - - for j in eachnode(dg) - for i in eachnode(dg) - # Get the normal direction from the primary element. - # Note, contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. - normal_direction = get_normal_direction(primary_direction, contravariant_vectors, - i_primary, j_primary, k_primary, - primary_element) - - calc_interface_flux!(surface_flux_values, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - interface, normal_direction, - i, j, primary_direction, primary_element, - i_secondary, j_secondary, secondary_direction, secondary_element) - - # Increment the primary element indices - i_primary += i_primary_step_i - j_primary += j_primary_step_i - k_primary += k_primary_step_i - # Increment the secondary element surface indices - i_secondary += i_secondary_step_i - j_secondary += j_secondary_step_i - end - # Increment the primary element indices - i_primary += i_primary_step_j - j_primary += j_primary_step_j - k_primary += k_primary_step_j - # Increment the secondary element surface indices - i_secondary += i_secondary_step_j - j_secondary += j_secondary_step_j + @unpack neighbor_ids, node_indices = cache.interfaces + @unpack contravariant_vectors = cache.elements + index_range = eachnode(dg) + + @threaded for interface in eachinterface(dg, cache) + # Get element and side information on the primary element + primary_element = neighbor_ids[1, interface] + primary_indices = node_indices[1, interface] + primary_direction = indices2direction(primary_indices) + + i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], + index_range) + j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], + index_range) + k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + k_primary = k_primary_start + + # Get element and side information on the secondary element + secondary_element = neighbor_ids[2, interface] + secondary_indices = node_indices[2, interface] + secondary_direction = indices2direction(secondary_indices) + secondary_surface_indices = surface_indices(secondary_indices) + + # Get the surface indexing on the secondary element. + # Note that the indices of the primary side will always run forward but + # the secondary indices might need to run backwards for flipped sides. + i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[1], + index_range) + j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[2], + index_range) + i_secondary = i_secondary_start + j_secondary = j_secondary_start + + for j in eachnode(dg) + for i in eachnode(dg) + # Get the normal direction from the primary element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. 
This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, k_primary, + primary_element) + + calc_interface_flux!(surface_flux_values, mesh, nonconservative_terms, + equations, + surface_integral, dg, cache, + interface, normal_direction, + i, j, primary_direction, primary_element, + i_secondary, j_secondary, secondary_direction, + secondary_element) + + # Increment the primary element indices + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i + # Increment the secondary element surface indices + i_secondary += i_secondary_step_i + j_secondary += j_secondary_step_i + end + # Increment the primary element indices + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j + # Increment the secondary element surface indices + i_secondary += i_secondary_step_j + j_secondary += j_secondary_step_j + end end - end - return nothing + return nothing end - # Inlined function for interface flux computation for conservative flux terms @inline function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{3}, @@ -233,20 +251,22 @@ end primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, - secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces - @unpack surface_flux = surface_integral + secondary_direction_index, + secondary_element_index) + @unpack u = cache.interfaces + @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, primary_j_node_index, interface_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, + primary_j_node_index, interface_index) - flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) + flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) - for v in eachvariable(equations) - surface_flux_values[v, primary_i_node_index, primary_j_node_index, - primary_direction_index, primary_element_index] = flux_[v] - surface_flux_values[v, secondary_i_node_index, secondary_j_node_index, - secondary_direction_index, secondary_element_index] = -flux_[v] - end + for v in eachvariable(equations) + surface_flux_values[v, primary_i_node_index, primary_j_node_index, + primary_direction_index, primary_element_index] = flux_[v] + surface_flux_values[v, secondary_i_node_index, secondary_j_node_index, + secondary_direction_index, secondary_element_index] = -flux_[v] + end end # Inlined function for interface flux computation for flux + nonconservative terms @@ -258,289 +278,314 @@ end primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index, secondary_i_node_index, secondary_j_node_index, - secondary_direction_index, secondary_element_index) - @unpack u = cache.interfaces - surface_flux, nonconservative_flux = surface_integral.surface_flux - - u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, primary_j_node_index, interface_index) - - flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) - - # Compute both nonconservative fluxes - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # However, both are the same at watertight interfaces, so we pass the - # `normal_direction` twice. 
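# A self-contained toy version of the coupling computed just below (the
# functions here are made up for illustration and do not correspond to a
# concrete equation system): both sides share the conservative two-point
# flux, each side adds half of the nonconservative term evaluated from its
# own perspective, and the secondary side flips the overall sign.
f_cons(u_ll, u_rr, n) = 0.5 * (u_ll^2 + u_rr^2) * n         # symmetric part
f_noncons(u_ll, u_rr, n, n_avg) = u_ll * (u_rr - u_ll) * n  # non-symmetric part
u_ll, u_rr, n = 1.0, 2.0, 1.0
flux_ = f_cons(u_ll, u_rr, n)
primary = flux_ + 0.5 * f_noncons(u_ll, u_rr, n, n)
secondary = -(flux_ + 0.5 * f_noncons(u_rr, u_ll, n, n))
# primary + secondary != 0 here: nonconservative terms break the usual
# telescoping of surface fluxes by design.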
- noncons_primary = nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - noncons_secondary = nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - # Store the flux with nonconservative terms on the primary and secondary elements - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, primary_i_node_index, primary_j_node_index, - primary_direction_index, primary_element_index] = flux_[v] + 0.5 * noncons_primary[v] - surface_flux_values[v, secondary_i_node_index, secondary_j_node_index, - secondary_direction_index, secondary_element_index] = -(flux_[v] + 0.5 * noncons_secondary[v]) - end + secondary_direction_index, + secondary_element_index) + @unpack u = cache.interfaces + surface_flux, nonconservative_flux = surface_integral.surface_flux + + u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index, + primary_j_node_index, interface_index) + + flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) + + # Compute both nonconservative fluxes + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # However, both are the same at watertight interfaces, so we pass the + # `normal_direction` twice. + noncons_primary = nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + noncons_secondary = nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + # Store the flux with nonconservative terms on the primary and secondary elements + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, primary_i_node_index, primary_j_node_index, + primary_direction_index, primary_element_index] = flux_[v] + + 0.5 * noncons_primary[v] + surface_flux_values[v, secondary_i_node_index, secondary_j_node_index, + secondary_direction_index, secondary_element_index] = -(flux_[v] + + 0.5 * + noncons_secondary[v]) + end end - function prolong2boundaries!(cache, u, mesh::P4estMesh{3}, equations, surface_integral, dg::DG) - @unpack boundaries = cache - index_range = eachnode(dg) - - @threaded for boundary in eachboundary(dg, cache) - # Copy solution data from the element using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. 
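# For orientation: `calc_boundary_flux!` further below calls the user-supplied
# boundary condition as `boundary_condition(u_inner, normal_direction, x, t,
# surface_flux, equations)`. A minimal sketch of such a callable (a
# "do-nothing" style condition; the name and behavior are illustrative only):
function boundary_condition_sketch(u_inner, normal_direction, x, t,
                                   surface_flux, equations)
    # mirror the inner state to the outside and evaluate the numerical flux
    return surface_flux(u_inner, u_inner, normal_direction, equations)
end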
- element = boundaries.neighbor_ids[boundary] - node_indices = boundaries.node_indices[boundary] - - i_node_start, i_node_step_i, i_node_step_j = index_to_start_step_3d(node_indices[1], index_range) - j_node_start, j_node_step_i, j_node_step_j = index_to_start_step_3d(node_indices[2], index_range) - k_node_start, k_node_step_i, k_node_step_j = index_to_start_step_3d(node_indices[3], index_range) - - i_node = i_node_start - j_node = j_node_start - k_node = k_node_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - boundaries.u[v, i, j, boundary] = u[v, i_node, j_node, k_node, element] + @unpack boundaries = cache + index_range = eachnode(dg) + + @threaded for boundary in eachboundary(dg, cache) + # Copy solution data from the element using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. + element = boundaries.neighbor_ids[boundary] + node_indices = boundaries.node_indices[boundary] + + i_node_start, i_node_step_i, i_node_step_j = index_to_start_step_3d(node_indices[1], + index_range) + j_node_start, j_node_step_i, j_node_step_j = index_to_start_step_3d(node_indices[2], + index_range) + k_node_start, k_node_step_i, k_node_step_j = index_to_start_step_3d(node_indices[3], + index_range) + + i_node = i_node_start + j_node = j_node_start + k_node = k_node_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + boundaries.u[v, i, j, boundary] = u[v, i_node, j_node, k_node, + element] + end + i_node += i_node_step_i + j_node += j_node_step_i + k_node += k_node_step_i + end + i_node += i_node_step_j + j_node += j_node_step_j + k_node += k_node_step_j end - i_node += i_node_step_i - j_node += j_node_step_i - k_node += k_node_step_i - end - i_node += i_node_step_j - j_node += j_node_step_j - k_node += k_node_step_j end - end - return nothing + return nothing end - function calc_boundary_flux!(cache, t, boundary_condition, boundary_indexing, mesh::P4estMesh{3}, equations, surface_integral, dg::DG) - @unpack boundaries = cache - @unpack surface_flux_values, node_coordinates, contravariant_vectors = cache.elements - @unpack surface_flux = surface_integral - index_range = eachnode(dg) - - @threaded for local_index in eachindex(boundary_indexing) - # Use the local index to get the global boundary index from the - # pre-sorted list - boundary = boundary_indexing[local_index] - - # Get information on the adjacent element, compute the surface fluxes, - # and store them - element = boundaries.neighbor_ids[boundary] - node_indices = boundaries.node_indices[boundary] - direction = indices2direction(node_indices) - - i_node_start, i_node_step_i, i_node_step_j = index_to_start_step_3d(node_indices[1], index_range) - j_node_start, j_node_step_i, j_node_step_j = index_to_start_step_3d(node_indices[2], index_range) - k_node_start, k_node_step_i, k_node_step_j = index_to_start_step_3d(node_indices[3], index_range) - - i_node = i_node_start - j_node = j_node_start - k_node = k_node_start - for j in eachnode(dg) - for i in eachnode(dg) - # Extract solution data from boundary container - u_inner = get_node_vars(boundaries.u, equations, dg, i, j, boundary) - - # Outward-pointing normal direction (not normalized) - normal_direction = get_normal_direction(direction, contravariant_vectors, - i_node, j_node, k_node, element) - - # Coordinates at boundary node - x = get_node_coords(node_coordinates, equations, dg, - i_node, j_node, k_node, element) - - flux_ = boundary_condition(u_inner, 
normal_direction, x, t, surface_flux, equations) - - # Copy flux to element storage in the correct orientation - for v in eachvariable(equations) - surface_flux_values[v, i, j, direction, element] = flux_[v] + @unpack boundaries = cache + @unpack surface_flux_values, node_coordinates, contravariant_vectors = cache.elements + @unpack surface_flux = surface_integral + index_range = eachnode(dg) + + @threaded for local_index in eachindex(boundary_indexing) + # Use the local index to get the global boundary index from the + # pre-sorted list + boundary = boundary_indexing[local_index] + + # Get information on the adjacent element, compute the surface fluxes, + # and store them + element = boundaries.neighbor_ids[boundary] + node_indices = boundaries.node_indices[boundary] + direction = indices2direction(node_indices) + + i_node_start, i_node_step_i, i_node_step_j = index_to_start_step_3d(node_indices[1], + index_range) + j_node_start, j_node_step_i, j_node_step_j = index_to_start_step_3d(node_indices[2], + index_range) + k_node_start, k_node_step_i, k_node_step_j = index_to_start_step_3d(node_indices[3], + index_range) + + i_node = i_node_start + j_node = j_node_start + k_node = k_node_start + for j in eachnode(dg) + for i in eachnode(dg) + # Extract solution data from boundary container + u_inner = get_node_vars(boundaries.u, equations, dg, i, j, boundary) + + # Outward-pointing normal direction (not normalized) + normal_direction = get_normal_direction(direction, + contravariant_vectors, + i_node, j_node, k_node, element) + + # Coordinates at boundary node + x = get_node_coords(node_coordinates, equations, dg, + i_node, j_node, k_node, element) + + flux_ = boundary_condition(u_inner, normal_direction, x, t, + surface_flux, equations) + + # Copy flux to element storage in the correct orientation + for v in eachvariable(equations) + surface_flux_values[v, i, j, direction, element] = flux_[v] + end + + i_node += i_node_step_i + j_node += j_node_step_i + k_node += k_node_step_i + end + i_node += i_node_step_j + j_node += j_node_step_j + k_node += k_node_step_j end - - i_node += i_node_step_i - j_node += j_node_step_i - k_node += k_node_step_i - end - i_node += i_node_step_j - j_node += j_node_step_j - k_node += k_node_step_j end - end end - function prolong2mortars!(cache, u, mesh::P4estMesh{3}, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DGSEM) - @unpack fstar_tmp_threaded = cache - @unpack neighbor_ids, node_indices = cache.mortars - index_range = eachnode(dg) - - @threaded for mortar in eachmortar(dg, cache) - # Copy solution data from the small elements using "delayed indexing" with - # a start value and two step sizes to get the correct face and orientation. 
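# The `multiply_dimensionwise!` calls further below in this function
# interpolate 2D face data by applying one 1D operator per tensor direction.
# A plain-array sketch of the operation assumed here (illustrative, not the
# library implementation):
#     out[v, i, j] = sum over p, q of A[i, p] * B[j, q] * in[v, p, q]
function multiply_dimensionwise_sketch(A, B, data_in)
    out = zeros(size(data_in, 1), size(A, 1), size(B, 1))
    for v in axes(data_in, 1), i in axes(A, 1), j in axes(B, 1)
        for p in axes(A, 2), q in axes(B, 2)
            out[v, i, j] += A[i, p] * B[j, q] * data_in[v, p, q]
        end
    end
    return out
end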
- small_indices = node_indices[1, mortar] - - i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], index_range) - j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], index_range) - k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], index_range) - - for position in 1:4 - i_small = i_small_start - j_small = j_small_start - k_small = k_small_start - element = neighbor_ids[position, mortar] - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u[1, v, position, i, j, mortar] = u[v, i_small, j_small, k_small, element] - end - i_small += i_small_step_i - j_small += j_small_step_i - k_small += k_small_step_i + @unpack fstar_tmp_threaded = cache + @unpack neighbor_ids, node_indices = cache.mortars + index_range = eachnode(dg) + + @threaded for mortar in eachmortar(dg, cache) + # Copy solution data from the small elements using "delayed indexing" with + # a start value and two step sizes to get the correct face and orientation. + small_indices = node_indices[1, mortar] + + i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], + index_range) + j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], + index_range) + k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], + index_range) + + for position in 1:4 + i_small = i_small_start + j_small = j_small_start + k_small = k_small_start + element = neighbor_ids[position, mortar] + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u[1, v, position, i, j, mortar] = u[v, i_small, + j_small, + k_small, + element] + end + i_small += i_small_step_i + j_small += j_small_step_i + k_small += k_small_step_i + end + i_small += i_small_step_j + j_small += j_small_step_j + k_small += k_small_step_j + end end - i_small += i_small_step_j - j_small += j_small_step_j - k_small += k_small_step_j - end - end - - - # Buffer to copy solution values of the large element in the correct orientation - # before interpolating - u_buffer = cache.u_threaded[Threads.threadid()] - # temporary buffer for projections - fstar_tmp = fstar_tmp_threaded[Threads.threadid()] - - # Copy solution of large element face to buffer in the - # correct orientation - large_indices = node_indices[2, mortar] - i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_indices[1], index_range) - j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_indices[2], index_range) - k_large_start, k_large_step_i, k_large_step_j = index_to_start_step_3d(large_indices[3], index_range) - - i_large = i_large_start - j_large = j_large_start - k_large = k_large_start - element = neighbor_ids[5, mortar] - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - u_buffer[v, i, j] = u[v, i_large, j_large, k_large, element] + # Buffer to copy solution values of the large element in the correct orientation + # before interpolating + u_buffer = cache.u_threaded[Threads.threadid()] + # temporary buffer for projections + fstar_tmp = fstar_tmp_threaded[Threads.threadid()] + + # Copy solution of large element face to buffer in the + # correct orientation + large_indices = node_indices[2, mortar] + + i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_indices[1], + index_range) + j_large_start, j_large_step_i, j_large_step_j = 
index_to_start_step_3d(large_indices[2], + index_range) + k_large_start, k_large_step_i, k_large_step_j = index_to_start_step_3d(large_indices[3], + index_range) + + i_large = i_large_start + j_large = j_large_start + k_large = k_large_start + element = neighbor_ids[5, mortar] + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + u_buffer[v, i, j] = u[v, i_large, j_large, k_large, element] + end + i_large += i_large_step_i + j_large += j_large_step_i + k_large += k_large_step_i + end + i_large += i_large_step_j + j_large += j_large_step_j + k_large += k_large_step_j end - i_large += i_large_step_i - j_large += j_large_step_i - k_large += k_large_step_i - end - i_large += i_large_step_j - j_large += j_large_step_j - k_large += k_large_step_j - end - # Interpolate large element face data from buffer to small face locations - multiply_dimensionwise!(view(cache.mortars.u, 2, :, 1, :, :, mortar), - mortar_l2.forward_lower, - mortar_l2.forward_lower, - u_buffer, - fstar_tmp) - multiply_dimensionwise!(view(cache.mortars.u, 2, :, 2, :, :, mortar), - mortar_l2.forward_upper, - mortar_l2.forward_lower, - u_buffer, - fstar_tmp) - multiply_dimensionwise!(view(cache.mortars.u, 2, :, 3, :, :, mortar), - mortar_l2.forward_lower, - mortar_l2.forward_upper, - u_buffer, - fstar_tmp) - multiply_dimensionwise!(view(cache.mortars.u, 2, :, 4, :, :, mortar), - mortar_l2.forward_upper, - mortar_l2.forward_upper, - u_buffer, - fstar_tmp) - end + # Interpolate large element face data from buffer to small face locations + multiply_dimensionwise!(view(cache.mortars.u, 2, :, 1, :, :, mortar), + mortar_l2.forward_lower, + mortar_l2.forward_lower, + u_buffer, + fstar_tmp) + multiply_dimensionwise!(view(cache.mortars.u, 2, :, 2, :, :, mortar), + mortar_l2.forward_upper, + mortar_l2.forward_lower, + u_buffer, + fstar_tmp) + multiply_dimensionwise!(view(cache.mortars.u, 2, :, 3, :, :, mortar), + mortar_l2.forward_lower, + mortar_l2.forward_upper, + u_buffer, + fstar_tmp) + multiply_dimensionwise!(view(cache.mortars.u, 2, :, 4, :, :, mortar), + mortar_l2.forward_upper, + mortar_l2.forward_upper, + u_buffer, + fstar_tmp) + end - return nothing + return nothing end - function calc_mortar_flux!(surface_flux_values, mesh::P4estMesh{3}, nonconservative_terms, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - @unpack neighbor_ids, node_indices = cache.mortars - @unpack contravariant_vectors = cache.elements - @unpack fstar_threaded, fstar_tmp_threaded = cache - index_range = eachnode(dg) - - @threaded for mortar in eachmortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar = fstar_threaded[Threads.threadid()] - fstar_tmp = fstar_tmp_threaded[Threads.threadid()] - - # Get index information on the small elements - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) - - i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], index_range) - j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], index_range) - k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], index_range) - - for position in 1:4 - i_small = i_small_start - j_small = j_small_start - k_small = k_small_start - element = neighbor_ids[position, mortar] - for j in eachnode(dg) - for i in eachnode(dg) - # Get the normal direction on the small element. 
- # Note, contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. - normal_direction = get_normal_direction(small_direction, contravariant_vectors, - i_small, j_small, k_small, element) - - calc_mortar_flux!(fstar, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - mortar, position, normal_direction, - i, j) - - i_small += i_small_step_i - j_small += j_small_step_i - k_small += k_small_step_i + @unpack neighbor_ids, node_indices = cache.mortars + @unpack contravariant_vectors = cache.elements + @unpack fstar_threaded, fstar_tmp_threaded = cache + index_range = eachnode(dg) + + @threaded for mortar in eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar = fstar_threaded[Threads.threadid()] + fstar_tmp = fstar_tmp_threaded[Threads.threadid()] + + # Get index information on the small elements + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) + + i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], + index_range) + j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], + index_range) + k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], + index_range) + + for position in 1:4 + i_small = i_small_start + j_small = j_small_start + k_small = k_small_start + element = neighbor_ids[position, mortar] + for j in eachnode(dg) + for i in eachnode(dg) + # Get the normal direction on the small element. + # Note, contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. + normal_direction = get_normal_direction(small_direction, + contravariant_vectors, + i_small, j_small, k_small, + element) + + calc_mortar_flux!(fstar, mesh, nonconservative_terms, equations, + surface_integral, dg, cache, + mortar, position, normal_direction, + i, j) + + i_small += i_small_step_i + j_small += j_small_step_i + k_small += k_small_step_i + end + i_small += i_small_step_j + j_small += j_small_step_j + k_small += k_small_step_j + end end - i_small += i_small_step_j - j_small += j_small_step_j - k_small += k_small_step_j - end - end - # Buffer to interpolate flux values of the large element to before - # copying in the correct orientation - u_buffer = cache.u_threaded[Threads.threadid()] + # Buffer to interpolate flux values of the large element to before + # copying in the correct orientation + u_buffer = cache.u_threaded[Threads.threadid()] - mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, - mortar, fstar, u_buffer, fstar_tmp) - end + mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar, u_buffer, fstar_tmp) + end - return nothing + return nothing end # Inlined version of the mortar flux computation on small elements for conservation fluxes @@ -550,15 +595,17 @@ end surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, i_node_index, j_node_index) - @unpack u = cache.mortars - @unpack surface_flux = surface_integral + @unpack u = cache.mortars + @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index, j_node_index, mortar_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index, + j_node_index, mortar_index) - flux = surface_flux(u_ll, u_rr, 
normal_direction, equations) + flux = surface_flux(u_ll, u_rr, normal_direction, equations) - # Copy flux to buffer - set_node_vars!(fstar, flux, equations, dg, i_node_index, j_node_index, position_index) + # Copy flux to buffer + set_node_vars!(fstar, flux, equations, dg, i_node_index, j_node_index, + position_index) end # Inlined version of the mortar flux computation on small elements for conservation fluxes @@ -569,154 +616,164 @@ end surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, i_node_index, j_node_index) - @unpack u = cache.mortars - surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u = cache.mortars + surface_flux, nonconservative_flux = surface_integral.surface_flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index, j_node_index, mortar_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index, + j_node_index, mortar_index) - # Compute conservative flux - flux = surface_flux(u_ll, u_rr, normal_direction, equations) + # Compute conservative flux + flux = surface_flux(u_ll, u_rr, normal_direction, equations) - # Compute nonconservative flux and add it to the flux scaled by a factor of 0.5 based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - noncons = nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - flux_plus_noncons = flux + 0.5 * noncons + # Compute nonconservative flux and add it to the flux scaled by a factor of 0.5 based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + noncons = nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, + equations) + flux_plus_noncons = flux + 0.5 * noncons - # Copy to buffer - set_node_vars!(fstar, flux_plus_noncons, equations, dg, i_node_index, j_node_index, position_index) + # Copy to buffer + set_node_vars!(fstar, flux_plus_noncons, equations, dg, i_node_index, j_node_index, + position_index) end - @inline function mortar_fluxes_to_elements!(surface_flux_values, mesh::P4estMesh{3}, equations, mortar_l2::LobattoLegendreMortarL2, - dg::DGSEM, cache, mortar, fstar, u_buffer, fstar_tmp) - @unpack neighbor_ids, node_indices = cache.mortars - index_range = eachnode(dg) - - # Copy solution small to small - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) - - for position in 1:4 - element = neighbor_ids[position, mortar] - for j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i, j, small_direction, element] = fstar[v, i, j, position] - end + dg::DGSEM, cache, mortar, fstar, u_buffer, + fstar_tmp) + @unpack neighbor_ids, node_indices = cache.mortars + index_range = eachnode(dg) + + # Copy solution small to small + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) + + for position in 1:4 + element = neighbor_ids[position, mortar] + for j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i, j, small_direction, element] = fstar[v, i, j, + position] + end + end end - end - - # Project small fluxes to large element. 
- multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_lower, mortar_l2.reverse_lower, - view(fstar, .., 1), - fstar_tmp) - add_multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_upper, mortar_l2.reverse_lower, - view(fstar, .., 2), - fstar_tmp) - add_multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_lower, mortar_l2.reverse_upper, - view(fstar, .., 3), - fstar_tmp) - add_multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_upper, mortar_l2.reverse_upper, - view(fstar, .., 4), - fstar_tmp) - - # The flux is calculated in the outward direction of the small elements, - # so the sign must be switched to get the flux in outward direction - # of the large element. - # The contravariant vectors of the large element (and therefore the normal - # vectors of the large element as well) are four times as large as the - # contravariant vectors of the small elements. Therefore, the flux needs - # to be scaled by a factor of 4 to obtain the flux of the large element. - u_buffer .*= -4 - - # Copy interpolated flux values from buffer to large element face in the - # correct orientation. - # Note that the index of the small sides will always run forward but - # the index of the large side might need to run backwards for flipped sides. - large_element = neighbor_ids[5, mortar] - large_indices = node_indices[2, mortar] - large_direction = indices2direction(large_indices) - large_surface_indices = surface_indices(large_indices) - - i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_surface_indices[1], index_range) - j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_surface_indices[2], index_range) - - # Note that the indices of the small sides will always run forward but - # the large indices might need to run backwards for flipped sides. - i_large = i_large_start - j_large = j_large_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i_large, j_large, large_direction, large_element] = u_buffer[v, i, j] - end - i_large += i_large_step_i - j_large += j_large_step_i + + # Project small fluxes to large element. + multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_lower, mortar_l2.reverse_lower, + view(fstar, .., 1), + fstar_tmp) + add_multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_upper, mortar_l2.reverse_lower, + view(fstar, .., 2), + fstar_tmp) + add_multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_lower, mortar_l2.reverse_upper, + view(fstar, .., 3), + fstar_tmp) + add_multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_upper, mortar_l2.reverse_upper, + view(fstar, .., 4), + fstar_tmp) + + # The flux is calculated in the outward direction of the small elements, + # so the sign must be switched to get the flux in outward direction + # of the large element. + # The contravariant vectors of the large element (and therefore the normal + # vectors of the large element as well) are four times as large as the + # contravariant vectors of the small elements. Therefore, the flux needs + # to be scaled by a factor of 4 to obtain the flux of the large element. + u_buffer .*= -4 + + # Copy interpolated flux values from buffer to large element face in the + # correct orientation. + # Note that the index of the small sides will always run forward but + # the index of the large side might need to run backwards for flipped sides. 
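# A quick standalone check of the factor-4 scaling applied to `u_buffer`
# above: the non-normalized normal of a quadrilateral face scales with its
# area, and one refinement splits the face into four quarters. Plain vectors
# only; nothing here touches the solver code.
using LinearAlgebra: cross
a = [1.0, 0.0, 0.0]              # edge vectors spanning the large face
b = [0.0, 1.0, 0.0]
n_large = cross(a, b)            # area-weighted normal of the large face
n_small = cross(a ./ 2, b ./ 2)  # normal of one of the four small faces
@assert n_large == 4 .* n_small  # hence the factor of 4; the sign flip is separate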
+ large_element = neighbor_ids[5, mortar] + large_indices = node_indices[2, mortar] + large_direction = indices2direction(large_indices) + large_surface_indices = surface_indices(large_indices) + + i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_surface_indices[1], + index_range) + j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_surface_indices[2], + index_range) + + # Note that the indices of the small sides will always run forward but + # the large indices might need to run backwards for flipped sides. + i_large = i_large_start + j_large = j_large_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i_large, j_large, large_direction, large_element] = u_buffer[v, + i, + j] + end + i_large += i_large_step_i + j_large += j_large_step_i + end + i_large += i_large_step_j + j_large += j_large_step_j end - i_large += i_large_step_j - j_large += j_large_step_j - end - return nothing + return nothing end - function calc_surface_integral!(du, u, mesh::P4estMesh{3}, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, cache) - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache.elements - - # Note that all fluxes have been computed with outward-pointing normal vectors. - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). - factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for m in eachnode(dg), l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, m, element] = ( - du[v, 1, l, m, element] + surface_flux_values[v, l, m, 1, element] * factor_1) - - # surface at +x - du[v, nnodes(dg), l, m, element] = ( - du[v, nnodes(dg), l, m, element] + surface_flux_values[v, l, m, 2, element] * factor_2) - - # surface at -y - du[v, l, 1, m, element] = ( - du[v, l, 1, m, element] + surface_flux_values[v, l, m, 3, element] * factor_1) - - # surface at +y - du[v, l, nnodes(dg), m, element] = ( - du[v, l, nnodes(dg), m, element] + surface_flux_values[v, l, m, 4, element] * factor_2) - - # surface at -z - du[v, l, m, 1, element] = ( - du[v, l, m, 1, element] + surface_flux_values[v, l, m, 5, element] * factor_1) - - # surface at +z - du[v, l, m, nnodes(dg), element] = ( - du[v, l, m, nnodes(dg), element] + surface_flux_values[v, l, m, 6, element] * factor_2) - end + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). 
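The comment above about explicit assignments deserves a small demonstration. A hedged sketch (the function `surface_update!` is hypothetical, not part of Trixi.jl) of the pattern `@muladd` from MuladdMacro.jl rewrites into fused multiply-adds:

```julia
using MuladdMacro

@muladd function surface_update!(du, flux, factor)
    for i in eachindex(du)
        # `a + b * c` is rewritten to `muladd(b, c, a)`; a `du[i] += ...`
        # spelling would hide the addition from the macro's pattern match.
        du[i] = du[i] + flux[i] * factor
    end
    return du
end

surface_update!(zeros(3), [1.0, 2.0, 3.0], 0.5)   # -> [0.5, 1.0, 1.5]
```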
+ factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for m in eachnode(dg), l in eachnode(dg) + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, m, element] = (du[v, 1, l, m, element] + + surface_flux_values[v, l, m, 1, element] * + factor_1) + + # surface at +x + du[v, nnodes(dg), l, m, element] = (du[v, nnodes(dg), l, m, element] + + surface_flux_values[v, l, m, 2, + element] * + factor_2) + + # surface at -y + du[v, l, 1, m, element] = (du[v, l, 1, m, element] + + surface_flux_values[v, l, m, 3, element] * + factor_1) + + # surface at +y + du[v, l, nnodes(dg), m, element] = (du[v, l, nnodes(dg), m, element] + + surface_flux_values[v, l, m, 4, + element] * + factor_2) + + # surface at -z + du[v, l, m, 1, element] = (du[v, l, m, 1, element] + + surface_flux_values[v, l, m, 5, element] * + factor_1) + + # surface at +z + du[v, l, m, nnodes(dg), element] = (du[v, l, m, nnodes(dg), element] + + surface_flux_values[v, l, m, 6, + element] * + factor_2) + end + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_p4est/dg_3d_parallel.jl b/src/solvers/dgsem_p4est/dg_3d_parallel.jl index 5c77247ac6d..13bf2a1a2eb 100644 --- a/src/solvers/dgsem_p4est/dg_3d_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_3d_parallel.jl @@ -3,207 +3,236 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent function rhs!(du, u, t, mesh::ParallelP4estMesh{3}, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Start to receive MPI data - @trixi_timeit timer() "start MPI receive" start_mpi_receive!(cache.mpi_cache) - - # Prolong solution to MPI interfaces - @trixi_timeit timer() "prolong2mpiinterfaces" prolong2mpiinterfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Prolong solution to MPI mortars - @trixi_timeit timer() "prolong2mpimortars" prolong2mpimortars!( - cache, u, mesh, equations, dg.mortar, dg.surface_integral, dg) - - # Start to send MPI data - @trixi_timeit timer() "start MPI send" start_mpi_send!( - cache.mpi_cache, mesh, equations, dg, cache) - - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) - - # Prolong solution to mortars - @trixi_timeit timer() "prolong2mortars" prolong2mortars!( - cache, u, mesh, equations, dg.mortar, dg.surface_integral, dg) - - # Calculate mortar fluxes - @trixi_timeit timer() "mortar flux" calc_mortar_flux!( - cache.elements.surface_flux_values, mesh, - 
have_nonconservative_terms(equations), equations, - dg.mortar, dg.surface_integral, dg, cache) - - # Finish to receive MPI data - @trixi_timeit timer() "finish MPI receive" finish_mpi_receive!( - cache.mpi_cache, mesh, equations, dg, cache) - - # Calculate MPI interface fluxes - @trixi_timeit timer() "MPI interface flux" calc_mpi_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Calculate MPI mortar fluxes - @trixi_timeit timer() "MPI mortar flux" calc_mpi_mortar_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.mortar, dg.surface_integral, dg, cache) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - # Finish to send MPI data - @trixi_timeit timer() "finish MPI send" finish_mpi_send!(cache.mpi_cache) - - return nothing -end + # Start to receive MPI data + @trixi_timeit timer() "start MPI receive" start_mpi_receive!(cache.mpi_cache) + + # Prolong solution to MPI interfaces + @trixi_timeit timer() "prolong2mpiinterfaces" begin + prolong2mpiinterfaces!(cache, u, mesh, equations, dg.surface_integral, dg) + end + # Prolong solution to MPI mortars + @trixi_timeit timer() "prolong2mpimortars" begin + prolong2mpimortars!(cache, u, mesh, equations, + dg.mortar, dg.surface_integral, dg) + end + + # Start to send MPI data + @trixi_timeit timer() "start MPI send" begin + start_mpi_send!(cache.mpi_cache, mesh, equations, dg, cache) + end + + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache, u, mesh, equations, dg.surface_integral, dg) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache, u, mesh, equations, dg.surface_integral, dg) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end + + # Prolong solution to mortars + @trixi_timeit timer() "prolong2mortars" begin + prolong2mortars!(cache, u, mesh, equations, + dg.mortar, dg.surface_integral, dg) + end + + # Calculate mortar fluxes + @trixi_timeit timer() "mortar flux" begin + calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.mortar, dg.surface_integral, dg, cache) + end + + # Finish to receive MPI data + @trixi_timeit timer() "finish MPI receive" begin + finish_mpi_receive!(cache.mpi_cache, mesh, equations, dg, cache) + end + + # Calculate MPI interface fluxes + @trixi_timeit 
timer() "MPI interface flux" begin + calc_mpi_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end + + # Calculate MPI mortar fluxes + @trixi_timeit timer() "MPI mortar flux" begin + calc_mpi_mortar_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.mortar, dg.surface_integral, dg, cache) + end + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, dg.surface_integral, dg, cache) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) + end + + # Finish to send MPI data + @trixi_timeit timer() "finish MPI send" finish_mpi_send!(cache.mpi_cache) + + return nothing +end function prolong2mpiinterfaces!(cache, u, mesh::ParallelP4estMesh{3}, equations, surface_integral, dg::DG) - @unpack mpi_interfaces = cache - index_range = eachnode(dg) - - @threaded for interface in eachmpiinterface(dg, cache) - # Copy solution data from the local element using "delayed indexing" with - # a start value and a step size to get the correct face and orientation. - # Note that in the current implementation, the interface will be - # "aligned at the primary element", i.e., the index of the primary side - # will always run forwards. - local_side = mpi_interfaces.local_sides[interface] - local_element = mpi_interfaces.local_neighbor_ids[interface] - local_indices = mpi_interfaces.node_indices[interface] - - i_element_start, i_element_step_i, i_element_step_j = index_to_start_step_3d(local_indices[1], index_range) - j_element_start, j_element_step_i, j_element_step_j = index_to_start_step_3d(local_indices[2], index_range) - k_element_start, k_element_step_i, k_element_step_j = index_to_start_step_3d(local_indices[3], index_range) - - i_element = i_element_start - j_element = j_element_start - k_element = k_element_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - mpi_interfaces.u[local_side, v, i, j, interface] = u[v, i_element, j_element, k_element, local_element] + @unpack mpi_interfaces = cache + index_range = eachnode(dg) + + @threaded for interface in eachmpiinterface(dg, cache) + # Copy solution data from the local element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + # Note that in the current implementation, the interface will be + # "aligned at the primary element", i.e., the index of the primary side + # will always run forwards. 
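The `rhs!` rewrite above replaces long continuation lines with `begin ... end` blocks under `@trixi_timeit`. Assuming `@trixi_timeit` behaves like TimerOutputs.jl's `@timeit` (it wraps the same kind of timer), a hedged minimal reproduction of the two forms:

```julia
using TimerOutputs

const to = TimerOutput()

# single-expression form: fine while the timed call fits on one line
@timeit to "reset du" fill!(zeros(8), 0.0)

# block form: the timed region is an explicit begin/end block, so a call with
# many arguments can be indented freely instead of using continuation lines
@timeit to "volume integral" begin
    sum(abs2, randn(16))
end

print_timer(to)
```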
+ local_side = mpi_interfaces.local_sides[interface] + local_element = mpi_interfaces.local_neighbor_ids[interface] + local_indices = mpi_interfaces.node_indices[interface] + + i_element_start, i_element_step_i, i_element_step_j = index_to_start_step_3d(local_indices[1], + index_range) + j_element_start, j_element_step_i, j_element_step_j = index_to_start_step_3d(local_indices[2], + index_range) + k_element_start, k_element_step_i, k_element_step_j = index_to_start_step_3d(local_indices[3], + index_range) + + i_element = i_element_start + j_element = j_element_start + k_element = k_element_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + mpi_interfaces.u[local_side, v, i, j, interface] = u[v, i_element, + j_element, + k_element, + local_element] + end + i_element += i_element_step_i + j_element += j_element_step_i + k_element += k_element_step_i + end + i_element += i_element_step_j + j_element += j_element_step_j + k_element += k_element_step_j end - i_element += i_element_step_i - j_element += j_element_step_i - k_element += k_element_step_i - end - i_element += i_element_step_j - j_element += j_element_step_j - k_element += k_element_step_j end - end - return nothing + return nothing end - function calc_mpi_interface_flux!(surface_flux_values, mesh::ParallelP4estMesh{3}, nonconservative_terms, equations, surface_integral, dg::DG, cache) - @unpack local_neighbor_ids, node_indices, local_sides = cache.mpi_interfaces - @unpack contravariant_vectors = cache.elements - index_range = eachnode(dg) - - @threaded for interface in eachmpiinterface(dg, cache) - # Get element and side index information on the local element - local_element = local_neighbor_ids[interface] - local_indices = node_indices[interface] - local_direction = indices2direction(local_indices) - local_side = local_sides[interface] - - # Create the local i,j,k indexing on the local element used to pull normal direction information - i_element_start, i_element_step_i, i_element_step_j = index_to_start_step_3d(local_indices[1], index_range) - j_element_start, j_element_step_i, j_element_step_j = index_to_start_step_3d(local_indices[2], index_range) - k_element_start, k_element_step_i, k_element_step_j = index_to_start_step_3d(local_indices[3], index_range) - - i_element = i_element_start - j_element = j_element_start - k_element = k_element_start - - # Initiate the node indices to be used in the surface for loop, - # the surface flux storage must be indexed in alignment with the local element indexing - local_surface_indices = surface_indices(local_indices) - i_surface_start, i_surface_step_i, i_surface_step_j = index_to_start_step_3d(local_surface_indices[1], index_range) - j_surface_start, j_surface_step_i, j_surface_step_j = index_to_start_step_3d(local_surface_indices[2], index_range) - i_surface = i_surface_start - j_surface = j_surface_start - - for j in eachnode(dg) - for i in eachnode(dg) - # Get the normal direction on the local element - # Contravariant vectors at interfaces in negative coordinate direction - # are pointing inwards. This is handled by `get_normal_direction`. 
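The `(start, step_i, step_j)` triples above implement the "delayed indexing" idiom. A hedged standalone sketch (the helper `walk_face` is hypothetical, and a single linear index stands in for the three i/j/k counters) of how such triples traverse a face in either orientation:

```julia
function walk_face(start, step_i, step_j, n)
    visited = Int[]
    idx = start
    for _j in 1:n
        for _i in 1:n
            push!(visited, idx)
            idx += step_i    # advance along the face's first direction
        end
        idx += step_j        # advance to the next row of the face
    end
    return visited
end

walk_face(1,  1, 0, 3)   # [1, 2, 3, 4, 5, 6, 7, 8, 9]: both directions forward
walk_face(3, -1, 6, 3)   # [3, 2, 1, 6, 5, 4, 9, 8, 7]: first direction flipped
```

All orientation logic is folded into the start and step values once per interface, so the hot loop stays branch-free.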
- normal_direction = get_normal_direction(local_direction, contravariant_vectors, - i_element, j_element, k_element, - local_element) - - calc_mpi_interface_flux!(surface_flux_values, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - interface, normal_direction, - i, j, local_side, - i_surface, j_surface, local_direction, local_element) - - # Increment local element indices to pull the normal direction - i_element += i_element_step_i - j_element += j_element_step_i - k_element += k_element_step_i - # Increment the surface node indices along the local element - i_surface += i_surface_step_i - j_surface += j_surface_step_i - end - # Increment local element indices to pull the normal direction - i_element += i_element_step_j - j_element += j_element_step_j - k_element += k_element_step_j - # Increment the surface node indices along the local element - i_surface += i_surface_step_j - j_surface += j_surface_step_j + @unpack local_neighbor_ids, node_indices, local_sides = cache.mpi_interfaces + @unpack contravariant_vectors = cache.elements + index_range = eachnode(dg) + + @threaded for interface in eachmpiinterface(dg, cache) + # Get element and side index information on the local element + local_element = local_neighbor_ids[interface] + local_indices = node_indices[interface] + local_direction = indices2direction(local_indices) + local_side = local_sides[interface] + + # Create the local i,j,k indexing on the local element used to pull normal direction information + i_element_start, i_element_step_i, i_element_step_j = index_to_start_step_3d(local_indices[1], + index_range) + j_element_start, j_element_step_i, j_element_step_j = index_to_start_step_3d(local_indices[2], + index_range) + k_element_start, k_element_step_i, k_element_step_j = index_to_start_step_3d(local_indices[3], + index_range) + + i_element = i_element_start + j_element = j_element_start + k_element = k_element_start + + # Initiate the node indices to be used in the surface for loop, + # the surface flux storage must be indexed in alignment with the local element indexing + local_surface_indices = surface_indices(local_indices) + i_surface_start, i_surface_step_i, i_surface_step_j = index_to_start_step_3d(local_surface_indices[1], + index_range) + j_surface_start, j_surface_step_i, j_surface_step_j = index_to_start_step_3d(local_surface_indices[2], + index_range) + i_surface = i_surface_start + j_surface = j_surface_start + + for j in eachnode(dg) + for i in eachnode(dg) + # Get the normal direction on the local element + # Contravariant vectors at interfaces in negative coordinate direction + # are pointing inwards. This is handled by `get_normal_direction`. 
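Regarding the comment that contravariant vectors at faces in negative coordinate direction point inwards: a hedged illustrative stand-in (not the actual Trixi.jl implementation of `get_normal_direction`), assuming the 1:6 face numbering visible in `calc_surface_integral!` above, where odd directions are the -x/-y/-z faces:

```julia
# Illustrative only: flip the stored contravariant vector on "minus" faces so
# the returned normal always points out of the element.
function outward_normal(direction, contravariant_vector)
    return isodd(direction) ? -contravariant_vector : contravariant_vector
end

outward_normal(1, [1.0, 0.0, 0.0])   # -x face -> [-1.0, 0.0, 0.0]
outward_normal(2, [1.0, 0.0, 0.0])   # +x face -> [1.0, 0.0, 0.0]
```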
+ normal_direction = get_normal_direction(local_direction, + contravariant_vectors, + i_element, j_element, k_element, + local_element) + + calc_mpi_interface_flux!(surface_flux_values, mesh, + nonconservative_terms, equations, + surface_integral, dg, cache, + interface, normal_direction, + i, j, local_side, + i_surface, j_surface, local_direction, + local_element) + + # Increment local element indices to pull the normal direction + i_element += i_element_step_i + j_element += j_element_step_i + k_element += k_element_step_i + # Increment the surface node indices along the local element + i_surface += i_surface_step_i + j_surface += j_surface_step_i + end + # Increment local element indices to pull the normal direction + i_element += i_element_step_j + j_element += j_element_step_j + k_element += k_element_step_j + # Increment the surface node indices along the local element + i_surface += i_surface_step_j + j_surface += j_surface_step_j + end end - end - return nothing + return nothing end # Inlined version of the interface flux computation for conservation laws @@ -212,181 +241,198 @@ end nonconservative_terms::False, equations, surface_integral, dg::DG, cache, interface_index, normal_direction, - interface_i_node_index, interface_j_node_index, local_side, + interface_i_node_index, + interface_j_node_index, local_side, surface_i_node_index, surface_j_node_index, local_direction_index, local_element_index) - @unpack u = cache.mpi_interfaces - @unpack surface_flux = surface_integral - - u_ll, u_rr = get_surface_node_vars(u, equations, dg, - interface_i_node_index, interface_j_node_index, interface_index) - - if local_side == 1 - flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) - else # local_side == 2 - flux_ = -surface_flux(u_ll, u_rr, -normal_direction, equations) - end - - for v in eachvariable(equations) - surface_flux_values[v, surface_i_node_index, surface_j_node_index, - local_direction_index, local_element_index] = flux_[v] - end -end + @unpack u = cache.mpi_interfaces + @unpack surface_flux = surface_integral + u_ll, u_rr = get_surface_node_vars(u, equations, dg, + interface_i_node_index, interface_j_node_index, + interface_index) + + if local_side == 1 + flux_ = surface_flux(u_ll, u_rr, normal_direction, equations) + else # local_side == 2 + flux_ = -surface_flux(u_ll, u_rr, -normal_direction, equations) + end + + for v in eachvariable(equations) + surface_flux_values[v, surface_i_node_index, surface_j_node_index, + local_direction_index, local_element_index] = flux_[v] + end +end function prolong2mpimortars!(cache, u, mesh::ParallelP4estMesh{3}, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DGSEM) - @unpack node_indices = cache.mpi_mortars - index_range = eachnode(dg) - - @threaded for mortar in eachmpimortar(dg, cache) - local_neighbor_ids = cache.mpi_mortars.local_neighbor_ids[mortar] - local_neighbor_positions = cache.mpi_mortars.local_neighbor_positions[mortar] - - # Get start value and step size for indices on both sides to get the correct face - # and orientation - small_indices = node_indices[1, mortar] - i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], index_range) - j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], index_range) - k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], index_range) - - large_indices = node_indices[2, mortar] - i_large_start, i_large_step_i, i_large_step_j = 
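The `local_side == 2` branch above relies on the symmetry of a consistent two-point flux, f(u_ll, u_rr, n) = -f(u_rr, u_ll, -n): negating both the result and the normal yields the flux seen from the secondary element without reordering the stored interface states. A hedged check with a toy central flux for linear advection (the speed `a` is made up):

```julia
a = 2.0
toy_flux(u_ll, u_rr, n) = 0.5 * a * n * (u_ll + u_rr)

u_ll, u_rr, n = 1.0, 3.0, 0.7
@assert -toy_flux(u_ll, u_rr, -n) ≈ toy_flux(u_rr, u_ll, n)
```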
index_to_start_step_3d(large_indices[1], index_range) - j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_indices[2], index_range) - k_large_start, k_large_step_i, k_large_step_j = index_to_start_step_3d(large_indices[3], index_range) - - - for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) - if position == 5 # -> large element - # Buffer to copy solution values of the large element in the correct orientation - # before interpolating - u_buffer = cache.u_threaded[Threads.threadid()] - # temporary buffer for projections - fstar_tmp = cache.fstar_tmp_threaded[Threads.threadid()] - - i_large = i_large_start - j_large = j_large_start - k_large = k_large_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - u_buffer[v, i, j] = u[v, i_large, j_large, k_large, element] + @unpack node_indices = cache.mpi_mortars + index_range = eachnode(dg) + + @threaded for mortar in eachmpimortar(dg, cache) + local_neighbor_ids = cache.mpi_mortars.local_neighbor_ids[mortar] + local_neighbor_positions = cache.mpi_mortars.local_neighbor_positions[mortar] + + # Get start value and step size for indices on both sides to get the correct face + # and orientation + small_indices = node_indices[1, mortar] + i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], + index_range) + j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], + index_range) + k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], + index_range) + + large_indices = node_indices[2, mortar] + i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_indices[1], + index_range) + j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_indices[2], + index_range) + k_large_start, k_large_step_i, k_large_step_j = index_to_start_step_3d(large_indices[3], + index_range) + + for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) + if position == 5 # -> large element + # Buffer to copy solution values of the large element in the correct orientation + # before interpolating + u_buffer = cache.u_threaded[Threads.threadid()] + # temporary buffer for projections + fstar_tmp = cache.fstar_tmp_threaded[Threads.threadid()] + + i_large = i_large_start + j_large = j_large_start + k_large = k_large_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + u_buffer[v, i, j] = u[v, i_large, j_large, k_large, element] + end + + i_large += i_large_step_i + j_large += j_large_step_i + k_large += k_large_step_i + end + i_large += i_large_step_j + j_large += j_large_step_j + k_large += k_large_step_j + end + + # Interpolate large element face data from buffer to small face locations + multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 1, :, :, + mortar), + mortar_l2.forward_lower, + mortar_l2.forward_lower, + u_buffer, + fstar_tmp) + multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 2, :, :, + mortar), + mortar_l2.forward_upper, + mortar_l2.forward_lower, + u_buffer, + fstar_tmp) + multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 3, :, :, + mortar), + mortar_l2.forward_lower, + mortar_l2.forward_upper, + u_buffer, + fstar_tmp) + multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 4, :, :, + mortar), + mortar_l2.forward_upper, + mortar_l2.forward_upper, + u_buffer, + fstar_tmp) + else # position in (1, 2, 3, 4) -> small element + # Copy solution data from the small elements 
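The four `multiply_dimensionwise!` calls above apply 1D mortar operators along each face coordinate. A hedged sketch of the underlying tensor-product structure (random stand-in matrices, one variable, `n = 3` nodes), showing the equivalence with a Kronecker product:

```julia
using LinearAlgebra

n = 3
u_large = rand(n, n)        # one variable on the large face
forward_lower = rand(n, n)  # stand-ins for the 1D mortar operators
forward_upper = rand(n, n)

# Small face 2 (upper half in the first direction, lower half in the second):
# apply one operator per dimension, i.e. A * u * transpose(B) ...
u_small2 = forward_upper * u_large * transpose(forward_lower)
# ... which is the matrix form of the Kronecker-product action on vec(u).
@assert vec(u_small2) ≈ kron(forward_lower, forward_upper) * vec(u_large)
```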
+ i_small = i_small_start + j_small = j_small_start + k_small = k_small_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + cache.mpi_mortars.u[1, v, position, i, j, mortar] = u[v, + i_small, + j_small, + k_small, + element] + end + i_small += i_small_step_i + j_small += j_small_step_i + k_small += k_small_step_i + end + i_small += i_small_step_j + j_small += j_small_step_j + k_small += k_small_step_j + end end - - i_large += i_large_step_i - j_large += j_large_step_i - k_large += k_large_step_i - end - i_large += i_large_step_j - j_large += j_large_step_j - k_large += k_large_step_j end - - # Interpolate large element face data from buffer to small face locations - multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 1, :, :, mortar), - mortar_l2.forward_lower, - mortar_l2.forward_lower, - u_buffer, - fstar_tmp) - multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 2, :, :, mortar), - mortar_l2.forward_upper, - mortar_l2.forward_lower, - u_buffer, - fstar_tmp) - multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 3, :, :, mortar), - mortar_l2.forward_lower, - mortar_l2.forward_upper, - u_buffer, - fstar_tmp) - multiply_dimensionwise!(view(cache.mpi_mortars.u, 2, :, 4, :, :, mortar), - mortar_l2.forward_upper, - mortar_l2.forward_upper, - u_buffer, - fstar_tmp) - else # position in (1, 2, 3, 4) -> small element - # Copy solution data from the small elements - i_small = i_small_start - j_small = j_small_start - k_small = k_small_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - cache.mpi_mortars.u[1, v, position, i, j, mortar] = u[v, i_small, j_small, k_small, element] - end - i_small += i_small_step_i - j_small += j_small_step_i - k_small += k_small_step_i - end - i_small += i_small_step_j - j_small += j_small_step_j - k_small += k_small_step_j - end - end end - end - return nothing + return nothing end - function calc_mpi_mortar_flux!(surface_flux_values, mesh::ParallelP4estMesh{3}, nonconservative_terms, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars - @unpack contravariant_vectors = cache.elements - @unpack fstar_threaded, fstar_tmp_threaded = cache - index_range = eachnode(dg) - - @threaded for mortar in eachmpimortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar = fstar_threaded[Threads.threadid()] - fstar_tmp = fstar_tmp_threaded[Threads.threadid()] - - # Get index information on the small elements - small_indices = node_indices[1, mortar] - - i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], index_range) - j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], index_range) - k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], index_range) - - for position in 1:4 - i_small = i_small_start - j_small = j_small_start - k_small = k_small_start - for j in eachnode(dg) - for i in eachnode(dg) - # Get the normal direction on the small element. 
- normal_direction = get_normal_direction(cache.mpi_mortars, i, j, position, mortar) - - calc_mpi_mortar_flux!(fstar, mesh, nonconservative_terms, equations, - surface_integral, dg, cache, - mortar, position, normal_direction, - i, j) - - i_small += i_small_step_i - j_small += j_small_step_i - k_small += k_small_step_i + @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars + @unpack contravariant_vectors = cache.elements + @unpack fstar_threaded, fstar_tmp_threaded = cache + index_range = eachnode(dg) + + @threaded for mortar in eachmpimortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar = fstar_threaded[Threads.threadid()] + fstar_tmp = fstar_tmp_threaded[Threads.threadid()] + + # Get index information on the small elements + small_indices = node_indices[1, mortar] + + i_small_start, i_small_step_i, i_small_step_j = index_to_start_step_3d(small_indices[1], + index_range) + j_small_start, j_small_step_i, j_small_step_j = index_to_start_step_3d(small_indices[2], + index_range) + k_small_start, k_small_step_i, k_small_step_j = index_to_start_step_3d(small_indices[3], + index_range) + + for position in 1:4 + i_small = i_small_start + j_small = j_small_start + k_small = k_small_start + for j in eachnode(dg) + for i in eachnode(dg) + # Get the normal direction on the small element. + normal_direction = get_normal_direction(cache.mpi_mortars, i, j, + position, mortar) + + calc_mpi_mortar_flux!(fstar, mesh, nonconservative_terms, equations, + surface_integral, dg, cache, + mortar, position, normal_direction, + i, j) + + i_small += i_small_step_i + j_small += j_small_step_i + k_small += k_small_step_i + end + end + i_small += i_small_step_j + j_small += j_small_step_j + k_small += k_small_step_j end - end - i_small += i_small_step_j - j_small += j_small_step_j - k_small += k_small_step_j - end - # Buffer to interpolate flux values of the large element to before - # copying in the correct orientation - u_buffer = cache.u_threaded[Threads.threadid()] + # Buffer to interpolate flux values of the large element to before + # copying in the correct orientation + u_buffer = cache.u_threaded[Threads.threadid()] - mpi_mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, - mortar, fstar, u_buffer, fstar_tmp) - end + mpi_mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar, u_buffer, fstar_tmp) + end - return nothing + return nothing end # Inlined version of the mortar flux computation on small elements for conservation laws @@ -396,96 +442,103 @@ end surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, i_node_index, j_node_index) - @unpack u = cache.mpi_mortars - @unpack surface_flux = surface_integral + @unpack u = cache.mpi_mortars + @unpack surface_flux = surface_integral - u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index, j_node_index, mortar_index) + u_ll, u_rr = get_surface_node_vars(u, equations, dg, position_index, i_node_index, + j_node_index, mortar_index) - flux = surface_flux(u_ll, u_rr, normal_direction, equations) + flux = surface_flux(u_ll, u_rr, normal_direction, equations) - # Copy flux to buffer - set_node_vars!(fstar, flux, equations, dg, i_node_index, j_node_index, position_index) + # Copy flux to buffer + set_node_vars!(fstar, flux, equations, dg, i_node_index, j_node_index, + position_index) end - @inline function mpi_mortar_fluxes_to_elements!(surface_flux_values, 
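The `fstar_threaded[Threads.threadid()]` lookups above use one pre-allocated scratch buffer per thread to avoid allocations and data races inside the threaded mortar loop. A hedged minimal sketch of the pattern; static scheduling is assumed, since `threadid()`-indexed buffers require each task to stay on one thread (`Threads.@threads :static` stands in for Trixi.jl's `@threaded` macro here):

```julia
n = 4
fstar_threaded = [zeros(n, n) for _ in 1:Threads.nthreads()]

Threads.@threads :static for mortar in 1:32
    fstar = fstar_threaded[Threads.threadid()]  # this thread's scratch buffer
    fill!(fstar, 0.0)                           # reset instead of reallocating
    fstar .+= mortar                            # stand-in for the flux kernel
end
```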
mesh::ParallelP4estMesh{3}, equations, mortar_l2::LobattoLegendreMortarL2, - dg::DGSEM, cache, mortar, fstar, u_buffer, fstar_tmp) - @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars - index_range = eachnode(dg) - - small_indices = node_indices[1, mortar] - small_direction = indices2direction(small_indices) - large_indices = node_indices[2, mortar] - large_direction = indices2direction(large_indices) - large_surface_indices = surface_indices(large_indices) - - i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_surface_indices[1], index_range) - j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_surface_indices[2], index_range) - - for (element, position) in zip(local_neighbor_ids[mortar], local_neighbor_positions[mortar]) - if position == 5 # -> large element - # Project small fluxes to large element. - multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_lower, mortar_l2.reverse_lower, - view(fstar, .., 1), - fstar_tmp) - add_multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_upper, mortar_l2.reverse_lower, - view(fstar, .., 2), - fstar_tmp) - add_multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_lower, mortar_l2.reverse_upper, - view(fstar, .., 3), - fstar_tmp) - add_multiply_dimensionwise!( - u_buffer, - mortar_l2.reverse_upper, mortar_l2.reverse_upper, - view(fstar, .., 4), - fstar_tmp) - # The flux is calculated in the outward direction of the small elements, - # so the sign must be switched to get the flux in outward direction - # of the large element. - # The contravariant vectors of the large element (and therefore the normal - # vectors of the large element as well) are four times as large as the - # contravariant vectors of the small elements. Therefore, the flux needs - # to be scaled by a factor of 4 to obtain the flux of the large element. - u_buffer .*= -4 - # Copy interpolated flux values from buffer to large element face in the - # correct orientation. - # Note that the index of the small sides will always run forward but - # the index of the large side might need to run backwards for flipped sides. 
- i_large = i_large_start - j_large = j_large_start - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i_large, j_large, large_direction, element] = u_buffer[v, i, j] - end - i_large += i_large_step_i - j_large += j_large_step_i - end - i_large += i_large_step_j - j_large += j_large_step_j - end - else # position in (1, 2, 3, 4) -> small element - # Copy solution small to small - for j in eachnode(dg) - for i in eachnode(dg) - for v in eachvariable(equations) - surface_flux_values[v, i, j, small_direction, element] = fstar[v, i, j, position] - end + dg::DGSEM, cache, mortar, fstar, + u_buffer, fstar_tmp) + @unpack local_neighbor_ids, local_neighbor_positions, node_indices = cache.mpi_mortars + index_range = eachnode(dg) + + small_indices = node_indices[1, mortar] + small_direction = indices2direction(small_indices) + large_indices = node_indices[2, mortar] + large_direction = indices2direction(large_indices) + large_surface_indices = surface_indices(large_indices) + + i_large_start, i_large_step_i, i_large_step_j = index_to_start_step_3d(large_surface_indices[1], + index_range) + j_large_start, j_large_step_i, j_large_step_j = index_to_start_step_3d(large_surface_indices[2], + index_range) + + for (element, position) in zip(local_neighbor_ids[mortar], + local_neighbor_positions[mortar]) + if position == 5 # -> large element + # Project small fluxes to large element. + multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_lower, mortar_l2.reverse_lower, + view(fstar, .., 1), + fstar_tmp) + add_multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_upper, + mortar_l2.reverse_lower, + view(fstar, .., 2), + fstar_tmp) + add_multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_lower, + mortar_l2.reverse_upper, + view(fstar, .., 3), + fstar_tmp) + add_multiply_dimensionwise!(u_buffer, + mortar_l2.reverse_upper, + mortar_l2.reverse_upper, + view(fstar, .., 4), + fstar_tmp) + # The flux is calculated in the outward direction of the small elements, + # so the sign must be switched to get the flux in outward direction + # of the large element. + # The contravariant vectors of the large element (and therefore the normal + # vectors of the large element as well) are four times as large as the + # contravariant vectors of the small elements. Therefore, the flux needs + # to be scaled by a factor of 4 to obtain the flux of the large element. + u_buffer .*= -4 + # Copy interpolated flux values from buffer to large element face in the + # correct orientation. + # Note that the index of the small sides will always run forward but + # the index of the large side might need to run backwards for flipped sides. 
+ i_large = i_large_start + j_large = j_large_start + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i_large, j_large, large_direction, element] = u_buffer[v, + i, + j] + end + i_large += i_large_step_i + j_large += j_large_step_i + end + i_large += i_large_step_j + j_large += j_large_step_j + end + else # position in (1, 2, 3, 4) -> small element + # Copy solution small to small + for j in eachnode(dg) + for i in eachnode(dg) + for v in eachvariable(equations) + surface_flux_values[v, i, j, small_direction, element] = fstar[v, + i, + j, + position] + end + end + end end - end end - end - return nothing + return nothing end - - -end # muladd \ No newline at end of file +end # muladd diff --git a/src/solvers/dgsem_p4est/dg_parallel.jl b/src/solvers/dgsem_p4est/dg_parallel.jl index 6d3803225a0..ac122d048c1 100644 --- a/src/solvers/dgsem_p4est/dg_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_parallel.jl @@ -3,175 +3,181 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent mutable struct P4estMPICache{uEltype} - mpi_neighbor_ranks::Vector{Int} - mpi_neighbor_interfaces::Vector{Vector{Int}} - mpi_neighbor_mortars::Vector{Vector{Int}} - mpi_send_buffers::Vector{Vector{uEltype}} - mpi_recv_buffers::Vector{Vector{uEltype}} - mpi_send_requests::Vector{MPI.Request} - mpi_recv_requests::Vector{MPI.Request} - n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} - n_elements_global::Int - first_element_global_id::Int + mpi_neighbor_ranks::Vector{Int} + mpi_neighbor_interfaces::Vector{Vector{Int}} + mpi_neighbor_mortars::Vector{Vector{Int}} + mpi_send_buffers::Vector{Vector{uEltype}} + mpi_recv_buffers::Vector{Vector{uEltype}} + mpi_send_requests::Vector{MPI.Request} + mpi_recv_requests::Vector{MPI.Request} + n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} + n_elements_global::Int + first_element_global_id::Int end function P4estMPICache(uEltype) - # MPI communication "just works" for bitstypes only - if !isbitstype(uEltype) - throw(ArgumentError("P4estMPICache only supports bitstypes, $uEltype is not a bitstype.")) - end - - mpi_neighbor_ranks = Vector{Int}(undef, 0) - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) - mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) - mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) - mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) - mpi_send_requests = Vector{MPI.Request}(undef, 0) - mpi_recv_requests = Vector{MPI.Request}(undef, 0) - n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1) - n_elements_global = 0 - first_element_global_id = 0 - - P4estMPICache{uEltype}(mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, - mpi_send_buffers, mpi_recv_buffers, - mpi_send_requests, mpi_recv_requests, - n_elements_by_rank, n_elements_global, - first_element_global_id) -end + # MPI communication "just works" for bitstypes only + if !isbitstype(uEltype) + throw(ArgumentError("P4estMPICache only supports bitstypes, $uEltype is not a bitstype.")) + end -@inline Base.eltype(::P4estMPICache{uEltype}) where uEltype = uEltype + mpi_neighbor_ranks = Vector{Int}(undef, 0) + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) + mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) + mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) + mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) + mpi_send_requests = Vector{MPI.Request}(undef, 0) + mpi_recv_requests = 
Vector{MPI.Request}(undef, 0) + n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1) + n_elements_global = 0 + first_element_global_id = 0 + + P4estMPICache{uEltype}(mpi_neighbor_ranks, mpi_neighbor_interfaces, + mpi_neighbor_mortars, + mpi_send_buffers, mpi_recv_buffers, + mpi_send_requests, mpi_recv_requests, + n_elements_by_rank, n_elements_global, + first_element_global_id) +end +@inline Base.eltype(::P4estMPICache{uEltype}) where {uEltype} = uEltype function start_mpi_send!(mpi_cache::P4estMPICache, mesh, equations, dg, cache) - data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) - n_small_elements = 2^(ndims(mesh)-1) - - for d in 1:length(mpi_cache.mpi_neighbor_ranks) - send_buffer = mpi_cache.mpi_send_buffers[d] - - for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) - first = (index - 1) * data_size + 1 - last = (index - 1) * data_size + data_size - local_side = cache.mpi_interfaces.local_sides[interface] - @views send_buffer[first:last] .= vec(cache.mpi_interfaces.u[local_side, .., interface]) - end + data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) + n_small_elements = 2^(ndims(mesh) - 1) + + for d in 1:length(mpi_cache.mpi_neighbor_ranks) + send_buffer = mpi_cache.mpi_send_buffers[d] + + for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) + first = (index - 1) * data_size + 1 + last = (index - 1) * data_size + data_size + local_side = cache.mpi_interfaces.local_sides[interface] + @views send_buffer[first:last] .= vec(cache.mpi_interfaces.u[local_side, .., + interface]) + end - # Set send_buffer corresponding to mortar data to NaN and overwrite the parts where local - # data exists - interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size - mortars_data_size = length(mpi_cache.mpi_neighbor_mortars[d]) * n_small_elements * 2 * data_size - # `NaN |> eltype(...)` ensures that the NaN's are of the appropriate floating point type - send_buffer[interfaces_data_size+1:interfaces_data_size+mortars_data_size] .= NaN |> eltype(mpi_cache) - - for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) - index_base = interfaces_data_size + (index - 1) * n_small_elements * 2 * data_size - indices = buffer_mortar_indices(mesh, index_base, data_size) - - for position in cache.mpi_mortars.local_neighbor_positions[mortar] - first, last = indices[position] - if position > n_small_elements # large element - @views send_buffer[first:last] .= vec(cache.mpi_mortars.u[2, :, :, .., mortar]) - else # small element - @views send_buffer[first:last] .= vec(cache.mpi_mortars.u[1, :, position, .., mortar]) + # Set send_buffer corresponding to mortar data to NaN and overwrite the parts where local + # data exists + interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size + mortars_data_size = length(mpi_cache.mpi_neighbor_mortars[d]) * + n_small_elements * 2 * data_size + # `NaN |> eltype(...)` ensures that the NaN's are of the appropriate floating point type + send_buffer[(interfaces_data_size + 1):(interfaces_data_size + mortars_data_size)] .= NaN |> + eltype(mpi_cache) + + for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) + index_base = interfaces_data_size + + (index - 1) * n_small_elements * 2 * data_size + indices = buffer_mortar_indices(mesh, index_base, data_size) + + for position in cache.mpi_mortars.local_neighbor_positions[mortar] + first, last = indices[position] + if position > n_small_elements # large element + @views send_buffer[first:last] .= 
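The NaN pre-fill above is a sentinel scheme: every mortar slot of the send buffer starts as NaN, only locally owned positions are overwritten, and the receiving side can later skip slots whose first entry is still NaN. A hedged standalone sketch of the idea:

```julia
data_size = 3
send_buffer = fill(NaN, 2 * data_size)   # two mortar positions, all sentinel
send_buffer[1:data_size] .= 1.0          # this rank only owns position 1

for position in 1:2
    first_idx = (position - 1) * data_size + 1
    isnan(send_buffer[first_idx]) && continue   # position 2: no real data sent
    # ... unpack send_buffer[first_idx:(first_idx + data_size - 1)] ...
end
```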
vec(cache.mpi_mortars.u[2, :, :, + .., + mortar]) + else # small element + @views send_buffer[first:last] .= vec(cache.mpi_mortars.u[1, :, + position, + .., + mortar]) + end + end end - end end - end - # Start sending - for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) - mpi_cache.mpi_send_requests[index] = MPI.Isend( - mpi_cache.mpi_send_buffers[index], d, mpi_rank(), mpi_comm()) - end + # Start sending + for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) + mpi_cache.mpi_send_requests[index] = MPI.Isend(mpi_cache.mpi_send_buffers[index], + d, mpi_rank(), mpi_comm()) + end - return nothing + return nothing end - function start_mpi_receive!(mpi_cache::P4estMPICache) - for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) - mpi_cache.mpi_recv_requests[index] = MPI.Irecv!( - mpi_cache.mpi_recv_buffers[index], d, d, mpi_comm()) - end + for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) + mpi_cache.mpi_recv_requests[index] = MPI.Irecv!(mpi_cache.mpi_recv_buffers[index], + d, d, mpi_comm()) + end - return nothing + return nothing end - function finish_mpi_send!(mpi_cache::P4estMPICache) - MPI.Waitall(mpi_cache.mpi_send_requests, MPI.Status) + MPI.Waitall(mpi_cache.mpi_send_requests, MPI.Status) end - function finish_mpi_receive!(mpi_cache::P4estMPICache, mesh, equations, dg, cache) - data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) - n_small_elements = 2^(ndims(mesh)-1) - n_positions = n_small_elements + 1 - - # Start receiving and unpack received data until all communication is finished - d = MPI.Waitany(mpi_cache.mpi_recv_requests) - while d !== nothing - recv_buffer = mpi_cache.mpi_recv_buffers[d] - - for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) - first = (index - 1) * data_size + 1 - last = (index - 1) * data_size + data_size - - if cache.mpi_interfaces.local_sides[interface] == 1 # local element on primary side - @views vec(cache.mpi_interfaces.u[2, .., interface]) .= recv_buffer[first:last] - else # local element at secondary side - @views vec(cache.mpi_interfaces.u[1, .., interface]) .= recv_buffer[first:last] - end - end + data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) + n_small_elements = 2^(ndims(mesh) - 1) + n_positions = n_small_elements + 1 - interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size - for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) - index_base = interfaces_data_size + (index - 1) * n_small_elements * 2 * data_size - indices = buffer_mortar_indices(mesh, index_base, data_size) - - for position in 1:n_positions - # Skip if received data for `position` is NaN as no real data has been sent for the - # corresponding element - if isnan(recv_buffer[Base.first(indices[position])]) - continue + # Start receiving and unpack received data until all communication is finished + d = MPI.Waitany(mpi_cache.mpi_recv_requests) + while d !== nothing + recv_buffer = mpi_cache.mpi_recv_buffers[d] + + for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) + first = (index - 1) * data_size + 1 + last = (index - 1) * data_size + data_size + + if cache.mpi_interfaces.local_sides[interface] == 1 # local element on primary side + @views vec(cache.mpi_interfaces.u[2, .., interface]) .= recv_buffer[first:last] + else # local element at secondary side + @views vec(cache.mpi_interfaces.u[1, .., interface]) .= recv_buffer[first:last] + end end - first, last = indices[position] - if position == n_positions # large element - @views 
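A hedged, self-contained reduction of the communication pattern above: `Irecv!`/`Isend` are posted early and waited on late, so local computation can overlap communication. The two-rank setup and variable names are illustrative; the positional `Isend`/`Irecv!` signatures mirror the ones used in the hunk (run with e.g. two MPI ranks):

```julia
using MPI

MPI.Init()
comm = MPI.COMM_WORLD
rank = MPI.Comm_rank(comm)
other = 1 - rank                     # assumes exactly two ranks

send_buffer = fill(Float64(rank), 8)
recv_buffer = similar(send_buffer)

# Tag messages with the sender's rank, as in `start_mpi_send!` above.
recv_req = MPI.Irecv!(recv_buffer, other, other, comm)
send_req = MPI.Isend(send_buffer, other, rank, comm)

# ... local work (volume integrals etc.) would overlap communication here ...

MPI.Waitall([send_req, recv_req])
@assert all(recv_buffer .== other)
MPI.Finalize()
```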
vec(cache.mpi_mortars.u[2, :, :, .., mortar]) .= recv_buffer[first:last] - else # small element - @views vec(cache.mpi_mortars.u[1, :, position, .., mortar]) .= recv_buffer[first:last] + interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size + for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) + index_base = interfaces_data_size + + (index - 1) * n_small_elements * 2 * data_size + indices = buffer_mortar_indices(mesh, index_base, data_size) + + for position in 1:n_positions + # Skip if received data for `position` is NaN as no real data has been sent for the + # corresponding element + if isnan(recv_buffer[Base.first(indices[position])]) + continue + end + + first, last = indices[position] + if position == n_positions # large element + @views vec(cache.mpi_mortars.u[2, :, :, .., mortar]) .= recv_buffer[first:last] + else # small element + @views vec(cache.mpi_mortars.u[1, :, position, .., mortar]) .= recv_buffer[first:last] + end + end end - end - end - d = MPI.Waitany(mpi_cache.mpi_recv_requests) - end + d = MPI.Waitany(mpi_cache.mpi_recv_requests) + end - return nothing + return nothing end - # Return a tuple `indices` where indices[position] is a `(first, last)` tuple for accessing the # data corresponding to the `position` part of a mortar in an MPI buffer. The mortar data must begin # at `index_base`+1 in the MPI buffer. `data_size` is the data size associated with each small # position (i.e. position 1 or 2). The data corresponding to the large side (i.e. position 3) has # size `2 * data_size`. -@inline function buffer_mortar_indices(mesh::ParallelP4estMesh{2}, index_base, data_size) - return ( - # first, last for local element in position 1 (small element) - (index_base + 1, - index_base + 1 * data_size), - # first, last for local element in position 2 (small element) - (index_base + 1 * data_size + 1, - index_base + 2 * data_size), - # first, last for local element in position 3 (large element) - (index_base + 2 * data_size + 1, - index_base + 4 * data_size), - ) +@inline function buffer_mortar_indices(mesh::ParallelP4estMesh{2}, index_base, + data_size) + return ( + # first, last for local element in position 1 (small element) + (index_base + 1, + index_base + 1 * data_size), + # first, last for local element in position 2 (small element) + (index_base + 1 * data_size + 1, + index_base + 2 * data_size), + # first, last for local element in position 3 (large element) + (index_base + 2 * data_size + 1, + index_base + 4 * data_size)) end # Return a tuple `indices` where indices[position] is a `(first, last)` tuple for accessing the @@ -179,354 +185,401 @@ end # at `index_base`+1 in the MPI buffer. `data_size` is the data size associated with each small # position (i.e. position 1 to 4). The data corresponding to the large side (i.e. position 5) has # size `4 * data_size`. 
-@inline function buffer_mortar_indices(mesh::ParallelP4estMesh{3}, index_base, data_size) - return ( - # first, last for local element in position 1 (small element) - (index_base + 1, - index_base + 1 * data_size), - # first, last for local element in position 2 (small element) - (index_base + 1 * data_size + 1, - index_base + 2 * data_size), - # first, last for local element in position 3 (small element) - (index_base + 2 * data_size + 1, - index_base + 3 * data_size), - # first, last for local element in position 4 (small element) - (index_base + 3 * data_size + 1, - index_base + 4 * data_size), - # first, last for local element in position 5 (large element) - (index_base + 4 * data_size + 1, - index_base + 8 * data_size), - ) +@inline function buffer_mortar_indices(mesh::ParallelP4estMesh{3}, index_base, + data_size) + return ( + # first, last for local element in position 1 (small element) + (index_base + 1, + index_base + 1 * data_size), + # first, last for local element in position 2 (small element) + (index_base + 1 * data_size + 1, + index_base + 2 * data_size), + # first, last for local element in position 3 (small element) + (index_base + 2 * data_size + 1, + index_base + 3 * data_size), + # first, last for local element in position 4 (small element) + (index_base + 3 * data_size + 1, + index_base + 4 * data_size), + # first, last for local element in position 5 (large element) + (index_base + 4 * data_size + 1, + index_base + 8 * data_size)) end - # This method is called when a SemidiscretizationHyperbolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. -function create_cache(mesh::ParallelP4estMesh, equations::AbstractEquations, dg::DG, ::Any, ::Type{uEltype}) where {uEltype<:Real} - # Make sure to balance and partition the p4est and create a new ghost layer before creating any - # containers in case someone has tampered with the p4est after creating the mesh - balance!(mesh) - partition!(mesh) - update_ghost_layer!(mesh) - - elements = init_elements(mesh, equations, dg.basis, uEltype) +function create_cache(mesh::ParallelP4estMesh, equations::AbstractEquations, dg::DG, + ::Any, ::Type{uEltype}) where {uEltype <: Real} + # Make sure to balance and partition the p4est and create a new ghost layer before creating any + # containers in case someone has tampered with the p4est after creating the mesh + balance!(mesh) + partition!(mesh) + update_ghost_layer!(mesh) - mpi_interfaces = init_mpi_interfaces(mesh, equations, dg.basis, elements) - mpi_mortars = init_mpi_mortars(mesh, equations, dg.basis, elements) - mpi_cache = init_mpi_cache(mesh, mpi_interfaces, mpi_mortars, - nvariables(equations), nnodes(dg), uEltype) + elements = init_elements(mesh, equations, dg.basis, uEltype) - exchange_normal_directions!(mpi_mortars, mpi_cache, mesh, nnodes(dg)) + mpi_interfaces = init_mpi_interfaces(mesh, equations, dg.basis, elements) + mpi_mortars = init_mpi_mortars(mesh, equations, dg.basis, elements) + mpi_cache = init_mpi_cache(mesh, mpi_interfaces, mpi_mortars, + nvariables(equations), nnodes(dg), uEltype) - interfaces = init_interfaces(mesh, equations, dg.basis, elements) - boundaries = init_boundaries(mesh, equations, dg.basis, elements) - mortars = init_mortars(mesh, equations, dg.basis, elements) + exchange_normal_directions!(mpi_mortars, mpi_cache, mesh, nnodes(dg)) + interfaces = init_interfaces(mesh, equations, dg.basis, elements) + boundaries = init_boundaries(mesh, equations, dg.basis, elements) + mortars = 
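A quick worked instance of the 3D layout above, assuming `index_base = 0` and `data_size = 2`, so the whole mortar block spans `8 * data_size = 16` buffer entries:

```julia
index_base, data_size = 0, 2
indices = ((index_base + 1, index_base + 1 * data_size),
           (index_base + 1 * data_size + 1, index_base + 2 * data_size),
           (index_base + 2 * data_size + 1, index_base + 3 * data_size),
           (index_base + 3 * data_size + 1, index_base + 4 * data_size),
           (index_base + 4 * data_size + 1, index_base + 8 * data_size))
# -> ((1, 2), (3, 4), (5, 6), (7, 8), (9, 16)): one data_size slice per small
#    position, and 4 * data_size entries for the large element (position 5).
```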
init_mortars(mesh, equations, dg.basis, elements) - cache = (; elements, interfaces, mpi_interfaces, boundaries, mortars, mpi_mortars, mpi_cache) + cache = (; elements, interfaces, mpi_interfaces, boundaries, mortars, mpi_mortars, + mpi_cache) - # Add specialized parts of the cache required to compute the volume integral etc. - cache = (; cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) - cache = (; cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + cache = (; cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) - return cache + return cache end +function init_mpi_cache(mesh::ParallelP4estMesh, mpi_interfaces, mpi_mortars, nvars, + nnodes, uEltype) + mpi_cache = P4estMPICache(uEltype) + init_mpi_cache!(mpi_cache, mesh, mpi_interfaces, mpi_mortars, nvars, nnodes, + uEltype) -function init_mpi_cache(mesh::ParallelP4estMesh, mpi_interfaces, mpi_mortars, nvars, nnodes, uEltype) - mpi_cache = P4estMPICache(uEltype) - init_mpi_cache!(mpi_cache, mesh, mpi_interfaces, mpi_mortars, nvars, nnodes, uEltype) - - return mpi_cache + return mpi_cache end function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh, mpi_interfaces, mpi_mortars, nvars, n_nodes, uEltype) - mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars = - init_mpi_neighbor_connectivity(mpi_interfaces, mpi_mortars, mesh) - - mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = - init_mpi_data_structures(mpi_neighbor_interfaces, mpi_neighbor_mortars, - ndims(mesh), nvars, n_nodes, uEltype) - - # Determine local and total number of elements - n_elements_global = Int(unsafe_load(mesh.p4est).global_num_quadrants) - n_elements_by_rank = vcat(Int.(unsafe_wrap(Array, unsafe_load(mesh.p4est).global_first_quadrant, mpi_nranks())), - n_elements_global) |> diff # diff sufficient due to 0-based quad indices - n_elements_by_rank = OffsetArray(n_elements_by_rank, 0:(mpi_nranks() - 1)) - # Account for 1-based indexing in Julia - first_element_global_id = Int(unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, mpi_rank() + 1)) + 1 - @assert n_elements_global == sum(n_elements_by_rank) "error in total number of elements" - - # TODO reuse existing structures - @pack! 
mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, - mpi_send_buffers, mpi_recv_buffers, - mpi_send_requests, mpi_recv_requests, - n_elements_by_rank, n_elements_global, - first_element_global_id + mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars = init_mpi_neighbor_connectivity(mpi_interfaces, + mpi_mortars, + mesh) + + mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(mpi_neighbor_interfaces, + mpi_neighbor_mortars, + ndims(mesh), + nvars, + n_nodes, + uEltype) + + # Determine local and total number of elements + n_elements_global = Int(unsafe_load(mesh.p4est).global_num_quadrants) + n_elements_by_rank = vcat(Int.(unsafe_wrap(Array, + unsafe_load(mesh.p4est).global_first_quadrant, + mpi_nranks())), + n_elements_global) |> diff # diff sufficient due to 0-based quad indices + n_elements_by_rank = OffsetArray(n_elements_by_rank, 0:(mpi_nranks() - 1)) + # Account for 1-based indexing in Julia + first_element_global_id = Int(unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, + mpi_rank() + 1)) + 1 + @assert n_elements_global==sum(n_elements_by_rank) "error in total number of elements" + + # TODO reuse existing structures + @pack! mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces, + mpi_neighbor_mortars, + mpi_send_buffers, mpi_recv_buffers, + mpi_send_requests, mpi_recv_requests, + n_elements_by_rank, n_elements_global, + first_element_global_id end -function init_mpi_neighbor_connectivity(mpi_interfaces, mpi_mortars, mesh::ParallelP4estMesh) - # Let p4est iterate over all interfaces and call init_neighbor_rank_connectivity_iter_face - # to collect connectivity information - iter_face_c = cfunction(init_neighbor_rank_connectivity_iter_face, Val(ndims(mesh))) - user_data = InitNeighborRankConnectivityIterFaceUserData(mpi_interfaces, mpi_mortars, mesh) - - iterate_p4est(mesh.p4est, user_data; ghost_layer=mesh.ghost, iter_face_c=iter_face_c) - - # Build proper connectivity data structures from information gathered by iterating over p4est - @unpack global_interface_ids, neighbor_ranks_interface, global_mortar_ids, neighbor_ranks_mortar = user_data - - mpi_neighbor_ranks = vcat(neighbor_ranks_interface, neighbor_ranks_mortar...) 
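The `diff`-based counting above works because p4est stores the 0-based global index of each rank's first quadrant. A hedged worked example with three made-up ranks:

```julia
global_first_quadrant = [0, 10, 25]   # 0-based first element id per rank
n_elements_global = 40

n_elements_by_rank = diff(vcat(global_first_quadrant, n_elements_global))
@assert n_elements_by_rank == [10, 15, 15]
@assert sum(n_elements_by_rank) == n_elements_global

# 1-based id of the first element owned by rank 1 (0-based), as in the
# "Account for 1-based indexing in Julia" step above:
first_element_global_id = global_first_quadrant[1 + 1] + 1   # -> 11
```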
|> sort |> unique - - p = sortperm(global_interface_ids) - neighbor_ranks_interface .= neighbor_ranks_interface[p] - interface_ids = collect(1:nmpiinterfaces(mpi_interfaces))[p] - - p = sortperm(global_mortar_ids) - neighbor_ranks_mortar .= neighbor_ranks_mortar[p] - mortar_ids = collect(1:nmpimortars(mpi_mortars))[p] - - # For each neighbor rank, init connectivity data structures - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) - mpi_neighbor_mortars = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) - for (index, d) in enumerate(mpi_neighbor_ranks) - mpi_neighbor_interfaces[index] = interface_ids[findall(==(d), neighbor_ranks_interface)] - mpi_neighbor_mortars[index] = mortar_ids[findall(x->(d in x), neighbor_ranks_mortar)] - end +function init_mpi_neighbor_connectivity(mpi_interfaces, mpi_mortars, + mesh::ParallelP4estMesh) + # Let p4est iterate over all interfaces and call init_neighbor_rank_connectivity_iter_face + # to collect connectivity information + iter_face_c = cfunction(init_neighbor_rank_connectivity_iter_face, Val(ndims(mesh))) + user_data = InitNeighborRankConnectivityIterFaceUserData(mpi_interfaces, + mpi_mortars, mesh) + + iterate_p4est(mesh.p4est, user_data; ghost_layer = mesh.ghost, + iter_face_c = iter_face_c) + + # Build proper connectivity data structures from information gathered by iterating over p4est + @unpack global_interface_ids, neighbor_ranks_interface, global_mortar_ids, neighbor_ranks_mortar = user_data + + mpi_neighbor_ranks = vcat(neighbor_ranks_interface, neighbor_ranks_mortar...) |> + sort |> unique + + p = sortperm(global_interface_ids) + neighbor_ranks_interface .= neighbor_ranks_interface[p] + interface_ids = collect(1:nmpiinterfaces(mpi_interfaces))[p] + + p = sortperm(global_mortar_ids) + neighbor_ranks_mortar .= neighbor_ranks_mortar[p] + mortar_ids = collect(1:nmpimortars(mpi_mortars))[p] + + # For each neighbor rank, init connectivity data structures + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) + mpi_neighbor_mortars = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) + for (index, d) in enumerate(mpi_neighbor_ranks) + mpi_neighbor_interfaces[index] = interface_ids[findall(==(d), + neighbor_ranks_interface)] + mpi_neighbor_mortars[index] = mortar_ids[findall(x -> (d in x), + neighbor_ranks_mortar)] + end - # Check that all interfaces were counted exactly once - @assert mapreduce(length, +, mpi_neighbor_interfaces; init=0) == nmpiinterfaces(mpi_interfaces) + # Check that all interfaces were counted exactly once + @assert mapreduce(length, +, mpi_neighbor_interfaces; init = 0) == + nmpiinterfaces(mpi_interfaces) - return mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars + return mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars end -mutable struct InitNeighborRankConnectivityIterFaceUserData{MPIInterfaces, MPIMortars, Mesh} - interfaces::MPIInterfaces - interface_id::Int - global_interface_ids::Vector{Int} - neighbor_ranks_interface::Vector{Int} - mortars::MPIMortars - mortar_id::Int - global_mortar_ids::Vector{Int} - neighbor_ranks_mortar::Vector{Vector{Int}} - mesh::Mesh +mutable struct InitNeighborRankConnectivityIterFaceUserData{MPIInterfaces, MPIMortars, + Mesh} + interfaces::MPIInterfaces + interface_id::Int + global_interface_ids::Vector{Int} + neighbor_ranks_interface::Vector{Int} + mortars::MPIMortars + mortar_id::Int + global_mortar_ids::Vector{Int} + neighbor_ranks_mortar::Vector{Vector{Int}} + mesh::Mesh end function 
InitNeighborRankConnectivityIterFaceUserData(mpi_interfaces, mpi_mortars, mesh) - global_interface_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) - neighbor_ranks_interface = fill(-1, nmpiinterfaces(mpi_interfaces)) - global_mortar_ids = fill(-1, nmpimortars(mpi_mortars)) - neighbor_ranks_mortar = Vector{Vector{Int}}(undef, nmpimortars(mpi_mortars)) - - return InitNeighborRankConnectivityIterFaceUserData{ - typeof(mpi_interfaces), typeof(mpi_mortars), typeof(mesh)}( - mpi_interfaces, 1, global_interface_ids, neighbor_ranks_interface, - mpi_mortars, 1, global_mortar_ids, neighbor_ranks_mortar, - mesh) + global_interface_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) + neighbor_ranks_interface = fill(-1, nmpiinterfaces(mpi_interfaces)) + global_mortar_ids = fill(-1, nmpimortars(mpi_mortars)) + neighbor_ranks_mortar = Vector{Vector{Int}}(undef, nmpimortars(mpi_mortars)) + + return InitNeighborRankConnectivityIterFaceUserData{ + typeof(mpi_interfaces), + typeof(mpi_mortars), + typeof(mesh)}(mpi_interfaces, 1, + global_interface_ids, + neighbor_ranks_interface, + mpi_mortars, 1, + global_mortar_ids, + neighbor_ranks_mortar, + mesh) end function init_neighbor_rank_connectivity_iter_face(info, user_data) - data = unsafe_pointer_to_objref(Ptr{InitNeighborRankConnectivityIterFaceUserData}(user_data)) + data = unsafe_pointer_to_objref(Ptr{InitNeighborRankConnectivityIterFaceUserData}(user_data)) - # Function barrier because the unpacked user_data above is not type-stable - init_neighbor_rank_connectivity_iter_face_inner(info, data) + # Function barrier because the unpacked user_data above is not type-stable + init_neighbor_rank_connectivity_iter_face_inner(info, data) end # 2D -cfunction(::typeof(init_neighbor_rank_connectivity_iter_face), ::Val{2}) = @cfunction(init_neighbor_rank_connectivity_iter_face, Cvoid, (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(init_neighbor_rank_connectivity_iter_face), ::Val{2}) + @cfunction(init_neighbor_rank_connectivity_iter_face, Cvoid, + (Ptr{p4est_iter_face_info_t}, Ptr{Cvoid})) +end # 3D -cfunction(::typeof(init_neighbor_rank_connectivity_iter_face), ::Val{3}) = @cfunction(init_neighbor_rank_connectivity_iter_face, Cvoid, (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +function cfunction(::typeof(init_neighbor_rank_connectivity_iter_face), ::Val{3}) + @cfunction(init_neighbor_rank_connectivity_iter_face, Cvoid, + (Ptr{p8est_iter_face_info_t}, Ptr{Cvoid})) +end # Function barrier for type stability function init_neighbor_rank_connectivity_iter_face_inner(info, user_data) - @unpack interfaces, interface_id, global_interface_ids, neighbor_ranks_interface, - mortars, mortar_id, global_mortar_ids, neighbor_ranks_mortar, mesh = user_data - - # Get the global interface/mortar ids and neighbor rank if current face belongs to an MPI - # interface/mortar - if unsafe_load(info).sides.elem_count == 2 # MPI interfaces/mortars have two neighboring elements - # Extract surface data - sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) - - if sides[1].is_hanging == false && sides[2].is_hanging == false # No hanging nodes for MPI interfaces - if sides[1].is.full.is_ghost == true - remote_side = 1 - local_side = 2 - elseif sides[2].is.full.is_ghost == true - remote_side = 2 - local_side = 1 - else # both sides are on this rank -> skip since it's a regular interface - return nothing - end - - # Sanity check, current face should belong to current MPI interface - local_tree = unsafe_load_tree(mesh.p4est, sides[local_side].treeid + 1) # one-based 
indexing - local_quad_id = local_tree.quadrants_offset + sides[local_side].is.full.quadid - @assert interfaces.local_neighbor_ids[interface_id] == local_quad_id + 1 # one-based indexing - - # Get neighbor ID from ghost layer - proc_offsets = unsafe_wrap(Array, - unsafe_load(unsafe_load(info).ghost_layer).proc_offsets, - mpi_nranks() + 1) - ghost_id = sides[remote_side].is.full.quadid # indexes the ghost layer, 0-based - neighbor_rank = findfirst(r -> proc_offsets[r] <= ghost_id < proc_offsets[r+1], - 1:mpi_nranks()) - 1 # MPI ranks are 0-based - neighbor_ranks_interface[interface_id] = neighbor_rank - - # Global interface id is the globally unique quadrant id of the quadrant on the primary - # side (1) multiplied by the number of faces per quadrant plus face - if local_side == 1 - offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, mpi_rank() + 1) # one-based indexing - primary_quad_id = offset + local_quad_id - else - offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, neighbor_rank + 1) # one-based indexing - primary_quad_id = offset + unsafe_load(sides[1].is.full.quad.p.piggy3.local_num) - end - global_interface_id = 2 * ndims(mesh) * primary_quad_id + sides[1].face - global_interface_ids[interface_id] = global_interface_id - - user_data.interface_id += 1 - else # hanging node - if sides[1].is_hanging == true - hanging_side = 1 - full_side = 2 - else - hanging_side = 2 - full_side = 1 - end - # Verify before accessing is.full / is.hanging - @assert sides[hanging_side].is_hanging == true && sides[full_side].is_hanging == false - - # If all quadrants are locally available, this is a regular mortar -> skip - if sides[full_side].is.full.is_ghost == false && all(sides[hanging_side].is.hanging.is_ghost .== false) - return nothing - end - - trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), - unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) - - # Find small quads that are remote and determine which rank owns them - remote_small_quad_positions = findall(sides[hanging_side].is.hanging.is_ghost .== true) - proc_offsets = unsafe_wrap(Array, - unsafe_load(unsafe_load(info).ghost_layer).proc_offsets, - mpi_nranks() + 1) - # indices of small remote quads inside the ghost layer, 0-based - ghost_ids = map(pos -> sides[hanging_side].is.hanging.quadid[pos], remote_small_quad_positions) - neighbor_ranks = map(ghost_ids) do ghost_id - return findfirst(r -> proc_offsets[r] <= ghost_id < proc_offsets[r+1], - 1:mpi_nranks()) - 1 # MPI ranks are 0-based - end - # Determine global quad id of large element to determine global MPI mortar id - # Furthermore, if large element is ghost, add its owner rank to neighbor_ranks - if sides[full_side].is.full.is_ghost == true - ghost_id = sides[full_side].is.full.quadid - large_quad_owner_rank = findfirst(r -> proc_offsets[r] <= ghost_id < proc_offsets[r+1], - 1:mpi_nranks()) - 1 # MPI ranks are 0-based - push!(neighbor_ranks, large_quad_owner_rank) - - offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, large_quad_owner_rank + 1) # one-based indexing - large_quad_id = offset + unsafe_load(sides[full_side].is.full.quad.p.piggy3.local_num) - else - offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, mpi_rank() + 1) # one-based indexing - large_quad_id = offset + trees[full_side].quadrants_offset + sides[full_side].is.full.quadid - end - neighbor_ranks_mortar[mortar_id] = neighbor_ranks - # Global mortar id is the globally unique quadrant id of the large quadrant multiplied by the - # number of faces 
per quadrant plus face - global_mortar_ids[mortar_id] = 2 * ndims(mesh) * large_quad_id + sides[full_side].face - - user_data.mortar_id += 1 + @unpack interfaces, interface_id, global_interface_ids, neighbor_ranks_interface, + mortars, mortar_id, global_mortar_ids, neighbor_ranks_mortar, mesh = user_data + + # Get the global interface/mortar ids and neighbor rank if current face belongs to an MPI + # interface/mortar + if unsafe_load(info).sides.elem_count == 2 # MPI interfaces/mortars have two neighboring elements + # Extract surface data + sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) + + if sides[1].is_hanging == false && sides[2].is_hanging == false # No hanging nodes for MPI interfaces + if sides[1].is.full.is_ghost == true + remote_side = 1 + local_side = 2 + elseif sides[2].is.full.is_ghost == true + remote_side = 2 + local_side = 1 + else # both sides are on this rank -> skip since it's a regular interface + return nothing + end + + # Sanity check, current face should belong to current MPI interface + local_tree = unsafe_load_tree(mesh.p4est, sides[local_side].treeid + 1) # one-based indexing + local_quad_id = local_tree.quadrants_offset + + sides[local_side].is.full.quadid + @assert interfaces.local_neighbor_ids[interface_id] == local_quad_id + 1 # one-based indexing + + # Get neighbor ID from ghost layer + proc_offsets = unsafe_wrap(Array, + unsafe_load(unsafe_load(info).ghost_layer).proc_offsets, + mpi_nranks() + 1) + ghost_id = sides[remote_side].is.full.quadid # indexes the ghost layer, 0-based + neighbor_rank = findfirst(r -> proc_offsets[r] <= ghost_id < + proc_offsets[r + 1], + 1:mpi_nranks()) - 1 # MPI ranks are 0-based + neighbor_ranks_interface[interface_id] = neighbor_rank + + # Global interface id is the globally unique quadrant id of the quadrant on the primary + # side (1) multiplied by the number of faces per quadrant plus face + if local_side == 1 + offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, + mpi_rank() + 1) # one-based indexing + primary_quad_id = offset + local_quad_id + else + offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, + neighbor_rank + 1) # one-based indexing + primary_quad_id = offset + + unsafe_load(sides[1].is.full.quad.p.piggy3.local_num) + end + global_interface_id = 2 * ndims(mesh) * primary_quad_id + sides[1].face + global_interface_ids[interface_id] = global_interface_id + + user_data.interface_id += 1 + else # hanging node + if sides[1].is_hanging == true + hanging_side = 1 + full_side = 2 + else + hanging_side = 2 + full_side = 1 + end + # Verify before accessing is.full / is.hanging + @assert sides[hanging_side].is_hanging == true && + sides[full_side].is_hanging == false + + # If all quadrants are locally available, this is a regular mortar -> skip + if sides[full_side].is.full.is_ghost == false && + all(sides[hanging_side].is.hanging.is_ghost .== false) + return nothing + end + + trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), + unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) + + # Find small quads that are remote and determine which rank owns them + remote_small_quad_positions = findall(sides[hanging_side].is.hanging.is_ghost .== + true) + proc_offsets = unsafe_wrap(Array, + unsafe_load(unsafe_load(info).ghost_layer).proc_offsets, + mpi_nranks() + 1) + # indices of small remote quads inside the ghost layer, 0-based + ghost_ids = map(pos -> sides[hanging_side].is.hanging.quadid[pos], + remote_small_quad_positions) + neighbor_ranks = map(ghost_ids) do ghost_id + 
return findfirst(r -> proc_offsets[r] <= ghost_id < proc_offsets[r + 1], + 1:mpi_nranks()) - 1 # MPI ranks are 0-based + end + # Determine global quad id of large element to determine global MPI mortar id + # Furthermore, if large element is ghost, add its owner rank to neighbor_ranks + if sides[full_side].is.full.is_ghost == true + ghost_id = sides[full_side].is.full.quadid + large_quad_owner_rank = findfirst(r -> proc_offsets[r] <= ghost_id < + proc_offsets[r + 1], + 1:mpi_nranks()) - 1 # MPI ranks are 0-based + push!(neighbor_ranks, large_quad_owner_rank) + + offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, + large_quad_owner_rank + 1) # one-based indexing + large_quad_id = offset + + unsafe_load(sides[full_side].is.full.quad.p.piggy3.local_num) + else + offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, + mpi_rank() + 1) # one-based indexing + large_quad_id = offset + trees[full_side].quadrants_offset + + sides[full_side].is.full.quadid + end + neighbor_ranks_mortar[mortar_id] = neighbor_ranks + # Global mortar id is the globally unique quadrant id of the large quadrant multiplied by the + # number of faces per quadrant plus face + global_mortar_ids[mortar_id] = 2 * ndims(mesh) * large_quad_id + + sides[full_side].face + + user_data.mortar_id += 1 + end end - end - return nothing + return nothing end - # Exchange normal directions of small elements of the MPI mortars. They are needed on all involved # MPI ranks to calculate the mortar fluxes. -function exchange_normal_directions!(mpi_mortars, mpi_cache, mesh::ParallelP4estMesh, n_nodes) - RealT = real(mesh) - n_dims = ndims(mesh) - @unpack mpi_neighbor_mortars, mpi_neighbor_ranks = mpi_cache - n_small_elements = 2^(n_dims-1) - data_size = n_nodes^(n_dims - 1) * n_dims - - # Create buffers and requests - send_buffers = Vector{Vector{RealT}}(undef, length(mpi_neighbor_mortars)) - recv_buffers = Vector{Vector{RealT}}(undef, length(mpi_neighbor_mortars)) - for index in 1:length(mpi_neighbor_mortars) - send_buffers[index] = Vector{RealT}(undef, length(mpi_neighbor_mortars[index]) * n_small_elements * data_size) - send_buffers[index] .= NaN |> RealT - recv_buffers[index] = Vector{RealT}(undef, length(mpi_neighbor_mortars[index]) * n_small_elements * data_size) - recv_buffers[index] .= NaN |> RealT - end - send_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_mortars)) - recv_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_mortars)) - - # Fill send buffers - for d in 1:length(mpi_neighbor_ranks) - send_buffer = send_buffers[d] - - for (index, mortar) in enumerate(mpi_neighbor_mortars[d]) - index_base = (index - 1) * n_small_elements * data_size - indices = buffer_mortar_indices(mesh, index_base, data_size) - for position in mpi_mortars.local_neighbor_positions[mortar] - if position <= n_small_elements # element is small - first, last = indices[position] - @views send_buffer[first:last] .= vec(mpi_mortars.normal_directions[:, .., position, mortar]) - end - end +function exchange_normal_directions!(mpi_mortars, mpi_cache, mesh::ParallelP4estMesh, + n_nodes) + RealT = real(mesh) + n_dims = ndims(mesh) + @unpack mpi_neighbor_mortars, mpi_neighbor_ranks = mpi_cache + n_small_elements = 2^(n_dims - 1) + data_size = n_nodes^(n_dims - 1) * n_dims + + # Create buffers and requests + send_buffers = Vector{Vector{RealT}}(undef, length(mpi_neighbor_mortars)) + recv_buffers = Vector{Vector{RealT}}(undef, length(mpi_neighbor_mortars)) + for index in 1:length(mpi_neighbor_mortars) + send_buffers[index] 
= Vector{RealT}(undef, + length(mpi_neighbor_mortars[index]) * + n_small_elements * data_size) + send_buffers[index] .= NaN |> RealT + recv_buffers[index] = Vector{RealT}(undef, + length(mpi_neighbor_mortars[index]) * + n_small_elements * data_size) + recv_buffers[index] .= NaN |> RealT end - end - - # Start data exchange - for (index, d) in enumerate(mpi_neighbor_ranks) - send_requests[index] = MPI.Isend(send_buffers[index], d, mpi_rank(), mpi_comm()) - recv_requests[index] = MPI.Irecv!(recv_buffers[index], d, d, mpi_comm()) - end - - # Unpack data from receive buffers - d = MPI.Waitany(recv_requests) - while d !== nothing - recv_buffer = recv_buffers[d] - - for (index, mortar) in enumerate(mpi_neighbor_mortars[d]) - index_base = (index - 1) * n_small_elements * data_size - indices = buffer_mortar_indices(mesh, index_base, data_size) - for position in 1:n_small_elements - # Skip if received data for `position` is NaN as no real data has been sent for the - # corresponding element - if isnan(recv_buffer[Base.first(indices[position])]) - continue + send_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_mortars)) + recv_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_mortars)) + + # Fill send buffers + for d in 1:length(mpi_neighbor_ranks) + send_buffer = send_buffers[d] + + for (index, mortar) in enumerate(mpi_neighbor_mortars[d]) + index_base = (index - 1) * n_small_elements * data_size + indices = buffer_mortar_indices(mesh, index_base, data_size) + for position in mpi_mortars.local_neighbor_positions[mortar] + if position <= n_small_elements # element is small + first, last = indices[position] + @views send_buffer[first:last] .= vec(mpi_mortars.normal_directions[:, + .., + position, + mortar]) + end + end end + end - first, last = indices[position] - @views vec(mpi_mortars.normal_directions[:, .., position, mortar]) .= recv_buffer[first:last] - end + # Start data exchange + for (index, d) in enumerate(mpi_neighbor_ranks) + send_requests[index] = MPI.Isend(send_buffers[index], d, mpi_rank(), mpi_comm()) + recv_requests[index] = MPI.Irecv!(recv_buffers[index], d, d, mpi_comm()) end + # Unpack data from receive buffers d = MPI.Waitany(recv_requests) - end + while d !== nothing + recv_buffer = recv_buffers[d] + + for (index, mortar) in enumerate(mpi_neighbor_mortars[d]) + index_base = (index - 1) * n_small_elements * data_size + indices = buffer_mortar_indices(mesh, index_base, data_size) + for position in 1:n_small_elements + # Skip if received data for `position` is NaN as no real data has been sent for the + # corresponding element + if isnan(recv_buffer[Base.first(indices[position])]) + continue + end + + first, last = indices[position] + @views vec(mpi_mortars.normal_directions[:, .., position, mortar]) .= recv_buffer[first:last] + end + end + + d = MPI.Waitany(recv_requests) + end - # Wait for communication to finish - MPI.Waitall(send_requests, MPI.Status) + # Wait for communication to finish + MPI.Waitall(send_requests, MPI.Status) - return nothing + return nothing end - # Get normal direction of MPI mortar @inline function get_normal_direction(mpi_mortars::P4estMPIMortarContainer, indices...) 
- SVector(ntuple(@inline(dim -> mpi_mortars.normal_directions[dim, indices...]), - Val(ndims(mpi_mortars)))) + SVector(ntuple(@inline(dim->mpi_mortars.normal_directions[dim, indices...]), + Val(ndims(mpi_mortars)))) end - include("dg_2d_parallel.jl") include("dg_3d_parallel.jl") - - -end # muladd \ No newline at end of file +end # muladd diff --git a/src/solvers/dgsem_structured/containers.jl b/src/solvers/dgsem_structured/containers.jl index a44f2b3c88c..41eabf7c6bf 100644 --- a/src/solvers/dgsem_structured/containers.jl +++ b/src/solvers/dgsem_structured/containers.jl @@ -3,56 +3,67 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent - -struct ElementContainer{NDIMS, RealT<:Real, uEltype<:Real, NDIMSP1, NDIMSP2, NDIMSP3} - # Physical coordinates at each node - node_coordinates ::Array{RealT, NDIMSP2} # [orientation, node_i, node_j, node_k, element] - # ID of neighbor element in negative direction in orientation - left_neighbors ::Array{Int, 2} # [orientation, elements] - # Jacobian matrix of the transformation - # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,... - jacobian_matrix ::Array{RealT, NDIMSP3} - # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension) - contravariant_vectors ::Array{RealT, NDIMSP3} # [dimension, index, node_i, node_j, node_k, element] - # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix) - inverse_jacobian ::Array{RealT, NDIMSP1} # [node_i, node_j, node_k, element] - # Buffer for calculated surface flux - surface_flux_values ::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element] +struct ElementContainer{NDIMS, RealT <: Real, uEltype <: Real, NDIMSP1, NDIMSP2, NDIMSP3 + } + # Physical coordinates at each node + node_coordinates::Array{RealT, NDIMSP2} # [orientation, node_i, node_j, node_k, element] + # ID of neighbor element in negative direction in orientation + left_neighbors::Array{Int, 2} # [orientation, elements] + # Jacobian matrix of the transformation + # [jacobian_i, jacobian_j, node_i, node_j, node_k, element] where jacobian_i is the first index of the Jacobian matrix,... 
+ jacobian_matrix::Array{RealT, NDIMSP3} + # Contravariant vectors, scaled by J, in Kopriva's blue book called Ja^i_n (i index, n dimension) + contravariant_vectors::Array{RealT, NDIMSP3} # [dimension, index, node_i, node_j, node_k, element] + # 1/J where J is the Jacobian determinant (determinant of Jacobian matrix) + inverse_jacobian::Array{RealT, NDIMSP1} # [node_i, node_j, node_k, element] + # Buffer for calculated surface flux + surface_flux_values::Array{uEltype, NDIMSP2} # [variable, i, j, direction, element] end - # Create element container and initialize element data function init_elements(mesh::StructuredMesh{NDIMS, RealT}, equations::AbstractEquations, - basis, ::Type{uEltype}) where {NDIMS, RealT<:Real, uEltype<:Real} + basis, + ::Type{uEltype}) where {NDIMS, RealT <: Real, uEltype <: Real} + nelements = prod(size(mesh)) + node_coordinates = Array{RealT, NDIMS + 2}(undef, NDIMS, + ntuple(_ -> nnodes(basis), NDIMS)..., + nelements) + left_neighbors = Array{Int, 2}(undef, NDIMS, nelements) + jacobian_matrix = Array{RealT, NDIMS + 3}(undef, NDIMS, NDIMS, + ntuple(_ -> nnodes(basis), NDIMS)..., + nelements) + contravariant_vectors = similar(jacobian_matrix) + inverse_jacobian = Array{RealT, NDIMS + 1}(undef, + ntuple(_ -> nnodes(basis), NDIMS)..., + nelements) + surface_flux_values = Array{uEltype, NDIMS + 2}(undef, nvariables(equations), + ntuple(_ -> nnodes(basis), + NDIMS - 1)..., NDIMS * 2, + nelements) - nelements = prod(size(mesh)) - node_coordinates = Array{RealT, NDIMS+2}(undef, NDIMS, ntuple(_ -> nnodes(basis), NDIMS)..., nelements) - left_neighbors = Array{Int, 2}(undef, NDIMS, nelements) - jacobian_matrix = Array{RealT, NDIMS+3}(undef, NDIMS, NDIMS, ntuple(_ -> nnodes(basis), NDIMS)..., nelements) - contravariant_vectors = similar(jacobian_matrix) - inverse_jacobian = Array{RealT, NDIMS+1}(undef, ntuple(_ -> nnodes(basis), NDIMS)..., nelements) - surface_flux_values = Array{uEltype, NDIMS+2}(undef, nvariables(equations), - ntuple(_ -> nnodes(basis), NDIMS-1)..., NDIMS*2, nelements) + elements = ElementContainer{NDIMS, RealT, uEltype, NDIMS + 1, NDIMS + 2, NDIMS + 3}(node_coordinates, + left_neighbors, + jacobian_matrix, + contravariant_vectors, + inverse_jacobian, + surface_flux_values) - elements = ElementContainer{NDIMS, RealT, uEltype, NDIMS+1, NDIMS+2, NDIMS+3}( - node_coordinates, left_neighbors, jacobian_matrix, contravariant_vectors, - inverse_jacobian, surface_flux_values) - - init_elements!(elements, mesh, basis) - return elements + init_elements!(elements, mesh, basis) + return elements end @inline nelements(elements::ElementContainer) = size(elements.left_neighbors, 2) -@inline Base.ndims(::ElementContainer{NDIMS}) where NDIMS = NDIMS - -Base.eltype(::ElementContainer{NDIMS, RealT, uEltype}) where {NDIMS, RealT, uEltype} = uEltype +@inline Base.ndims(::ElementContainer{NDIMS}) where {NDIMS} = NDIMS +function Base.eltype(::ElementContainer{NDIMS, RealT, uEltype}) where {NDIMS, RealT, + uEltype} + uEltype +end include("containers_1d.jl") include("containers_2d.jl") include("containers_3d.jl") - - end # @muladd diff --git a/src/solvers/dgsem_structured/containers_1d.jl b/src/solvers/dgsem_structured/containers_1d.jl index 97955dcec30..1a1bb183cb3 100644 --- a/src/solvers/dgsem_structured/containers_1d.jl +++ b/src/solvers/dgsem_structured/containers_1d.jl @@ -3,84 +3,83 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Initialize data structures in element container function init_elements!(elements, mesh::StructuredMesh{1}, basis::LobattoLegendreBasis) - @unpack node_coordinates, left_neighbors, - jacobian_matrix, contravariant_vectors, inverse_jacobian = elements + @unpack node_coordinates, left_neighbors, + jacobian_matrix, contravariant_vectors, inverse_jacobian = elements - # Calculate node coordinates, Jacobian matrix, and inverse Jacobian determinant - for cell_x in 1:size(mesh, 1) - calc_node_coordinates!(node_coordinates, cell_x, mesh.mapping, mesh, basis) + # Calculate node coordinates, Jacobian matrix, and inverse Jacobian determinant + for cell_x in 1:size(mesh, 1) + calc_node_coordinates!(node_coordinates, cell_x, mesh.mapping, mesh, basis) - calc_jacobian_matrix!(jacobian_matrix, cell_x, node_coordinates, basis) + calc_jacobian_matrix!(jacobian_matrix, cell_x, node_coordinates, basis) - calc_inverse_jacobian!(inverse_jacobian, cell_x, jacobian_matrix) - end + calc_inverse_jacobian!(inverse_jacobian, cell_x, jacobian_matrix) + end - # Contravariant vectors don't make sense in 1D, they would be identical to inverse_jacobian - fill!(contravariant_vectors, NaN) + # Contravariant vectors don't make sense in 1D, they would be identical to inverse_jacobian + fill!(contravariant_vectors, NaN) - initialize_left_neighbor_connectivity!(left_neighbors, mesh) + initialize_left_neighbor_connectivity!(left_neighbors, mesh) - return nothing + return nothing end - # Calculate physical coordinates to which every node of the reference element is mapped # `mesh.mapping` is passed as an additional argument for type stability (function barrier) -function calc_node_coordinates!(node_coordinates, cell_x, mapping, mesh::StructuredMesh{1}, +function calc_node_coordinates!(node_coordinates, cell_x, mapping, + mesh::StructuredMesh{1}, basis::LobattoLegendreBasis) - @unpack nodes = basis + @unpack nodes = basis - # Get cell length in reference mesh - dx = 2 / size(mesh, 1) + # Get cell length in reference mesh + dx = 2 / size(mesh, 1) - # Calculate node coordinates of reference mesh - cell_x_offset = -1 + (cell_x-1) * dx + dx/2 + # Calculate node coordinates of reference mesh + cell_x_offset = -1 + (cell_x - 1) * dx + dx / 2 - for i in eachnode(basis) - # node_coordinates are the mapped reference node_coordinates - node_coordinates[1, i, cell_x] = mapping(cell_x_offset + dx/2 * nodes[i])[1] - end + for i in eachnode(basis) + # node_coordinates are the mapped reference node_coordinates + node_coordinates[1, i, cell_x] = mapping(cell_x_offset + dx / 2 * nodes[i])[1] + end end - # Calculate Jacobian matrix of the mapping from the reference element to the element in the physical domain -function calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates::AbstractArray{<:Any,3}, +function calc_jacobian_matrix!(jacobian_matrix, element, + node_coordinates::AbstractArray{<:Any, 3}, basis::LobattoLegendreBasis) - @views mul!(jacobian_matrix[1, 1, :, element], basis.derivative_matrix, node_coordinates[1, :, element]) # x_ξ + @views mul!(jacobian_matrix[1, 1, :, element], basis.derivative_matrix, + node_coordinates[1, :, element]) # x_ξ - return jacobian_matrix + return jacobian_matrix end - # Calculate inverse Jacobian (determinant of Jacobian matrix of the mapping) in each node -function calc_inverse_jacobian!(inverse_jacobian::AbstractArray{<:Any, 2}, element, jacobian_matrix) - @views inverse_jacobian[:, element] .= inv.(jacobian_matrix[1, 1, :, element]) +function 
calc_inverse_jacobian!(inverse_jacobian::AbstractArray{<:Any, 2}, element, + jacobian_matrix) + @views inverse_jacobian[:, element] .= inv.(jacobian_matrix[1, 1, :, element]) - return inverse_jacobian + return inverse_jacobian end - # Save id of left neighbor of every element function initialize_left_neighbor_connectivity!(left_neighbors, mesh::StructuredMesh{1}) - # Neighbors in x-direction - # Inner elements - for cell_x in 2:size(mesh, 1) - left_neighbors[1, cell_x] = cell_x - 1 - end - - if isperiodic(mesh) - # Periodic boundary - left_neighbors[1, 1] = size(mesh, 1) - else - # Use boundary conditions - left_neighbors[1, 1] = 0 - end - - return left_neighbors + # Neighbors in x-direction + # Inner elements + for cell_x in 2:size(mesh, 1) + left_neighbors[1, cell_x] = cell_x - 1 + end + + if isperiodic(mesh) + # Periodic boundary + left_neighbors[1, 1] = size(mesh, 1) + else + # Use boundary conditions + left_neighbors[1, 1] = 0 + end + + return left_neighbors end - end # @muladd diff --git a/src/solvers/dgsem_structured/containers_2d.jl b/src/solvers/dgsem_structured/containers_2d.jl index e2b5aff8b0b..fb6db48e0a5 100644 --- a/src/solvers/dgsem_structured/containers_2d.jl +++ b/src/solvers/dgsem_structured/containers_2d.jl @@ -3,174 +3,187 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Initialize data structures in element container function init_elements!(elements, mesh::StructuredMesh{2}, basis::LobattoLegendreBasis) - @unpack node_coordinates, left_neighbors, - jacobian_matrix, contravariant_vectors, inverse_jacobian = elements + @unpack node_coordinates, left_neighbors, + jacobian_matrix, contravariant_vectors, inverse_jacobian = elements - linear_indices = LinearIndices(size(mesh)) + linear_indices = LinearIndices(size(mesh)) - # Calculate node coordinates, Jacobian matrix, and inverse Jacobian determinant - for cell_y in 1:size(mesh, 2), cell_x in 1:size(mesh, 1) - element = linear_indices[cell_x, cell_y] + # Calculate node coordinates, Jacobian matrix, and inverse Jacobian determinant + for cell_y in 1:size(mesh, 2), cell_x in 1:size(mesh, 1) + element = linear_indices[cell_x, cell_y] - calc_node_coordinates!(node_coordinates, element, cell_x, cell_y, mesh.mapping, mesh, basis) + calc_node_coordinates!(node_coordinates, element, cell_x, cell_y, mesh.mapping, + mesh, basis) - calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) + calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) - calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix) + calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix) - calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix) - end + calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix) + end - initialize_left_neighbor_connectivity!(left_neighbors, mesh, linear_indices) + initialize_left_neighbor_connectivity!(left_neighbors, mesh, linear_indices) - return nothing + return nothing end - # Calculate physical coordinates to which every node of the reference element is mapped # `mesh.mapping` is passed as an additional argument for type stability (function barrier) function calc_node_coordinates!(node_coordinates, element, cell_x, cell_y, mapping, mesh::StructuredMesh{2}, basis::LobattoLegendreBasis) - @unpack nodes = basis + @unpack nodes = basis - # Get cell length in reference mesh - dx = 2 / size(mesh, 1) - dy = 2 / size(mesh, 2) + # Get cell length in 
reference mesh + dx = 2 / size(mesh, 1) + dy = 2 / size(mesh, 2) - # Calculate node coordinates of reference mesh - cell_x_offset = -1 + (cell_x-1) * dx + dx/2 - cell_y_offset = -1 + (cell_y-1) * dy + dy/2 + # Calculate node coordinates of reference mesh + cell_x_offset = -1 + (cell_x - 1) * dx + dx / 2 + cell_y_offset = -1 + (cell_y - 1) * dy + dy / 2 - for j in eachnode(basis), i in eachnode(basis) - # node_coordinates are the mapped reference node_coordinates - node_coordinates[:, i, j, element] .= mapping(cell_x_offset + dx/2 * nodes[i], - cell_y_offset + dy/2 * nodes[j]) - end + for j in eachnode(basis), i in eachnode(basis) + # node_coordinates are the mapped reference node_coordinates + node_coordinates[:, i, j, element] .= mapping(cell_x_offset + dx / 2 * nodes[i], + cell_y_offset + dy / 2 * nodes[j]) + end end - # Calculate Jacobian matrix of the mapping from the reference element to the element in the physical domain -function calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates::AbstractArray{<:Any, 4}, basis::LobattoLegendreBasis) - @unpack derivative_matrix = basis - - # The code below is equivalent to the following matrix multiplications, which - # seem to end up calling generic linear algebra code from Julia. Thus, the - # optimized code below using `@turbo` is much faster. - # jacobian_matrix[1, 1, :, :, element] = derivative_matrix * node_coordinates[1, :, :, element] # x_ξ - # jacobian_matrix[2, 1, :, :, element] = derivative_matrix * node_coordinates[2, :, :, element] # y_ξ - # jacobian_matrix[1, 2, :, :, element] = node_coordinates[1, :, :, element] * derivative_matrix' # x_η - # jacobian_matrix[2, 2, :, :, element] = node_coordinates[2, :, :, element] * derivative_matrix' # y_η - - # x_ξ, y_ξ - @turbo for xy in indices((jacobian_matrix, node_coordinates), (1, 1)) - for j in indices((jacobian_matrix, node_coordinates), (4, 3)), i in indices((jacobian_matrix, derivative_matrix), (3, 1)) - result = zero(eltype(jacobian_matrix)) - for ii in indices((node_coordinates, derivative_matrix), (2, 2)) - result += derivative_matrix[i, ii] * node_coordinates[xy, ii, j, element] - end - jacobian_matrix[xy, 1, i, j, element] = result +function calc_jacobian_matrix!(jacobian_matrix, element, + node_coordinates::AbstractArray{<:Any, 4}, + basis::LobattoLegendreBasis) + @unpack derivative_matrix = basis + + # The code below is equivalent to the following matrix multiplications, which + # seem to end up calling generic linear algebra code from Julia. Thus, the + # optimized code below using `@turbo` is much faster. 
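# A standalone plain-Julia sketch of this equivalence; `D` and `X` are
# illustrative names for a hypothetical basis, not Trixi variables. For the
# 3-node Lobatto-Legendre basis the nodes are -1, 0, 1, so the derivative
# matrix is known in closed form.
D = [-1.5 2.0 -0.5;
     -0.5 0.0 0.5;
     0.5 -2.0 1.5]
X = rand(3, 3)  # one coordinate component of node_coordinates on one element
@assert D * X ≈ [sum(D[i, ii] * X[ii, j] for ii in 1:3) for i in 1:3, j in 1:3]   # x_ξ
@assert X * D' ≈ [sum(D[j, jj] * X[i, jj] for jj in 1:3) for i in 1:3, j in 1:3]  # x_η
# The matrix multiplications referred to above: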
+    # jacobian_matrix[1, 1, :, :, element] = derivative_matrix * node_coordinates[1, :, :, element] # x_ξ
+    # jacobian_matrix[2, 1, :, :, element] = derivative_matrix * node_coordinates[2, :, :, element] # y_ξ
+    # jacobian_matrix[1, 2, :, :, element] = node_coordinates[1, :, :, element] * derivative_matrix' # x_η
+    # jacobian_matrix[2, 2, :, :, element] = node_coordinates[2, :, :, element] * derivative_matrix' # y_η
+
+    # x_ξ, y_ξ
+    @turbo for xy in indices((jacobian_matrix, node_coordinates), (1, 1))
+        for j in indices((jacobian_matrix, node_coordinates), (4, 3)),
+            i in indices((jacobian_matrix, derivative_matrix), (3, 1))
+
+            result = zero(eltype(jacobian_matrix))
+            for ii in indices((node_coordinates, derivative_matrix), (2, 2))
+                result += derivative_matrix[i, ii] *
+                          node_coordinates[xy, ii, j, element]
+            end
+            jacobian_matrix[xy, 1, i, j, element] = result
+        end
     end
-  end
-
-  # x_η, y_η
-  @turbo for xy in indices((jacobian_matrix, node_coordinates), (1, 1))
-    for j in indices((jacobian_matrix, derivative_matrix), (4, 1)), i in indices((jacobian_matrix, node_coordinates), (3, 2))
-      result = zero(eltype(jacobian_matrix))
-      for jj in indices((node_coordinates, derivative_matrix), (3, 2))
-        result += derivative_matrix[j, jj] * node_coordinates[xy, i, jj, element]
-      end
-      jacobian_matrix[xy, 2, i, j, element] = result
+
+    # x_η, y_η
+    @turbo for xy in indices((jacobian_matrix, node_coordinates), (1, 1))
+        for j in indices((jacobian_matrix, derivative_matrix), (4, 1)),
+            i in indices((jacobian_matrix, node_coordinates), (3, 2))
+
+            result = zero(eltype(jacobian_matrix))
+            for jj in indices((node_coordinates, derivative_matrix), (3, 2))
+                result += derivative_matrix[j, jj] *
+                          node_coordinates[xy, i, jj, element]
+            end
+            jacobian_matrix[xy, 2, i, j, element] = result
+        end
     end
-  end
 
-  return jacobian_matrix
+    return jacobian_matrix
 end
 
-
 # Calculate contravariant vectors, multiplied by the Jacobian determinant J of the transformation mapping.
 # Those are called Ja^i in Kopriva's blue book.
-function calc_contravariant_vectors!(contravariant_vectors::AbstractArray{<:Any,5}, element, jacobian_matrix)
-  # The code below is equivalent to the following using broadcasting but much faster.
-  # # First contravariant vector Ja^1
-  # contravariant_vectors[1, 1, :, :, element] = jacobian_matrix[2, 2, :, :, element]
-  # contravariant_vectors[2, 1, :, :, element] = -jacobian_matrix[1, 2, :, :, element]
-  # # Second contravariant vector Ja^2
-  # contravariant_vectors[1, 2, :, :, element] = -jacobian_matrix[2, 1, :, :, element]
-  # contravariant_vectors[2, 2, :, :, element] = jacobian_matrix[1, 1, :, :, element]
-
-  @turbo for j in indices((contravariant_vectors, jacobian_matrix), (4, 4)),
-    i in indices((contravariant_vectors, jacobian_matrix), (3, 3))
-    # First contravariant vector Ja^1
-    contravariant_vectors[1, 1, i, j, element] = jacobian_matrix[2, 2, i, j, element]
-    contravariant_vectors[2, 1, i, j, element] = -jacobian_matrix[1, 2, i, j, element]
-
-    # Second contravariant vector Ja^2
-    contravariant_vectors[1, 2, i, j, element] = -jacobian_matrix[2, 1, i, j, element]
-    contravariant_vectors[2, 2, i, j, element] = jacobian_matrix[1, 1, i, j, element]
-  end
-
-  return contravariant_vectors
-end
+function calc_contravariant_vectors!(contravariant_vectors::AbstractArray{<:Any, 5},
+                                     element, jacobian_matrix)
+    # The code below is equivalent to the following using broadcasting but much faster.
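# A single-node sketch of what is being computed (plain Julia; `jac`, `Ja1`,
# `Ja2`, `a1`, `a2` are illustrative names with made-up values, not Trixi
# variables): in 2D the scaled contravariant vectors are read directly off the
# Jacobian, and they satisfy Ja^i ⋅ a_j = J δ_ij with the covariant vectors
# a_1 = (x_ξ, y_ξ) and a_2 = (x_η, y_η).
jac = [1.0 0.2;
       0.1 0.8]                   # [x_ξ x_η; y_ξ y_η] at one node
Ja1 = (jac[2, 2], -jac[1, 2])     # first contravariant vector, scaled by J
Ja2 = (-jac[2, 1], jac[1, 1])     # second contravariant vector, scaled by J
J = jac[1, 1] * jac[2, 2] - jac[1, 2] * jac[2, 1]
a1 = (jac[1, 1], jac[2, 1])
a2 = (jac[1, 2], jac[2, 2])
dot2(u, v) = u[1] * v[1] + u[2] * v[2]
@assert dot2(Ja1, a1) ≈ J && abs(dot2(Ja1, a2)) < 1e-14
@assert dot2(Ja2, a2) ≈ J && abs(dot2(Ja2, a1)) < 1e-14
# The broadcasting formulation referred to above: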
+ # # First contravariant vector Ja^1 + # contravariant_vectors[1, 1, :, :, element] = jacobian_matrix[2, 2, :, :, element] + # contravariant_vectors[2, 1, :, :, element] = -jacobian_matrix[1, 2, :, :, element] + # # Second contravariant vector Ja^2 + # contravariant_vectors[1, 2, :, :, element] = -jacobian_matrix[2, 1, :, :, element] + # contravariant_vectors[2, 2, :, :, element] = jacobian_matrix[1, 1, :, :, element] + + @turbo for j in indices((contravariant_vectors, jacobian_matrix), (4, 4)), + i in indices((contravariant_vectors, jacobian_matrix), (3, 3)) + # First contravariant vector Ja^1 + contravariant_vectors[1, 1, i, j, element] = jacobian_matrix[2, 2, i, j, + element] + contravariant_vectors[2, 1, i, j, element] = -jacobian_matrix[1, 2, i, j, + element] + + # Second contravariant vector Ja^2 + contravariant_vectors[1, 2, i, j, element] = -jacobian_matrix[2, 1, i, j, + element] + contravariant_vectors[2, 2, i, j, element] = jacobian_matrix[1, 1, i, j, + element] + end + return contravariant_vectors +end # Calculate inverse Jacobian (determinant of Jacobian matrix of the mapping) in each node -function calc_inverse_jacobian!(inverse_jacobian::AbstractArray{<:Any,3}, element, jacobian_matrix) - # The code below is equivalent to the following high-level code but much faster. - # inverse_jacobian[i, j, element] = inv(det(jacobian_matrix[:, :, i, j, element]) - - @turbo for j in indices((inverse_jacobian, jacobian_matrix), (2, 4)), - i in indices((inverse_jacobian, jacobian_matrix), (1, 3)) - inverse_jacobian[i, j, element] = inv(jacobian_matrix[1, 1, i, j, element] * jacobian_matrix[2, 2, i, j, element] - - jacobian_matrix[1, 2, i, j, element] * jacobian_matrix[2, 1, i, j, element]) - end +function calc_inverse_jacobian!(inverse_jacobian::AbstractArray{<:Any, 3}, element, + jacobian_matrix) + # The code below is equivalent to the following high-level code but much faster. 
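# A quick standalone check (plain Julia; `m` is an illustrative 2x2 matrix,
# not a Trixi variable) that the hand-written expression in the loop below is
# the textbook 2x2 determinant, inverted:
using LinearAlgebra: det
m = [1.0 0.2;
     0.1 0.8]
@assert inv(m[1, 1] * m[2, 2] - m[1, 2] * m[2, 1]) ≈ inv(det(m))
# The high-level code referred to above: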
+ # inverse_jacobian[i, j, element] = inv(det(jacobian_matrix[:, :, i, j, element]) + + @turbo for j in indices((inverse_jacobian, jacobian_matrix), (2, 4)), + i in indices((inverse_jacobian, jacobian_matrix), (1, 3)) + + inverse_jacobian[i, j, element] = inv(jacobian_matrix[1, 1, i, j, element] * + jacobian_matrix[2, 2, i, j, element] - + jacobian_matrix[1, 2, i, j, element] * + jacobian_matrix[2, 1, i, j, element]) + end - return inverse_jacobian + return inverse_jacobian end - # Save id of left neighbor of every element -function initialize_left_neighbor_connectivity!(left_neighbors, mesh::StructuredMesh{2}, linear_indices) - # Neighbors in x-direction - for cell_y in 1:size(mesh, 2) - # Inner elements - for cell_x in 2:size(mesh, 1) - element = linear_indices[cell_x, cell_y] - left_neighbors[1, element] = linear_indices[cell_x - 1, cell_y] +function initialize_left_neighbor_connectivity!(left_neighbors, mesh::StructuredMesh{2}, + linear_indices) + # Neighbors in x-direction + for cell_y in 1:size(mesh, 2) + # Inner elements + for cell_x in 2:size(mesh, 1) + element = linear_indices[cell_x, cell_y] + left_neighbors[1, element] = linear_indices[cell_x - 1, cell_y] + end + + if isperiodic(mesh, 1) + # Periodic boundary + left_neighbors[1, linear_indices[1, cell_y]] = linear_indices[end, cell_y] + else + # Use boundary conditions + left_neighbors[1, linear_indices[1, cell_y]] = 0 + end end - if isperiodic(mesh, 1) - # Periodic boundary - left_neighbors[1, linear_indices[1, cell_y]] = linear_indices[end, cell_y] - else - # Use boundary conditions - left_neighbors[1, linear_indices[1, cell_y]] = 0 + # Neighbors in y-direction + for cell_x in 1:size(mesh, 1) + # Inner elements + for cell_y in 2:size(mesh, 2) + element = linear_indices[cell_x, cell_y] + left_neighbors[2, element] = linear_indices[cell_x, cell_y - 1] + end + + if isperiodic(mesh, 2) + # Periodic boundary + left_neighbors[2, linear_indices[cell_x, 1]] = linear_indices[cell_x, end] + else + # Use boundary conditions + left_neighbors[2, linear_indices[cell_x, 1]] = 0 + end end - end - - # Neighbors in y-direction - for cell_x in 1:size(mesh, 1) - # Inner elements - for cell_y in 2:size(mesh, 2) - element = linear_indices[cell_x, cell_y] - left_neighbors[2, element] = linear_indices[cell_x, cell_y - 1] - end - - if isperiodic(mesh, 2) - # Periodic boundary - left_neighbors[2, linear_indices[cell_x, 1]] = linear_indices[cell_x, end] - else - # Use boundary conditions - left_neighbors[2, linear_indices[cell_x, 1]] = 0 - end - end - return left_neighbors + return left_neighbors end - end # @muladd diff --git a/src/solvers/dgsem_structured/containers_3d.jl b/src/solvers/dgsem_structured/containers_3d.jl index 1dc1ced4528..e843e869bf5 100644 --- a/src/solvers/dgsem_structured/containers_3d.jl +++ b/src/solvers/dgsem_structured/containers_3d.jl @@ -3,288 +3,342 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Initialize data structures in element container function init_elements!(elements, mesh::StructuredMesh{3}, basis::LobattoLegendreBasis) - @unpack node_coordinates, left_neighbors, - jacobian_matrix, contravariant_vectors, inverse_jacobian = elements + @unpack node_coordinates, left_neighbors, + jacobian_matrix, contravariant_vectors, inverse_jacobian = elements - linear_indices = LinearIndices(size(mesh)) + linear_indices = LinearIndices(size(mesh)) - # Calculate node coordinates, Jacobian matrix, and inverse Jacobian determinant - for cell_z in 1:size(mesh, 3), cell_y in 1:size(mesh, 2), cell_x in 1:size(mesh, 1) - element = linear_indices[cell_x, cell_y, cell_z] + # Calculate node coordinates, Jacobian matrix, and inverse Jacobian determinant + for cell_z in 1:size(mesh, 3), cell_y in 1:size(mesh, 2), cell_x in 1:size(mesh, 1) + element = linear_indices[cell_x, cell_y, cell_z] - calc_node_coordinates!(node_coordinates, element, cell_x, cell_y, cell_z, mesh.mapping, mesh, basis) + calc_node_coordinates!(node_coordinates, element, cell_x, cell_y, cell_z, + mesh.mapping, mesh, basis) - calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) + calc_jacobian_matrix!(jacobian_matrix, element, node_coordinates, basis) - calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix, node_coordinates, basis) + calc_contravariant_vectors!(contravariant_vectors, element, jacobian_matrix, + node_coordinates, basis) - calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix, basis) - end + calc_inverse_jacobian!(inverse_jacobian, element, jacobian_matrix, basis) + end - initialize_left_neighbor_connectivity!(left_neighbors, mesh, linear_indices) + initialize_left_neighbor_connectivity!(left_neighbors, mesh, linear_indices) - return nothing + return nothing end - # Calculate physical coordinates to which every node of the reference element is mapped # `mesh.mapping` is passed as an additional argument for type stability (function barrier) function calc_node_coordinates!(node_coordinates, element, cell_x, cell_y, cell_z, mapping, mesh::StructuredMesh{3}, basis::LobattoLegendreBasis) - @unpack nodes = basis - - # Get cell length in reference mesh - dx = 2 / size(mesh, 1) - dy = 2 / size(mesh, 2) - dz = 2 / size(mesh, 3) - - # Calculate node coordinates of reference mesh - cell_x_offset = -1 + (cell_x-1) * dx + dx/2 - cell_y_offset = -1 + (cell_y-1) * dy + dy/2 - cell_z_offset = -1 + (cell_z-1) * dz + dz/2 - - for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - # node_coordinates are the mapped reference node_coordinates - node_coordinates[:, i, j, k, element] .= mapping(cell_x_offset + dx/2 * nodes[i], - cell_y_offset + dy/2 * nodes[j], - cell_z_offset + dz/2 * nodes[k]) - end + @unpack nodes = basis + + # Get cell length in reference mesh + dx = 2 / size(mesh, 1) + dy = 2 / size(mesh, 2) + dz = 2 / size(mesh, 3) + + # Calculate node coordinates of reference mesh + cell_x_offset = -1 + (cell_x - 1) * dx + dx / 2 + cell_y_offset = -1 + (cell_y - 1) * dy + dy / 2 + cell_z_offset = -1 + (cell_z - 1) * dz + dz / 2 + + for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + # node_coordinates are the mapped reference node_coordinates + node_coordinates[:, i, j, k, element] .= mapping(cell_x_offset + + dx / 2 * nodes[i], + cell_y_offset + + dy / 2 * nodes[j], + cell_z_offset + + dz / 2 * nodes[k]) + end end - # Calculate Jacobian matrix of the mapping from the reference element to the element in the physical domain 
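# A minimal sketch of the idea behind the next function (plain Julia; `D` and
# `X` are illustrative names for a hypothetical 2-node basis with nodes -1 and
# 1, not Trixi variables): the one-dimensional derivative matrix is applied
# along each index direction in turn. For the ξ-derivative of one coordinate
# component:
D = [-0.5 0.5;
     -0.5 0.5]      # 1D derivative matrix of linear interpolation on [-1, 1]
X = rand(2, 2, 2)   # one component of node_coordinates on one element
X_ξ = [sum(D[i, ii] * X[ii, j, k] for ii in 1:2) for i in 1:2, j in 1:2, k in 1:2]
# The loops below compute exactly this contraction, for all components at once
# and also for the η- and ζ-directions: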
-function calc_jacobian_matrix!(jacobian_matrix::AbstractArray{<:Any,6}, element, node_coordinates, basis) - # The code below is equivalent to the following matrix multiplications but much faster. - # - # for dim in 1:3, j in eachnode(basis), i in eachnode(basis) - # # ∂/∂ξ - # jacobian_matrix[dim, 1, :, i, j, element] = basis.derivative_matrix * node_coordinates[dim, :, i, j, element] - # # ∂/∂η - # jacobian_matrix[dim, 2, i, :, j, element] = basis.derivative_matrix * node_coordinates[dim, i, :, j, element] - # # ∂/∂ζ - # jacobian_matrix[dim, 3, i, j, :, element] = basis.derivative_matrix * node_coordinates[dim, i, j, :, element] - # end - - @turbo for dim in 1:3, k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(jacobian_matrix)) - - for ii in eachnode(basis) - result += basis.derivative_matrix[i, ii] * node_coordinates[dim, ii, j, k, element] +function calc_jacobian_matrix!(jacobian_matrix::AbstractArray{<:Any, 6}, element, + node_coordinates, basis) + # The code below is equivalent to the following matrix multiplications but much faster. + # + # for dim in 1:3, j in eachnode(basis), i in eachnode(basis) + # # ∂/∂ξ + # jacobian_matrix[dim, 1, :, i, j, element] = basis.derivative_matrix * node_coordinates[dim, :, i, j, element] + # # ∂/∂η + # jacobian_matrix[dim, 2, i, :, j, element] = basis.derivative_matrix * node_coordinates[dim, i, :, j, element] + # # ∂/∂ζ + # jacobian_matrix[dim, 3, i, j, :, element] = basis.derivative_matrix * node_coordinates[dim, i, j, :, element] + # end + + @turbo for dim in 1:3, k in eachnode(basis), j in eachnode(basis), + i in eachnode(basis) + + result = zero(eltype(jacobian_matrix)) + + for ii in eachnode(basis) + result += basis.derivative_matrix[i, ii] * + node_coordinates[dim, ii, j, k, element] + end + + jacobian_matrix[dim, 1, i, j, k, element] = result end - jacobian_matrix[dim, 1, i, j, k, element] = result - end + @turbo for dim in 1:3, k in eachnode(basis), j in eachnode(basis), + i in eachnode(basis) + + result = zero(eltype(jacobian_matrix)) - @turbo for dim in 1:3, k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(jacobian_matrix)) + for ii in eachnode(basis) + result += basis.derivative_matrix[j, ii] * + node_coordinates[dim, i, ii, k, element] + end - for ii in eachnode(basis) - result += basis.derivative_matrix[j, ii] * node_coordinates[dim, i, ii, k, element] + jacobian_matrix[dim, 2, i, j, k, element] = result end - jacobian_matrix[dim, 2, i, j, k, element] = result - end + @turbo for dim in 1:3, k in eachnode(basis), j in eachnode(basis), + i in eachnode(basis) - @turbo for dim in 1:3, k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(jacobian_matrix)) + result = zero(eltype(jacobian_matrix)) - for ii in eachnode(basis) - result += basis.derivative_matrix[k, ii] * node_coordinates[dim, i, j, ii, element] - end + for ii in eachnode(basis) + result += basis.derivative_matrix[k, ii] * + node_coordinates[dim, i, j, ii, element] + end - jacobian_matrix[dim, 3, i, j, k, element] = result - end + jacobian_matrix[dim, 3, i, j, k, element] = result + end - return jacobian_matrix + return jacobian_matrix end - # Calculate contravariant vectors, multiplied by the Jacobian determinant J of the transformation mapping, # using the invariant curl form. # These are called Ja^i in Kopriva's blue book. 
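# Background note: the curl form is used (rather than the cross-product form)
# because it preserves the discrete metric identities ∑_i ∂(Jaⁱₙ)/∂ξⁱ = 0 when
# the derivatives are evaluated with the same polynomial differentiation, which
# in turn guarantees free-stream preservation on curved elements; see Kopriva,
# "Metric Identities and the Discontinuous Spectral Element Method on
# Curvilinear Meshes" (J. Sci. Comput., 2006). The cyclic index triple used
# below can be checked in isolation:
for n in 1:3
    m = (n % 3) + 1        # (n, m, l) cycles through (1, 2, 3), (2, 3, 1), (3, 1, 2)
    l = ((n + 1) % 3) + 1
    @assert sort([n, m, l]) == [1, 2, 3]
end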
-function calc_contravariant_vectors!(contravariant_vectors::AbstractArray{<:Any,6}, element, - jacobian_matrix, node_coordinates, basis::LobattoLegendreBasis) - @unpack derivative_matrix = basis - - # The general form is - # Jaⁱₙ = 0.5 * ( ∇ × (Xₘ ∇ Xₗ - Xₗ ∇ Xₘ) )ᵢ where (n, m, l) cyclic and ∇ = (∂/∂ξ, ∂/∂η, ∂/∂ζ)ᵀ - - for n in 1:3 - # (n, m, l) cyclic - m = (n % 3) + 1 - l = ((n + 1) % 3) + 1 - - # Calculate Ja¹ₙ = 0.5 * [ (Xₘ Xₗ_ζ - Xₗ Xₘ_ζ)_η - (Xₘ Xₗ_η - Xₗ Xₘ_η)_ζ ] - # For each of these, the first and second summand are computed in separate loops - # for performance reasons. - - # First summand 0.5 * (Xₘ Xₗ_ζ - Xₗ Xₘ_ζ)_η - @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(contravariant_vectors)) - - for ii in eachnode(basis) - # Multiply derivative_matrix to j-dimension to differentiate wrt η - result += 0.5 * derivative_matrix[j, ii] * ( - node_coordinates[m, i, ii, k, element] * jacobian_matrix[l, 3, i, ii, k, element] - - node_coordinates[l, i, ii, k, element] * jacobian_matrix[m, 3, i, ii, k, element]) - end - - contravariant_vectors[n, 1, i, j, k, element] = result +function calc_contravariant_vectors!(contravariant_vectors::AbstractArray{<:Any, 6}, + element, + jacobian_matrix, node_coordinates, + basis::LobattoLegendreBasis) + @unpack derivative_matrix = basis + + # The general form is + # Jaⁱₙ = 0.5 * ( ∇ × (Xₘ ∇ Xₗ - Xₗ ∇ Xₘ) )ᵢ where (n, m, l) cyclic and ∇ = (∂/∂ξ, ∂/∂η, ∂/∂ζ)ᵀ + + for n in 1:3 + # (n, m, l) cyclic + m = (n % 3) + 1 + l = ((n + 1) % 3) + 1 + + # Calculate Ja¹ₙ = 0.5 * [ (Xₘ Xₗ_ζ - Xₗ Xₘ_ζ)_η - (Xₘ Xₗ_η - Xₗ Xₘ_η)_ζ ] + # For each of these, the first and second summand are computed in separate loops + # for performance reasons. + + # First summand 0.5 * (Xₘ Xₗ_ζ - Xₗ Xₘ_ζ)_η + @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + result = zero(eltype(contravariant_vectors)) + + for ii in eachnode(basis) + # Multiply derivative_matrix to j-dimension to differentiate wrt η + result += 0.5 * derivative_matrix[j, ii] * + (node_coordinates[m, i, ii, k, element] * + jacobian_matrix[l, 3, i, ii, k, element] - + node_coordinates[l, i, ii, k, element] * + jacobian_matrix[m, 3, i, ii, k, element]) + end + + contravariant_vectors[n, 1, i, j, k, element] = result + end + + # Second summand -0.5 * (Xₘ Xₗ_η - Xₗ Xₘ_η)_ζ + @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + result = zero(eltype(contravariant_vectors)) + + for ii in eachnode(basis) + # Multiply derivative_matrix to k-dimension to differentiate wrt ζ + result += 0.5 * derivative_matrix[k, ii] * + (node_coordinates[m, i, j, ii, element] * + jacobian_matrix[l, 2, i, j, ii, element] - + node_coordinates[l, i, j, ii, element] * + jacobian_matrix[m, 2, i, j, ii, element]) + end + + contravariant_vectors[n, 1, i, j, k, element] -= result + end + + # Calculate Ja²ₙ = 0.5 * [ (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_ζ - (Xₘ Xₗ_ζ - Xₗ Xₘ_ζ)_ξ ] + + # First summand 0.5 * (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_ζ + @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + result = zero(eltype(contravariant_vectors)) + + for ii in eachnode(basis) + # Multiply derivative_matrix to k-dimension to differentiate wrt ζ + result += 0.5 * derivative_matrix[k, ii] * + (node_coordinates[m, i, j, ii, element] * + jacobian_matrix[l, 1, i, j, ii, element] - + node_coordinates[l, i, j, ii, element] * + jacobian_matrix[m, 1, i, j, ii, element]) + end + + contravariant_vectors[n, 2, i, j, k, element] = result + end + + # Second summand -0.5 * (Xₘ Xₗ_ζ - 
Xₗ Xₘ_ζ)_ξ + @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + result = zero(eltype(contravariant_vectors)) + + for ii in eachnode(basis) + # Multiply derivative_matrix to i-dimension to differentiate wrt ξ + result += 0.5 * derivative_matrix[i, ii] * + (node_coordinates[m, ii, j, k, element] * + jacobian_matrix[l, 3, ii, j, k, element] - + node_coordinates[l, ii, j, k, element] * + jacobian_matrix[m, 3, ii, j, k, element]) + end + + contravariant_vectors[n, 2, i, j, k, element] -= result + end + + # Calculate Ja³ₙ = 0.5 * [ (Xₘ Xₗ_η - Xₗ Xₘ_η)_ξ - (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_η ] + + # First summand 0.5 * (Xₘ Xₗ_η - Xₗ Xₘ_η)_ξ + @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + result = zero(eltype(contravariant_vectors)) + + for ii in eachnode(basis) + # Multiply derivative_matrix to i-dimension to differentiate wrt ξ + result += 0.5 * derivative_matrix[i, ii] * + (node_coordinates[m, ii, j, k, element] * + jacobian_matrix[l, 2, ii, j, k, element] - + node_coordinates[l, ii, j, k, element] * + jacobian_matrix[m, 2, ii, j, k, element]) + end + + contravariant_vectors[n, 3, i, j, k, element] = result + end + + # Second summand -0.5 * (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_η + @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + result = zero(eltype(contravariant_vectors)) + + for ii in eachnode(basis) + # Multiply derivative_matrix to j-dimension to differentiate wrt η + result += 0.5 * derivative_matrix[j, ii] * + (node_coordinates[m, i, ii, k, element] * + jacobian_matrix[l, 1, i, ii, k, element] - + node_coordinates[l, i, ii, k, element] * + jacobian_matrix[m, 1, i, ii, k, element]) + end + + contravariant_vectors[n, 3, i, j, k, element] -= result + end end - # Second summand -0.5 * (Xₘ Xₗ_η - Xₗ Xₘ_η)_ζ - @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(contravariant_vectors)) - - for ii in eachnode(basis) - # Multiply derivative_matrix to k-dimension to differentiate wrt ζ - result += 0.5 * derivative_matrix[k, ii] * ( - node_coordinates[m, i, j, ii, element] * jacobian_matrix[l, 2, i, j, ii, element] - - node_coordinates[l, i, j, ii, element] * jacobian_matrix[m, 2, i, j, ii, element]) - end - - contravariant_vectors[n, 1, i, j, k, element] -= result - end - - # Calculate Ja²ₙ = 0.5 * [ (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_ζ - (Xₘ Xₗ_ζ - Xₗ Xₘ_ζ)_ξ ] - - # First summand 0.5 * (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_ζ - @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(contravariant_vectors)) - - for ii in eachnode(basis) - # Multiply derivative_matrix to k-dimension to differentiate wrt ζ - result += 0.5 * derivative_matrix[k, ii] * ( - node_coordinates[m, i, j, ii, element] * jacobian_matrix[l, 1, i, j, ii, element] - - node_coordinates[l, i, j, ii, element] * jacobian_matrix[m, 1, i, j, ii, element]) - end - - contravariant_vectors[n, 2, i, j, k, element] = result - end - - # Second summand -0.5 * (Xₘ Xₗ_ζ - Xₗ Xₘ_ζ)_ξ - @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(contravariant_vectors)) - - for ii in eachnode(basis) - # Multiply derivative_matrix to i-dimension to differentiate wrt ξ - result += 0.5 * derivative_matrix[i, ii] * ( - node_coordinates[m, ii, j, k, element] * jacobian_matrix[l, 3, ii, j, k, element] - - node_coordinates[l, ii, j, k, element] * jacobian_matrix[m, 3, ii, j, k, element]) - end - - contravariant_vectors[n, 2, i, j, k, element] -= result - end - - # Calculate Ja³ₙ = 0.5 * [ (Xₘ Xₗ_η - Xₗ 
Xₘ_η)_ξ - (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_η ] + return contravariant_vectors +end - # First summand 0.5 * (Xₘ Xₗ_η - Xₗ Xₘ_η)_ξ +# Calculate inverse Jacobian (determinant of Jacobian matrix of the mapping) in each node +function calc_inverse_jacobian!(inverse_jacobian::AbstractArray{<:Any, 4}, element, + jacobian_matrix, basis) @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(contravariant_vectors)) - - for ii in eachnode(basis) - # Multiply derivative_matrix to i-dimension to differentiate wrt ξ - result += 0.5 * derivative_matrix[i, ii] * ( - node_coordinates[m, ii, j, k, element] * jacobian_matrix[l, 2, ii, j, k, element] - - node_coordinates[l, ii, j, k, element] * jacobian_matrix[m, 2, ii, j, k, element]) - end - - contravariant_vectors[n, 3, i, j, k, element] = result + # Calculate Determinant by using Sarrus formula (about 100 times faster than LinearAlgebra.det()) + inverse_jacobian[i, j, k, element] = inv(jacobian_matrix[1, 1, i, j, k, + element] * + jacobian_matrix[2, 2, i, j, k, + element] * + jacobian_matrix[3, 3, i, j, k, element] + + jacobian_matrix[1, 2, i, j, k, + element] * + jacobian_matrix[2, 3, i, j, k, + element] * + jacobian_matrix[3, 1, i, j, k, element] + + jacobian_matrix[1, 3, i, j, k, + element] * + jacobian_matrix[2, 1, i, j, k, + element] * + jacobian_matrix[3, 2, i, j, k, element] - + jacobian_matrix[3, 1, i, j, k, + element] * + jacobian_matrix[2, 2, i, j, k, + element] * + jacobian_matrix[1, 3, i, j, k, element] - + jacobian_matrix[3, 2, i, j, k, + element] * + jacobian_matrix[2, 3, i, j, k, + element] * + jacobian_matrix[1, 1, i, j, k, element] - + jacobian_matrix[3, 3, i, j, k, + element] * + jacobian_matrix[2, 1, i, j, k, + element] * + jacobian_matrix[1, 2, i, j, k, element]) end - # Second summand -0.5 * (Xₘ Xₗ_ξ - Xₗ Xₘ_ξ)_η - @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - result = zero(eltype(contravariant_vectors)) - - for ii in eachnode(basis) - # Multiply derivative_matrix to j-dimension to differentiate wrt η - result += 0.5 * derivative_matrix[j, ii] * ( - node_coordinates[m, i, ii, k, element] * jacobian_matrix[l, 1, i, ii, k, element] - - node_coordinates[l, i, ii, k, element] * jacobian_matrix[m, 1, i, ii, k, element]) - end - - contravariant_vectors[n, 3, i, j, k, element] -= result - end - end - - return contravariant_vectors -end - - -# Calculate inverse Jacobian (determinant of Jacobian matrix of the mapping) in each node -function calc_inverse_jacobian!(inverse_jacobian::AbstractArray{<:Any, 4}, element, jacobian_matrix, basis) - @turbo for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - # Calculate Determinant by using Sarrus formula (about 100 times faster than LinearAlgebra.det()) - inverse_jacobian[i, j, k, element] = inv( - jacobian_matrix[1, 1, i, j, k, element] * jacobian_matrix[2, 2, i, j, k, element] * jacobian_matrix[3, 3, i, j, k, element] + - jacobian_matrix[1, 2, i, j, k, element] * jacobian_matrix[2, 3, i, j, k, element] * jacobian_matrix[3, 1, i, j, k, element] + - jacobian_matrix[1, 3, i, j, k, element] * jacobian_matrix[2, 1, i, j, k, element] * jacobian_matrix[3, 2, i, j, k, element] - - jacobian_matrix[3, 1, i, j, k, element] * jacobian_matrix[2, 2, i, j, k, element] * jacobian_matrix[1, 3, i, j, k, element] - - jacobian_matrix[3, 2, i, j, k, element] * jacobian_matrix[2, 3, i, j, k, element] * jacobian_matrix[1, 1, i, j, k, element] - - jacobian_matrix[3, 3, i, j, k, element] * jacobian_matrix[2, 1, i, j, k, element] * 
jacobian_matrix[1, 2, i, j, k, element] ) - end - - return inverse_jacobian + return inverse_jacobian end - # Save id of left neighbor of every element -function initialize_left_neighbor_connectivity!(left_neighbors, mesh::StructuredMesh{3}, linear_indices) - # Neighbors in x-direction - for cell_z in 1:size(mesh, 3), cell_y in 1:size(mesh, 2) - # Inner elements - for cell_x in 2:size(mesh, 1) - element = linear_indices[cell_x, cell_y, cell_z] - left_neighbors[1, element] = linear_indices[cell_x - 1, cell_y, cell_z] - end - - if isperiodic(mesh, 1) - # Periodic boundary - left_neighbors[1, linear_indices[1, cell_y, cell_z]] = linear_indices[end, cell_y, cell_z] - else - left_neighbors[1, linear_indices[1, cell_y, cell_z]] = 0 - end - end - - # Neighbors in y-direction - for cell_z in 1:size(mesh, 3), cell_x in 1:size(mesh, 1) - # Inner elements - for cell_y in 2:size(mesh, 2) - element = linear_indices[cell_x, cell_y, cell_z] - left_neighbors[2, element] = linear_indices[cell_x, cell_y - 1, cell_z] +function initialize_left_neighbor_connectivity!(left_neighbors, mesh::StructuredMesh{3}, + linear_indices) + # Neighbors in x-direction + for cell_z in 1:size(mesh, 3), cell_y in 1:size(mesh, 2) + # Inner elements + for cell_x in 2:size(mesh, 1) + element = linear_indices[cell_x, cell_y, cell_z] + left_neighbors[1, element] = linear_indices[cell_x - 1, cell_y, cell_z] + end + + if isperiodic(mesh, 1) + # Periodic boundary + left_neighbors[1, linear_indices[1, cell_y, cell_z]] = linear_indices[end, + cell_y, + cell_z] + else + left_neighbors[1, linear_indices[1, cell_y, cell_z]] = 0 + end end - if isperiodic(mesh, 2) - # Periodic boundary - left_neighbors[2, linear_indices[cell_x, 1, cell_z]] = linear_indices[cell_x, end, cell_z] - else - left_neighbors[2, linear_indices[cell_x, 1, cell_z]] = 0 - end - end - - # Neighbors in z-direction - for cell_y in 1:size(mesh, 2), cell_x in 1:size(mesh, 1) - # Inner elements - for cell_z in 2:size(mesh, 3) - element = linear_indices[cell_x, cell_y, cell_z] - left_neighbors[3, element] = linear_indices[cell_x, cell_y, cell_z - 1] + # Neighbors in y-direction + for cell_z in 1:size(mesh, 3), cell_x in 1:size(mesh, 1) + # Inner elements + for cell_y in 2:size(mesh, 2) + element = linear_indices[cell_x, cell_y, cell_z] + left_neighbors[2, element] = linear_indices[cell_x, cell_y - 1, cell_z] + end + + if isperiodic(mesh, 2) + # Periodic boundary + left_neighbors[2, linear_indices[cell_x, 1, cell_z]] = linear_indices[cell_x, + end, + cell_z] + else + left_neighbors[2, linear_indices[cell_x, 1, cell_z]] = 0 + end end - if isperiodic(mesh, 3) - # Periodic boundary - left_neighbors[3, linear_indices[cell_x, cell_y, 1]] = linear_indices[cell_x, cell_y, end] - else - left_neighbors[3, linear_indices[cell_x, cell_y, 1]] = 0 + # Neighbors in z-direction + for cell_y in 1:size(mesh, 2), cell_x in 1:size(mesh, 1) + # Inner elements + for cell_z in 2:size(mesh, 3) + element = linear_indices[cell_x, cell_y, cell_z] + left_neighbors[3, element] = linear_indices[cell_x, cell_y, cell_z - 1] + end + + if isperiodic(mesh, 3) + # Periodic boundary + left_neighbors[3, linear_indices[cell_x, cell_y, 1]] = linear_indices[cell_x, + cell_y, + end] + else + left_neighbors[3, linear_indices[cell_x, cell_y, 1]] = 0 + end end - end - return left_neighbors + return left_neighbors end - end # @muladd diff --git a/src/solvers/dgsem_structured/dg.jl b/src/solvers/dgsem_structured/dg.jl index c4ba534b496..5cf4c4ef78c 100644 --- a/src/solvers/dgsem_structured/dg.jl +++ 
b/src/solvers/dgsem_structured/dg.jl @@ -3,68 +3,74 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # This method is called when a SemidiscretizationHyperbolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. -function create_cache(mesh::StructuredMesh, equations::AbstractEquations, dg::DG, ::Any, ::Type{uEltype}) where {uEltype<:Real} - elements = init_elements(mesh, equations, dg.basis, uEltype) +function create_cache(mesh::StructuredMesh, equations::AbstractEquations, dg::DG, ::Any, + ::Type{uEltype}) where {uEltype <: Real} + elements = init_elements(mesh, equations, dg.basis, uEltype) - cache = (; elements) + cache = (; elements) - # Add specialized parts of the cache required to compute the volume integral etc. - cache = (;cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) - return cache + return cache end # Extract contravariant vector Ja^i (i = index) as SVector @inline function get_contravariant_vector(index, contravariant_vectors, indices...) - SVector(ntuple(@inline(dim -> contravariant_vectors[dim, index, indices...]), Val(ndims(contravariant_vectors) - 3))) + SVector(ntuple(@inline(dim->contravariant_vectors[dim, index, indices...]), + Val(ndims(contravariant_vectors) - 3))) end - -@inline function calc_boundary_flux_by_direction!(surface_flux_values, u, t, orientation, +@inline function calc_boundary_flux_by_direction!(surface_flux_values, u, t, + orientation, boundary_condition::BoundaryConditionPeriodic, mesh::StructuredMesh, equations, surface_integral, dg::DG, cache, - direction, node_indices, surface_node_indices, element) - @assert isperiodic(mesh, orientation) + direction, node_indices, + surface_node_indices, element) + @assert isperiodic(mesh, orientation) end - -@inline function calc_boundary_flux_by_direction!(surface_flux_values, u, t, orientation, +@inline function calc_boundary_flux_by_direction!(surface_flux_values, u, t, + orientation, boundary_condition, mesh::StructuredMesh, equations, surface_integral, dg::DG, cache, - direction, node_indices, surface_node_indices, element) - @unpack node_coordinates, contravariant_vectors, inverse_jacobian = cache.elements - @unpack surface_flux = surface_integral - - u_inner = get_node_vars(u, equations, dg, node_indices..., element) - x = get_node_coords(node_coordinates, equations, dg, node_indices..., element) - - # If the mapping is orientation-reversing, the contravariant vectors' orientation - # is reversed as well. The normal vector must be oriented in the direction - # from `left_element` to `right_element`, or the numerical flux will be computed - # incorrectly (downwind direction). - sign_jacobian = sign(inverse_jacobian[node_indices..., element]) - - # Contravariant vector Ja^i is the normal vector - normal = sign_jacobian * get_contravariant_vector(orientation, contravariant_vectors, - node_indices..., element) - - # If the mapping is orientation-reversing, the normal vector will be reversed (see above). - # However, the flux now has the wrong sign, since we need the physical flux in normal direction. 
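An aside on the sign convention in the hunk above: the inverse Jacobian carries the orientation of the mapping, and its sign is what flips the normal back to the direction from the left to the right element. A minimal, self-contained sketch of both ideas in plain Julia (not Trixi's API; `det3` is a hypothetical helper mirroring the Sarrus rule used in `calc_inverse_jacobian!` above):

```julia
using LinearAlgebra

# Explicit 3x3 determinant (Sarrus rule), mirroring the node-wise computation
# in `calc_inverse_jacobian!`, which avoids LinearAlgebra.det in hot loops.
det3(J) = J[1, 1] * J[2, 2] * J[3, 3] + J[1, 2] * J[2, 3] * J[3, 1] +
          J[1, 3] * J[2, 1] * J[3, 2] - J[3, 1] * J[2, 2] * J[1, 3] -
          J[3, 2] * J[2, 3] * J[1, 1] - J[3, 3] * J[2, 1] * J[1, 2]

J = [2.0 0.1 0.0; 0.0 1.5 0.2; 0.1 0.0 1.0]
@assert det3(J) ≈ det(J)

# An orientation-reversing mapping has a negative determinant; multiplying
# the normal by sign(det(J)) (i.e., by `sign_jacobian`) restores the
# intended direction.
J_reversed = J[:, [2, 1, 3]]  # swapping two columns flips the sign
@assert sign(det3(J_reversed)) == -sign(det3(J))
```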
- flux = sign_jacobian * boundary_condition(u_inner, normal, direction, x, t, surface_flux, equations) - - for v in eachvariable(equations) - surface_flux_values[v, surface_node_indices..., direction, element] = flux[v] - end + direction, node_indices, + surface_node_indices, element) + @unpack node_coordinates, contravariant_vectors, inverse_jacobian = cache.elements + @unpack surface_flux = surface_integral + + u_inner = get_node_vars(u, equations, dg, node_indices..., element) + x = get_node_coords(node_coordinates, equations, dg, node_indices..., element) + + # If the mapping is orientation-reversing, the contravariant vectors' orientation + # is reversed as well. The normal vector must be oriented in the direction + # from `left_element` to `right_element`, or the numerical flux will be computed + # incorrectly (downwind direction). + sign_jacobian = sign(inverse_jacobian[node_indices..., element]) + + # Contravariant vector Ja^i is the normal vector + normal = sign_jacobian * + get_contravariant_vector(orientation, contravariant_vectors, + node_indices..., element) + + # If the mapping is orientation-reversing, the normal vector will be reversed (see above). + # However, the flux now has the wrong sign, since we need the physical flux in normal direction. + flux = sign_jacobian * + boundary_condition(u_inner, normal, direction, x, t, surface_flux, equations) + + for v in eachvariable(equations) + surface_flux_values[v, surface_node_indices..., direction, element] = flux[v] + end end - include("containers.jl") include("dg_1d.jl") include("dg_2d.jl") @@ -77,6 +83,4 @@ include("indicators_3d.jl") # Specialized implementations used to improve performance include("dg_2d_compressible_euler.jl") include("dg_3d_compressible_euler.jl") - - end # @muladd diff --git a/src/solvers/dgsem_structured/dg_1d.jl b/src/solvers/dgsem_structured/dg_1d.jl index e33328a8204..3d63cc5af36 100644 --- a/src/solvers/dgsem_structured/dg_1d.jl +++ b/src/solvers/dgsem_structured/dg_1d.jl @@ -3,107 +3,114 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function rhs!(du, u, t, mesh::StructuredMesh{1}, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end - # Calculate interface and boundary fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache, u, mesh, equations, dg.surface_integral, dg) + # Calculate interface and boundary fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache, u, mesh, equations, dg.surface_integral, dg) + end - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, u, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, u, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) + end - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) + end - return nothing + return nothing end - function calc_interface_flux!(cache, u, mesh::StructuredMesh{1}, equations, surface_integral, dg::DG) - @unpack surface_flux = surface_integral + @unpack surface_flux = surface_integral - @threaded for element in eachelement(dg, cache) - left_element = cache.elements.left_neighbors[1, element] + @threaded for element in eachelement(dg, cache) + left_element = cache.elements.left_neighbors[1, element] - if left_element > 0 # left_element = 0 at boundaries - u_ll = get_node_vars(u, equations, dg, nnodes(dg), left_element) - u_rr = get_node_vars(u, equations, dg, 1, element) + if left_element > 0 # left_element = 0 at boundaries + u_ll = get_node_vars(u, equations, dg, nnodes(dg), left_element) + u_rr = get_node_vars(u, equations, dg, 1, element) - f1 = surface_flux(u_ll, u_rr, 1, equations) + f1 = surface_flux(u_ll, u_rr, 1, equations) - for v in eachvariable(equations) - cache.elements.surface_flux_values[v, 2, left_element] = f1[v] - cache.elements.surface_flux_values[v, 1, element] = f1[v] - end + for v in eachvariable(equations) + cache.elements.surface_flux_values[v, 2, left_element] = f1[v] + cache.elements.surface_flux_values[v, 1, element] = f1[v] + end + end end - end - return nothing + return nothing end - 
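The 1D interface loop above relies on the `left_neighbors` array filled by `initialize_left_neighbor_connectivity!` earlier in this patch: each element stores the id of its left neighbor, `0` marks a physical boundary, and periodicity makes the first element see the last one. A minimal sketch of the pattern with hypothetical names (not Trixi's API):

```julia
nelements = 4
left_neighbors = [nelements; 1:(nelements - 1)]  # periodic in x: [4, 1, 2, 3]

u_surface = rand(nelements)                      # one trace value per element
surface_flux(u_ll, u_rr) = 0.5 * (u_ll + u_rr)   # central flux as a stand-in

# [side, element]: side 1 = left face, side 2 = right face
surface_flux_values = zeros(2, nelements)
for element in 1:nelements
    left = left_neighbors[element]
    left > 0 || continue                         # skip physical boundaries
    f = surface_flux(u_surface[left], u_surface[element])
    surface_flux_values[2, left] = f             # right face of the neighbor
    surface_flux_values[1, element] = f          # left face of this element
end
```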
# TODO: Taal dimension agnostic function calc_boundary_flux!(cache, u, t, boundary_condition::BoundaryConditionPeriodic, - mesh::StructuredMesh{1}, equations, surface_integral, dg::DG) - @assert isperiodic(mesh) + mesh::StructuredMesh{1}, equations, surface_integral, + dg::DG) + @assert isperiodic(mesh) end function calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, - mesh::StructuredMesh{1}, equations, surface_integral, dg::DG) - @unpack surface_flux = surface_integral - @unpack surface_flux_values, node_coordinates = cache.elements + mesh::StructuredMesh{1}, equations, surface_integral, + dg::DG) + @unpack surface_flux = surface_integral + @unpack surface_flux_values, node_coordinates = cache.elements - orientation = 1 + orientation = 1 - # Negative x-direction - direction = 1 + # Negative x-direction + direction = 1 - u_rr = get_node_vars(u, equations, dg, 1, 1) - x = get_node_coords(node_coordinates, equations, dg, 1, 1) + u_rr = get_node_vars(u, equations, dg, 1, 1) + x = get_node_coords(node_coordinates, equations, dg, 1, 1) - flux = boundary_conditions[direction](u_rr, orientation, direction, x, t, surface_flux, equations) + flux = boundary_conditions[direction](u_rr, orientation, direction, x, t, + surface_flux, equations) - for v in eachvariable(equations) - surface_flux_values[v, direction, 1] = flux[v] - end + for v in eachvariable(equations) + surface_flux_values[v, direction, 1] = flux[v] + end - # Positive x-direction - direction = 2 + # Positive x-direction + direction = 2 - u_rr = get_node_vars(u, equations, dg, nnodes(dg), nelements(dg, cache)) - x = get_node_coords(node_coordinates, equations, dg, nnodes(dg), nelements(dg, cache)) + u_rr = get_node_vars(u, equations, dg, nnodes(dg), nelements(dg, cache)) + x = get_node_coords(node_coordinates, equations, dg, nnodes(dg), + nelements(dg, cache)) - flux = boundary_conditions[direction](u_rr, orientation, direction, x, t, surface_flux, equations) + flux = boundary_conditions[direction](u_rr, orientation, direction, x, t, + surface_flux, equations) - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, direction, nelements(dg, cache)] = flux[v] - end + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, direction, nelements(dg, cache)] = flux[v] + end end - - end # @muladd diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index a8972dfe766..c013bf62d98 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -3,413 +3,465 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function rhs!(du, u, t, mesh::StructuredMesh{2}, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache, u, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, u, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - return nothing -end + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache, u, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg) + end -@inline function weak_form_kernel!(du, u, - element, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, - nonconservative_terms::False, equations, - dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. 
- @unpack derivative_dhat = dg.basis - @unpack contravariant_vectors = cache.elements - - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - - flux1 = flux(u_node, 1, equations) - flux2 = flux(u_node, 2, equations) - - # Compute the contravariant flux by taking the scalar product of the - # first contravariant vector Ja^1 and the flux vector - Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) - contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], contravariant_flux1, equations, dg, ii, j, element) + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, u, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) end - # Compute the contravariant flux by taking the scalar product of the - # second contravariant vector Ja^2 and the flux vector - Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) - contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2 - for jj in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], contravariant_flux2, equations, dg, i, jj, element) + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) end - end - return nothing + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) + end + + return nothing end +@inline function weak_form_kernel!(du, u, + element, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, + nonconservative_terms::False, equations, + dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. 
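The comment above can be demonstrated in two lines. A small sketch of why `alpha = true` is a safe default blending coefficient:

```julia
# Multiplying by the Bool `true` promotes to 1.0 and is exact, so
# `alpha = true` leaves the computed flux bit-identical, while a real
# blending factor alpha::Float64 scales it as usual. The compiler can
# constant-propagate the `true` case away entirely.
x = 0.1 + 0.2
@assert true * x === x        # exactly the same floating point value
@assert 0.5 * x == x / 2      # genuine blending still works
```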
+ @unpack derivative_dhat = dg.basis + @unpack contravariant_vectors = cache.elements + + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + + flux1 = flux(u_node, 1, equations) + flux2 = flux(u_node, 2, equations) + + # Compute the contravariant flux by taking the scalar product of the + # first contravariant vector Ja^1 and the flux vector + Ja11, Ja12 = get_contravariant_vector(1, contravariant_vectors, i, j, element) + contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], + contravariant_flux1, equations, dg, ii, j, + element) + end + + # Compute the contravariant flux by taking the scalar product of the + # second contravariant vector Ja^2 and the flux vector + Ja21, Ja22 = get_contravariant_vector(2, contravariant_vectors, i, j, element) + contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2 + for jj in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], + contravariant_flux2, equations, dg, i, jj, + element) + end + end + + return nothing +end @inline function flux_differencing_kernel!(du, u, - element, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + element, + mesh::Union{StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2} + }, nonconservative_terms::False, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - - # Calculate volume integral in one element - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - - # pull the contravariant vectors in each coordinate direction - Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element) - Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element) - - # All diagonal entries of `derivative_split` are zero. Thus, we can skip - # the computation of the diagonal terms. In addition, we use the symmetry - # of the `volume_flux` to save half of the possible two-point flux - # computations. 
- - # x direction - for ii in (i+1):nnodes(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, element) - # pull the contravariant vectors and compute the average - Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, element) - Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) - # compute the contravariant sharp flux in the direction of the - # averaged contravariant vector - fluxtilde1 = volume_flux(u_node, u_node_ii, Ja1_avg, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], fluxtilde1, equations, dg, i, j, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], fluxtilde1, equations, dg, ii, j, element) - end + volume_flux, dg::DGSEM, cache, alpha = true) + @unpack derivative_split = dg.basis + @unpack contravariant_vectors = cache.elements - # y direction - for jj in (j+1):nnodes(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, element) - # pull the contravariant vectors and compute the average - Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, element) - Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) - # compute the contravariant sharp flux in the direction of the - # averaged contravariant vector - fluxtilde2 = volume_flux(u_node, u_node_jj, Ja2_avg, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], fluxtilde2, equations, dg, i, j, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], fluxtilde2, equations, dg, i, jj, element) + # Calculate volume integral in one element + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + + # pull the contravariant vectors in each coordinate direction + Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element) + Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element) + + # All diagonal entries of `derivative_split` are zero. Thus, we can skip + # the computation of the diagonal terms. In addition, we use the symmetry + # of the `volume_flux` to save half of the possible two-point flux + # computations. 
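A compact sketch of the symmetry argument made in the comment above, in plain Julia (`D` stands in for `derivative_split`, and the flux is a symmetric two-point flux):

```julia
n = 4
A = rand(n, n)
D = A - A'                         # skew-symmetric, so the diagonal is zero
u = rand(n)
du = zeros(n)

# Symmetric two-point flux (entropy-conservative flux for Burgers' equation)
f_vol(a, b) = (a^2 + a * b + b^2) / 6

# Visit each pair (i, ii) once and scatter to both nodes: this halves the
# number of flux evaluations compared with the full double loop, and the
# zero diagonal means the pair (i, i) never needs to be computed.
for i in 1:n, ii in (i + 1):n
    f = f_vol(u[i], u[ii])
    du[i] += D[i, ii] * f
    du[ii] += D[ii, i] * f
end
```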
+ + # x direction + for ii in (i + 1):nnodes(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, element) + # pull the contravariant vectors and compute the average + Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, + element) + Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) + # compute the contravariant sharp flux in the direction of the + # averaged contravariant vector + fluxtilde1 = volume_flux(u_node, u_node_ii, Ja1_avg, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], fluxtilde1, + equations, dg, i, j, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], fluxtilde1, + equations, dg, ii, j, element) + end + + # y direction + for jj in (j + 1):nnodes(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, element) + # pull the contravariant vectors and compute the average + Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, + element) + Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) + # compute the contravariant sharp flux in the direction of the + # averaged contravariant vector + fluxtilde2 = volume_flux(u_node, u_node_jj, Ja2_avg, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], fluxtilde2, + equations, dg, i, j, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], fluxtilde2, + equations, dg, i, jj, element) + end end - end end @inline function flux_differencing_kernel!(du, u, - element, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + element, + mesh::Union{StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2} + }, nonconservative_terms::True, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - symmetric_flux, nonconservative_flux = volume_flux - - # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, dg, cache, alpha) - - # Calculate the remaining volume terms using the nonsymmetric generalized flux - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - - # pull the contravariant vectors in each coordinate direction - Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element) - Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element) - - # The diagonal terms are zero since the diagonal of `derivative_split` - # is zero. We ignore this for now. - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # Thus, we need to pass both to the nonconservative flux. - - # x direction - integral_contribution = zero(u_node) - for ii in eachnode(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, element) - # pull the contravariant vectors and compute the average - Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, element) - Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) - # Compute the contravariant nonconservative flux. 
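One detail worth spelling out from the loops above: the two-point fluxes are evaluated in the direction of an *averaged* contravariant vector, which is tied to free-stream preservation on curved meshes. A minimal scalar sketch (toy flux, not Trixi's):

```julia
# The contravariant vectors differ from node to node on a curved element;
# the two-point terms use Ja_avg = 0.5 * (Ja_node + Ja_node_ii).
Ja_node = [1.0, 0.2]
Ja_node_ii = [0.8, 0.4]
Ja_avg = 0.5 * (Ja_node + Ja_node_ii)

# Toy symmetric flux in direction `normal` for a scalar equation:
volume_flux(u_ll, u_rr, normal) = 0.5 * (u_ll + u_rr) .* normal
volume_flux(1.0, 1.0, Ja_avg)  # for a constant state this reduces to Ja_avg
```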
- fluxtilde1 = nonconservative_flux(u_node, u_node_ii, Ja1_node, Ja1_avg, equations) - integral_contribution = integral_contribution + derivative_split[i, ii] * fluxtilde1 - end + volume_flux, dg::DGSEM, cache, alpha = true) + @unpack derivative_split = dg.basis + @unpack contravariant_vectors = cache.elements + symmetric_flux, nonconservative_flux = volume_flux - # y direction - for jj in eachnode(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, element) - # pull the contravariant vectors and compute the average - Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, element) - Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) - # compute the contravariant nonconservative flux in the direction of the - # averaged contravariant vector - fluxtilde2 = nonconservative_flux(u_node, u_node_jj, Ja2_node, Ja2_avg, equations) - integral_contribution = integral_contribution + derivative_split[j, jj] * fluxtilde2 - end + # Apply the symmetric flux as usual + flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + dg, cache, alpha) - # The factor 0.5 cancels the factor 2 in the flux differencing form - multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, dg, i, j, element) - end + # Calculate the remaining volume terms using the nonsymmetric generalized flux + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + + # pull the contravariant vectors in each coordinate direction + Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, element) + Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, element) + + # The diagonal terms are zero since the diagonal of `derivative_split` + # is zero. We ignore this for now. + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # Thus, we need to pass both to the nonconservative flux. + + # x direction + integral_contribution = zero(u_node) + for ii in eachnode(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, element) + # pull the contravariant vectors and compute the average + Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, + element) + Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) + # Compute the contravariant nonconservative flux. + fluxtilde1 = nonconservative_flux(u_node, u_node_ii, Ja1_node, Ja1_avg, + equations) + integral_contribution = integral_contribution + + derivative_split[i, ii] * fluxtilde1 + end + + # y direction + for jj in eachnode(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, element) + # pull the contravariant vectors and compute the average + Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, + element) + Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) + # compute the contravariant nonconservative flux in the direction of the + # averaged contravariant vector + fluxtilde2 = nonconservative_flux(u_node, u_node_jj, Ja2_node, Ja2_avg, + equations) + integral_contribution = integral_contribution + + derivative_split[j, jj] * fluxtilde2 + end + + # The factor 0.5 cancels the factor 2 in the flux differencing form + multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, + dg, i, j, element) + end end - # Computing the normal vector for the FV method on curvilinear subcells. 
# To fulfill free-stream preservation we use the explicit formula B.53 in Appendix B.4 # by Hennemann, Rueda-Ramirez, Hindenlang, Gassner (2020) # "A provably entropy stable subcell shock capturing approach for high order split form DG for the compressible Euler equations" # [arXiv: 2008.12044v2](https://arxiv.org/pdf/2008.12044) @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) - @unpack contravariant_vectors = cache.elements - @unpack weights, derivative_matrix = dg.basis + @unpack contravariant_vectors = cache.elements + @unpack weights, derivative_matrix = dg.basis - # Performance improvement if the metric terms of the subcell FV method are only computed - # once at the beginning of the simulation, instead of at every Runge-Kutta stage - fstar1_L[:, 1, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_R)) + # Performance improvement if the metric terms of the subcell FV method are only computed + # once at the beginning of the simulation, instead of at every Runge-Kutta stage + fstar1_L[:, 1, :] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_R)) - for j in eachnode(dg) - normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, element) + for j in eachnode(dg) + normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, + element) - for i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) + for i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) - for m in 1:nnodes(dg) - normal_direction += weights[i-1] * derivative_matrix[i-1, m] * get_contravariant_vector(1, contravariant_vectors, m, j, element) - end + for m in 1:nnodes(dg) + normal_direction += weights[i - 1] * derivative_matrix[i - 1, m] * + get_contravariant_vector(1, contravariant_vectors, + m, j, element) + end - # Compute the contravariant flux - contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) + # Compute the contravariant flux + contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) - set_node_vars!(fstar1_L, contravariant_flux, equations, dg, i, j) - set_node_vars!(fstar1_R, contravariant_flux, equations, dg, i, j) + set_node_vars!(fstar1_L, contravariant_flux, equations, dg, i, j) + set_node_vars!(fstar1_R, contravariant_flux, equations, dg, i, j) + end end - end - - fstar2_L[:, :, 1 ] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 ] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_R)) - - for i in eachnode(dg) - normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, element) - for j in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) + fstar2_L[:, :, 1] .= zero(eltype(fstar2_L)) + fstar2_L[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_L)) + fstar2_R[:, :, 1] .= zero(eltype(fstar2_R)) + 
fstar2_R[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_R)) - for m in 1:nnodes(dg) - normal_direction += weights[j-1] * derivative_matrix[j-1, m] * get_contravariant_vector(2, contravariant_vectors, i, m, element) - end - - # Compute the contravariant flux by taking the scalar product of the - # normal vector and the flux vector - contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) - - set_node_vars!(fstar2_L, contravariant_flux, equations, dg, i, j) - set_node_vars!(fstar2_R, contravariant_flux, equations, dg, i, j) + for i in eachnode(dg) + normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, + element) + + for j in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i, j - 1, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) + + for m in 1:nnodes(dg) + normal_direction += weights[j - 1] * derivative_matrix[j - 1, m] * + get_contravariant_vector(2, contravariant_vectors, + i, m, element) + end + + # Compute the contravariant flux by taking the scalar product of the + # normal vector and the flux vector + contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) + + set_node_vars!(fstar2_L, contravariant_flux, equations, dg, i, j) + set_node_vars!(fstar2_R, contravariant_flux, equations, dg, i, j) + end end - end - return nothing + return nothing end # Calculate the finite volume fluxes inside curvilinear elements (**with non-conservative terms**). -@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u::AbstractArray{<:Any,4}, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, +@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, + u::AbstractArray{<:Any, 4}, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) - @unpack contravariant_vectors = cache.elements - @unpack weights, derivative_matrix = dg.basis - - volume_flux, nonconservative_flux = volume_flux_fv - - # Performance improvement if the metric terms of the subcell FV method are only computed - # once at the beginning of the simulation, instead of at every Runge-Kutta stage - fstar1_L[:, 1, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_R)) - - for j in eachnode(dg) - normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, element) - for i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) - - for m in eachnode(dg) - normal_direction += weights[i-1] * derivative_matrix[i-1, m] * get_contravariant_vector(1, contravariant_vectors, m, j, element) - end - - # Compute the conservative part of the contravariant flux - ftilde1 = volume_flux(u_ll, u_rr, normal_direction, equations) - - # Compute and add in the nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - ftilde1_L = ftilde1 + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - ftilde1_R = ftilde1 + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - set_node_vars!(fstar1_L, ftilde1_L, equations, dg, i, j) - set_node_vars!(fstar1_R, ftilde1_R, equations, dg, i, j) + @unpack contravariant_vectors = 
cache.elements + @unpack weights, derivative_matrix = dg.basis + + volume_flux, nonconservative_flux = volume_flux_fv + + # Performance improvement if the metric terms of the subcell FV method are only computed + # once at the beginning of the simulation, instead of at every Runge-Kutta stage + fstar1_L[:, 1, :] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_R)) + + for j in eachnode(dg) + normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, + element) + for i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) + + for m in eachnode(dg) + normal_direction += weights[i - 1] * derivative_matrix[i - 1, m] * + get_contravariant_vector(1, contravariant_vectors, + m, j, element) + end + + # Compute the conservative part of the contravariant flux + ftilde1 = volume_flux(u_ll, u_rr, normal_direction, equations) + + # Compute and add in the nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + ftilde1_L = ftilde1 + + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + ftilde1_R = ftilde1 + + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + set_node_vars!(fstar1_L, ftilde1_L, equations, dg, i, j) + set_node_vars!(fstar1_R, ftilde1_R, equations, dg, i, j) + end end - end - - # Fluxes in y - fstar2_L[:, :, 1 ] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 ] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_R)) - - # Compute inner fluxes - for i in eachnode(dg) - normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, element) - - for j in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) - - for m in eachnode(dg) - normal_direction += weights[j-1] * derivative_matrix[j-1, m] * get_contravariant_vector(2, contravariant_vectors, i, m, element) - end - - # Compute the conservative part of the contravariant flux - ftilde2 = volume_flux(u_ll, u_rr, normal_direction, equations) - - # Compute and add in the nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - ftilde2_L = ftilde2 + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - ftilde2_R = ftilde2 + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - set_node_vars!(fstar2_L, ftilde2_L, equations, dg, i, j) - set_node_vars!(fstar2_R, ftilde2_R, equations, dg, i, j) + + # Fluxes in y + fstar2_L[:, :, 1] .= zero(eltype(fstar2_L)) + fstar2_L[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_L)) + fstar2_R[:, :, 1] .= zero(eltype(fstar2_R)) + fstar2_R[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_R)) + + # Compute inner fluxes + for i in eachnode(dg) + normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, + element) + + for j in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i, j - 1, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) + + for m in eachnode(dg) + 
normal_direction += weights[j - 1] * derivative_matrix[j - 1, m] * + get_contravariant_vector(2, contravariant_vectors, + i, m, element) + end + + # Compute the conservative part of the contravariant flux + ftilde2 = volume_flux(u_ll, u_rr, normal_direction, equations) + + # Compute and add in the nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + ftilde2_L = ftilde2 + + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + ftilde2_R = ftilde2 + + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + set_node_vars!(fstar2_L, ftilde2_L, equations, dg, i, j) + set_node_vars!(fstar2_R, ftilde2_R, equations, dg, i, j) + end end - end - return nothing + return nothing end - function calc_interface_flux!(cache, u, mesh::StructuredMesh{2}, nonconservative_terms, # can be True/False equations, surface_integral, dg::DG) - @unpack elements = cache - - @threaded for element in eachelement(dg, cache) - # Interfaces in negative directions - # Faster version of "for orientation in (1, 2)" - - # Interfaces in x-direction (`orientation` = 1) - calc_interface_flux!(elements.surface_flux_values, - elements.left_neighbors[1, element], - element, 1, u, mesh, - nonconservative_terms, equations, - surface_integral, dg, cache) - - # Interfaces in y-direction (`orientation` = 2) - calc_interface_flux!(elements.surface_flux_values, - elements.left_neighbors[2, element], - element, 2, u, mesh, - nonconservative_terms, equations, - surface_integral, dg, cache) - end - - return nothing -end + @unpack elements = cache + + @threaded for element in eachelement(dg, cache) + # Interfaces in negative directions + # Faster version of "for orientation in (1, 2)" + + # Interfaces in x-direction (`orientation` = 1) + calc_interface_flux!(elements.surface_flux_values, + elements.left_neighbors[1, element], + element, 1, u, mesh, + nonconservative_terms, equations, + surface_integral, dg, cache) + + # Interfaces in y-direction (`orientation` = 2) + calc_interface_flux!(elements.surface_flux_values, + elements.left_neighbors[2, element], + element, 2, u, mesh, + nonconservative_terms, equations, + surface_integral, dg, cache) + end + return nothing +end @inline function calc_interface_flux!(surface_flux_values, left_element, right_element, orientation, u, mesh::StructuredMesh{2}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache) - # This is slow for LSA, but for some reason faster for Euler (see #519) - if left_element <= 0 # left_element = 0 at boundaries - return nothing - end - - @unpack surface_flux = surface_integral - @unpack contravariant_vectors, inverse_jacobian = cache.elements - - right_direction = 2 * orientation - left_direction = right_direction - 1 - - for i in eachnode(dg) - if orientation == 1 - u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, left_element) - u_rr = get_node_vars(u, equations, dg, 1, i, right_element) - - # If the mapping is orientation-reversing, the contravariant vectors' orientation - # is reversed as well. The normal vector must be oriented in the direction - # from `left_element` to `right_element`, or the numerical flux will be computed - # incorrectly (downwind direction). 
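The `right_direction`/`left_direction` arithmetic in the interface kernels above encodes the face numbering. A two-line sketch:

```julia
# Faces are numbered -x, +x, -y, +y = 1, 2, 3, 4. For orientation o
# (1 = x, 2 = y), the "+" face of the left element is 2o and the "-" face
# of the right element is 2o - 1, so one flux is written to both slots.
for orientation in (1, 2)
    right_direction = 2 * orientation
    left_direction = right_direction - 1
    println((orientation, left_direction, right_direction))
end
```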
- sign_jacobian = sign(inverse_jacobian[1, i, right_element]) - - # First contravariant vector Ja^1 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(1, contravariant_vectors, - 1, i, right_element) - else # orientation == 2 - u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), left_element) - u_rr = get_node_vars(u, equations, dg, i, 1, right_element) - - # See above - sign_jacobian = sign(inverse_jacobian[i, 1, right_element]) - - # Second contravariant vector Ja^2 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(2, contravariant_vectors, - i, 1, right_element) + # This is slow for LSA, but for some reason faster for Euler (see #519) + if left_element <= 0 # left_element = 0 at boundaries + return nothing end - # If the mapping is orientation-reversing, the normal vector will be reversed (see above). - # However, the flux now has the wrong sign, since we need the physical flux in normal direction. - flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) + @unpack surface_flux = surface_integral + @unpack contravariant_vectors, inverse_jacobian = cache.elements - for v in eachvariable(equations) - surface_flux_values[v, i, right_direction, left_element] = flux[v] - surface_flux_values[v, i, left_direction, right_element] = flux[v] + right_direction = 2 * orientation + left_direction = right_direction - 1 + + for i in eachnode(dg) + if orientation == 1 + u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, left_element) + u_rr = get_node_vars(u, equations, dg, 1, i, right_element) + + # If the mapping is orientation-reversing, the contravariant vectors' orientation + # is reversed as well. The normal vector must be oriented in the direction + # from `left_element` to `right_element`, or the numerical flux will be computed + # incorrectly (downwind direction). + sign_jacobian = sign(inverse_jacobian[1, i, right_element]) + + # First contravariant vector Ja^1 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(1, contravariant_vectors, + 1, i, right_element) + else # orientation == 2 + u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), left_element) + u_rr = get_node_vars(u, equations, dg, i, 1, right_element) + + # See above + sign_jacobian = sign(inverse_jacobian[i, 1, right_element]) + + # Second contravariant vector Ja^2 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(2, contravariant_vectors, + i, 1, right_element) + end + + # If the mapping is orientation-reversing, the normal vector will be reversed (see above). + # However, the flux now has the wrong sign, since we need the physical flux in normal direction. 
+ flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) + + for v in eachvariable(equations) + surface_flux_values[v, i, right_direction, left_element] = flux[v] + surface_flux_values[v, i, left_direction, right_element] = flux[v] + end end - end - return nothing + return nothing end @inline function calc_interface_flux!(surface_flux_values, left_element, right_element, @@ -417,148 +469,162 @@ end mesh::StructuredMesh{2}, nonconservative_terms::True, equations, surface_integral, dg::DG, cache) - # See comment on `calc_interface_flux!` with `nonconservative_terms::False` - if left_element <= 0 # left_element = 0 at boundaries - return nothing - end - - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack contravariant_vectors, inverse_jacobian = cache.elements - - right_direction = 2 * orientation - left_direction = right_direction - 1 - - for i in eachnode(dg) - if orientation == 1 - u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, left_element) - u_rr = get_node_vars(u, equations, dg, 1, i, right_element) - - # If the mapping is orientation-reversing, the contravariant vectors' orientation - # is reversed as well. The normal vector must be oriented in the direction - # from `left_element` to `right_element`, or the numerical flux will be computed - # incorrectly (downwind direction). - sign_jacobian = sign(inverse_jacobian[1, i, right_element]) - - # First contravariant vector Ja^1 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(1, contravariant_vectors, - 1, i, right_element) - else # orientation == 2 - u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), left_element) - u_rr = get_node_vars(u, equations, dg, i, 1, right_element) - - # See above - sign_jacobian = sign(inverse_jacobian[i, 1, right_element]) - - # Second contravariant vector Ja^2 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(2, contravariant_vectors, - i, 1, right_element) + # See comment on `calc_interface_flux!` with `nonconservative_terms::False` + if left_element <= 0 # left_element = 0 at boundaries + return nothing end - # If the mapping is orientation-reversing, the normal vector will be reversed (see above). - # However, the flux now has the wrong sign, since we need the physical flux in normal direction. - flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) - - # Compute both nonconservative fluxes - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # However, both are the same at watertight interfaces, so we pass the - # `normal_direction` twice. 
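The factor 0.5 on the nonconservative terms, used both here and in the subcell FV fluxes above, can be illustrated with a scalar toy term (hypothetical flux functions, not Trixi's):

```julia
# Central (SAT-type) coupling: each side receives the full conservative
# interface flux plus one half of its own, generally nonsymmetric,
# nonconservative two-point term.
cons_flux(u_ll, u_rr, n) = 0.5 * (u_ll + u_rr) * n   # symmetric part
noncons(u_ll, u_rr, n) = n * u_ll * (u_rr - u_ll)    # nonsymmetric part

u_ll, u_rr, n = 1.0, 2.0, 1.0
flux = cons_flux(u_ll, u_rr, n)
value_left = flux + 0.5 * noncons(u_ll, u_rr, n)   # written to the left element
value_right = flux + 0.5 * noncons(u_rr, u_ll, n)  # written to the right element
```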
- # Scale with sign_jacobian to ensure that the normal_direction matches that - # from the flux above - noncons_left = sign_jacobian * nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - noncons_right = sign_jacobian * nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, i, right_direction, left_element] = flux[v] + 0.5 * noncons_left[v] - surface_flux_values[v, i, left_direction, right_element] = flux[v] + 0.5 * noncons_right[v] + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack contravariant_vectors, inverse_jacobian = cache.elements + + right_direction = 2 * orientation + left_direction = right_direction - 1 + + for i in eachnode(dg) + if orientation == 1 + u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, left_element) + u_rr = get_node_vars(u, equations, dg, 1, i, right_element) + + # If the mapping is orientation-reversing, the contravariant vectors' orientation + # is reversed as well. The normal vector must be oriented in the direction + # from `left_element` to `right_element`, or the numerical flux will be computed + # incorrectly (downwind direction). + sign_jacobian = sign(inverse_jacobian[1, i, right_element]) + + # First contravariant vector Ja^1 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(1, contravariant_vectors, + 1, i, right_element) + else # orientation == 2 + u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), left_element) + u_rr = get_node_vars(u, equations, dg, i, 1, right_element) + + # See above + sign_jacobian = sign(inverse_jacobian[i, 1, right_element]) + + # Second contravariant vector Ja^2 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(2, contravariant_vectors, + i, 1, right_element) + end + + # If the mapping is orientation-reversing, the normal vector will be reversed (see above). + # However, the flux now has the wrong sign, since we need the physical flux in normal direction. + flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) + + # Compute both nonconservative fluxes + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # However, both are the same at watertight interfaces, so we pass the + # `normal_direction` twice. 
+ # Scale with sign_jacobian to ensure that the normal_direction matches that + # from the flux above + noncons_left = sign_jacobian * + nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + noncons_right = sign_jacobian * + nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, i, right_direction, left_element] = flux[v] + + 0.5 * + noncons_left[v] + surface_flux_values[v, i, left_direction, right_element] = flux[v] + + 0.5 * + noncons_right[v] + end end - end - return nothing + return nothing end - # TODO: Taal dimension agnostic function calc_boundary_flux!(cache, u, t, boundary_condition::BoundaryConditionPeriodic, - mesh::StructuredMesh{2}, equations, surface_integral, dg::DG) - @assert isperiodic(mesh) + mesh::StructuredMesh{2}, equations, surface_integral, + dg::DG) + @assert isperiodic(mesh) end function calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, - mesh::StructuredMesh{2}, equations, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - linear_indices = LinearIndices(size(mesh)) - - for cell_y in axes(mesh, 2) - # Negative x-direction - direction = 1 - element = linear_indices[begin, cell_y] - - for j in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (1, j), (j,), element) - end - - # Positive x-direction - direction = 2 - element = linear_indices[end, cell_y] - - for j in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (nnodes(dg), j), (j,), element) - end - end - - for cell_x in axes(mesh, 1) - # Negative y-direction - direction = 3 - element = linear_indices[cell_x, begin] - - for i in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (i, 1), (i,), element) + mesh::StructuredMesh{2}, equations, surface_integral, + dg::DG) + @unpack surface_flux_values = cache.elements + linear_indices = LinearIndices(size(mesh)) + + for cell_y in axes(mesh, 2) + # Negative x-direction + direction = 1 + element = linear_indices[begin, cell_y] + + for j in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (1, j), (j,), element) + end + + # Positive x-direction + direction = 2 + element = linear_indices[end, cell_y] + + for j in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (nnodes(dg), j), (j,), element) + end end - # Positive y-direction - direction = 4 - element = linear_indices[cell_x, end] - - for i in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (i, nnodes(dg)), (i,), element) + for cell_x in axes(mesh, 1) + # Negative y-direction + direction = 3 + element = linear_indices[cell_x, begin] + + for i in eachnode(dg) + 
calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (i, 1), (i,), element) + end + + # Positive y-direction + direction = 4 + element = linear_indices[cell_x, end] + + for i in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (i, nnodes(dg)), (i,), element) + end end - end end - function apply_jacobian!(du, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2 + } + }, equations, dg::DG, cache) - @unpack inverse_jacobian = cache.elements + @unpack inverse_jacobian = cache.elements - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - factor = -inverse_jacobian[i, j, element] + @threaded for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + factor = -inverse_jacobian[i, j, element] - for v in eachvariable(equations) - du[v, i, j, element] *= factor - end + for v in eachvariable(equations) + du[v, i, j, element] *= factor + end + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl b/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl index c17c4d5923f..43f70da4750 100644 --- a/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl +++ b/src/solvers/dgsem_structured/dg_2d_compressible_euler.jl @@ -14,477 +14,490 @@ # We do not wrap this code in `@muladd begin ... end` block. Optimizations like # this are handled automatically by LoopVectorization.jl. - # We specialize on `PtrArray` since these will be returned by `Trixi.wrap_array` # if LoopVectorization.jl can handle the array types. This ensures that `@turbo` # works efficiently here. @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}, nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. - u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - @turbo for j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, element] - rho_v1 = u_cons[2, i, j, element] - rho_v2 = u_cons[3, i, j, element] - rho_e = u_cons[4, i, j, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 )) - - u_prim[i, j, 1] = rho - u_prim[i, j, 2] = v1 - u_prim[i, j, 3] = v2 - u_prim[i, j, 4] = p - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). 
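The permuted buffers that both sides of this hunk set up serve a single purpose: the innermost `@turbo` loop index must be the leftmost (contiguous) array index so it can map onto SIMD lanes. A reduced sketch of the indexing idea, using a plain `Array` and made-up sizes instead of the statically sized `StrideArray`s:

```julia
nnodes = 4
nvars = 4

# Trixi-style global layout: variables fastest, u[v, i, j].
u = rand(nvars, nnodes, nnodes)

# Permuted layout u_perm[j, i, v]: for fixed (i, v), the j index is now
# contiguous in memory, so an inner loop over j vectorizes cleanly.
u_perm = Array{Float64}(undef, nnodes, nnodes, nvars)
for v in 1:nvars, j in 1:nnodes, i in 1:nnodes
    u_perm[j, i, v] = u[v, i, j]
end

# The kernels use `StrideArray`s with `StaticInt` sizes instead of `Array`
# so that these scratch buffers stay off the heap.
```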
- du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - u_prim_permuted[j, i, v] = u_prim[i, j, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # We must also permute the contravariant vectors. - contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for j in eachnode(dg), i in eachnode(dg) - contravariant_vectors_x[j, i, 1] = contravariant_vectors[1, 1, i, j, element] - contravariant_vectors_x[j, i, 2] = contravariant_vectors[2, 1, i, j, element] - end - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. - for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for j in eachnode(dg) - rho_ll = u_prim_permuted[j, i, 1] - v1_ll = u_prim_permuted[j, i, 2] - v2_ll = u_prim_permuted[j, i, 3] - p_ll = u_prim_permuted[j, i, 4] - - rho_rr = u_prim_permuted[j, ii, 1] - v1_rr = u_prim_permuted[j, ii, 2] - v2_rr = u_prim_permuted[j, ii, 3] - p_rr = u_prim_permuted[j, ii, 4] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_x[j, i, 1] + contravariant_vectors_x[j, ii, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_x[j, i, 2] + contravariant_vectors_x[j, ii, 2]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = ( f1 * velocity_square_avg + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[j, i, 1] += factor_i * f1 - du_permuted[j, i, 2] += factor_i * f2 - du_permuted[j, i, 3] += factor_i * f3 - du_permuted[j, i, 4] += factor_i * f4 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[j, ii, 1] += factor_ii * f1 - du_permuted[j, ii, 2] += factor_ii * f2 - du_permuted[j, ii, 3] += factor_ii * f3 - du_permuted[j, ii, 4] += factor_ii * f4 + @unpack derivative_split = dg.basis + @unpack contravariant_vectors = cache.elements + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. 
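The conversion loop that follows is the standard conserved-to-primitive map for the 2D compressible Euler equations. As a standalone sketch (the helper name and `gamma = 1.4` are assumptions for illustration; in Trixi the ratio of specific heats lives in `equations`):

```julia
# Node-level version of the conversion performed by the @turbo loop below.
function cons2prim_euler2d(rho, rho_v1, rho_v2, rho_e; gamma = 1.4)
    v1 = rho_v1 / rho
    v2 = rho_v2 / rho
    # Ideal gas law: p = (gamma - 1) * (total energy - kinetic energy)
    p = (gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2))
    return rho, v1, v2, p
end

cons2prim_euler2d(1.0, 0.1, -0.2, 2.5)  # (1.0, 0.1, -0.2, 0.99)
```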
+ u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations)))) + + @turbo for j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, element] + rho_v1 = u_cons[2, i, j, element] + rho_v2 = u_cons[3, i, j, element] + rho_e = u_cons[4, i, j, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + u_prim[i, j, 1] = rho + u_prim[i, j, 2] = v1 + u_prim[i, j, 3] = v2 + u_prim[i, j, 4] = p end - end - - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - du[i, j, v] = du_permuted[j, i, v] - end - - - # y direction - # We must also permute the contravariant vectors. - contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for j in eachnode(dg), i in eachnode(dg) - contravariant_vectors_y[i, j, 1] = contravariant_vectors[1, 2, i, j, element] - contravariant_vectors_y[i, j, 2] = contravariant_vectors[2, 2, i, j, element] - end - - # The memory layout is already optimal for SIMD vectorization in this loop. - for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for i in eachnode(dg) - rho_ll = u_prim[i, j, 1] - v1_ll = u_prim[i, j, 2] - v2_ll = u_prim[i, j, 3] - p_ll = u_prim[i, j, 4] - - rho_rr = u_prim[i, jj, 1] - v1_rr = u_prim[i, jj, 2] - v2_rr = u_prim[i, jj, 3] - p_rr = u_prim[i, jj, 4] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_y[i, j, 1] + contravariant_vectors_y[i, jj, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_y[i, j, 2] + contravariant_vectors_y[i, jj, 2]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = ( f1 * velocity_square_avg + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, 1] += factor_j * f1 - du[i, j, 2] += factor_j * f2 - du[i, j, 3] += factor_j * f3 - du[i, j, 4] += factor_j * f4 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, 1] += factor_jj * f1 - du[i, jj, 2] += factor_jj * f2 - du[i, jj, 3] += factor_jj * f3 - du[i, jj, 4] += factor_jj * f4 + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). + du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) + + u_prim_permuted[j, i, v] = u_prim[i, j, v] end - end + fill!(du_permuted, zero(eltype(du_permuted))) + # We must also permute the contravariant vectors. 
+ contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, element] += du[i, j, v] - end -end + @turbo for j in eachnode(dg), i in eachnode(dg) + contravariant_vectors_x[j, i, 1] = contravariant_vectors[1, 1, i, j, element] + contravariant_vectors_x[j, i, 2] = contravariant_vectors[2, 1, i, j, element] + end + # Next, we basically inline the volume flux. To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. + for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for j in eachnode(dg) + rho_ll = u_prim_permuted[j, i, 1] + v1_ll = u_prim_permuted[j, i, 2] + v2_ll = u_prim_permuted[j, i, 3] + p_ll = u_prim_permuted[j, i, 4] + + rho_rr = u_prim_permuted[j, ii, 1] + v1_rr = u_prim_permuted[j, ii, 2] + v2_rr = u_prim_permuted[j, ii, 3] + p_rr = u_prim_permuted[j, ii, 4] + + normal_direction_1 = 0.5 * (contravariant_vectors_x[j, i, 1] + + contravariant_vectors_x[j, ii, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_x[j, i, 2] + + contravariant_vectors_x[j, ii, 2]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = (f1 * velocity_square_avg + + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[j, i, 1] += factor_i * f1 + du_permuted[j, i, 2] += factor_i * f2 + du_permuted[j, i, 3] += factor_i * f3 + du_permuted[j, i, 4] += factor_i * f4 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[j, ii, 1] += factor_ii * f1 + du_permuted[j, ii, 2] += factor_ii * f2 + du_permuted[j, ii, 3] += factor_ii * f3 + du_permuted[j, ii, 4] += factor_ii * f4 + end + end + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) + + du[i, j, v] = du_permuted[j, i, v] + end + + # y direction + # We must also permute the contravariant vectors. + contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) + + @turbo for j in eachnode(dg), i in eachnode(dg) + contravariant_vectors_y[i, j, 1] = contravariant_vectors[1, 2, i, j, element] + contravariant_vectors_y[i, j, 2] = contravariant_vectors[2, 2, i, j, element] + end + + # The memory layout is already optimal for SIMD vectorization in this loop. 
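The `(i, ii)` loop structure in this kernel exploits two facts stated in the comments: the diagonal of `derivative_split` is zero, and the two-point volume flux is symmetric, so each flux evaluation can serve both node updates. A one-dimensional toy version of the pattern (random data, toy flux):

```julia
n = 4
u = rand(n)
du = zeros(n)

# Stand-in for `derivative_split`; the only property used here is the
# zero diagonal, which makes the skipped i == ii terms vanish anyway.
D = rand(n, n)
for i in 1:n
    D[i, i] = 0
end

f(a, b) = 0.5 * (a + b)  # toy symmetric two-point "flux"

# Visit each unordered node pair once; symmetry of `f` halves the number
# of flux evaluations, exactly like the (i, ii) loops above.
for i in 1:n, ii in (i + 1):n
    fij = f(u[i], u[ii])
    du[i] += D[i, ii] * fij
    du[ii] += D[ii, i] * fij
end
```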
+ for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for i in eachnode(dg) + rho_ll = u_prim[i, j, 1] + v1_ll = u_prim[i, j, 2] + v2_ll = u_prim[i, j, 3] + p_ll = u_prim[i, j, 4] + + rho_rr = u_prim[i, jj, 1] + v1_rr = u_prim[i, jj, 2] + v2_rr = u_prim[i, jj, 3] + p_rr = u_prim[i, jj, 4] + + normal_direction_1 = 0.5 * (contravariant_vectors_y[i, j, 1] + + contravariant_vectors_y[i, jj, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_y[i, j, 2] + + contravariant_vectors_y[i, jj, 2]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = (f1 * velocity_square_avg + + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, 1] += factor_j * f1 + du[i, j, 2] += factor_j * f2 + du[i, j, 3] += factor_j * f3 + du[i, j, 4] += factor_j * f4 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, 1] += factor_jj * f1 + du[i, jj, 2] += factor_jj * f2 + du[i, jj, 3] += factor_jj * f3 + du[i, jj, 4] += factor_jj * f4 + end + end + + # Finally, we add the temporary RHS computed here to the global RHS in the + # given `element`. + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) + + _du[v, i, j, element] += du[i, j, v] + end +end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}, nonconservative_terms::False, equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. In addition - # to the usual primitive variables, we also compute logarithms of the density - # and pressure to increase the performance of the required logarithmic mean - # values. 
- u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs - - @turbo for j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, element] - rho_v1 = u_cons[2, i, j, element] - rho_v2 = u_cons[3, i, j, element] - rho_e = u_cons[4, i, j, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - - u_prim[i, j, 1] = rho - u_prim[i, j, 2] = v1 - u_prim[i, j, 3] = v2 - u_prim[i, j, 4] = p - u_prim[i, j, 5] = log(rho) - u_prim[i, j, 6] = log(p) - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). - du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations) + 2))) - - @turbo for v in indices(u_prim, 3), # v in eachvariable(equations) misses +2 logs - j in eachnode(dg), - i in eachnode(dg) - u_prim_permuted[j, i, v] = u_prim[i, j, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # We must also permute the contravariant vectors. - contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for j in eachnode(dg), i in eachnode(dg) - contravariant_vectors_x[j, i, 1] = contravariant_vectors[1, 1, i, j, element] - contravariant_vectors_x[j, i, 2] = contravariant_vectors[2, 1, i, j, element] - end - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. - for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for j in eachnode(dg) - rho_ll = u_prim_permuted[j, i, 1] - v1_ll = u_prim_permuted[j, i, 2] - v2_ll = u_prim_permuted[j, i, 3] - p_ll = u_prim_permuted[j, i, 4] - log_rho_ll = u_prim_permuted[j, i, 5] - log_p_ll = u_prim_permuted[j, i, 6] - - rho_rr = u_prim_permuted[j, ii, 1] - v1_rr = u_prim_permuted[j, ii, 2] - v2_rr = u_prim_permuted[j, ii, 3] - p_rr = u_prim_permuted[j, ii, 4] - log_rho_rr = u_prim_permuted[j, ii, 5] - log_p_rr = u_prim_permuted[j, ii, 6] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_x[j, i, 1] + contravariant_vectors_x[j, ii, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_x[j, i, 2] + contravariant_vectors_x[j, ii, 2]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. 
This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[j, i, 1] += factor_i * f1 - du_permuted[j, i, 2] += factor_i * f2 - du_permuted[j, i, 3] += factor_i * f3 - du_permuted[j, i, 4] += factor_i * f4 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[j, ii, 1] += factor_ii * f1 - du_permuted[j, ii, 2] += factor_ii * f2 - du_permuted[j, ii, 3] += factor_ii * f3 - du_permuted[j, ii, 4] += factor_ii * f4 + @unpack derivative_split = dg.basis + @unpack contravariant_vectors = cache.elements + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. In addition + # to the usual primitive variables, we also compute logarithms of the density + # and pressure to increase the performance of the required logarithmic mean + # values. + u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs + + @turbo for j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, element] + rho_v1 = u_cons[2, i, j, element] + rho_v2 = u_cons[3, i, j, element] + rho_e = u_cons[4, i, j, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + u_prim[i, j, 1] = rho + u_prim[i, j, 2] = v1 + u_prim[i, j, 3] = v2 + u_prim[i, j, 4] = p + u_prim[i, j, 5] = log(rho) + u_prim[i, j, 6] = log(p) end - end - - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - du[i, j, v] = du_permuted[j, i, v] - end - - - # y direction - # We must also permute the contravariant vectors. 
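The block of scalar operations above is Trixi's `ln_mean(rho_ll, rho_rr)` written out by hand. The point of the two paths is numerical robustness: for nearly equal arguments, `(y - x) / (log y - log x)` behaves like 0/0, so a truncated series in `z = (y - x)^2 / (x + y)^2` is used instead. A sketch with the same threshold and coefficients as the inlined code (the kernels additionally use branch-free `ifelse` and the precomputed `log_rho`/`log_p` values so that `@turbo` can vectorize):

```julia
function ln_mean_stable(x, y)
    x_plus_y = x + y
    y_minus_x = y - x
    z = y_minus_x^2 / x_plus_y^2
    if z < 1.0e-4
        # Series expansion around x == y avoids catastrophic cancellation.
        return x_plus_y / (2 + z * (2 / 3 + z * (2 / 5 + 2 / 7 * z)))
    else
        return y_minus_x / (log(y) - log(x))
    end
end

ln_mean_stable(1.0, 1.0 + 1.0e-12)  # ≈ 1.0, computed via the series path
```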
- contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - contravariant_vectors_y[i, j, 1] = contravariant_vectors[1, 2, i, j, element] - contravariant_vectors_y[i, j, 2] = contravariant_vectors[2, 2, i, j, element] - end - - # The memory layout is already optimal for SIMD vectorization in this loop. - for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for i in eachnode(dg) - rho_ll = u_prim[i, j, 1] - v1_ll = u_prim[i, j, 2] - v2_ll = u_prim[i, j, 3] - p_ll = u_prim[i, j, 4] - log_rho_ll = u_prim[i, j, 5] - log_p_ll = u_prim[i, j, 6] - - rho_rr = u_prim[i, jj, 1] - v1_rr = u_prim[i, jj, 2] - v2_rr = u_prim[i, jj, 3] - p_rr = u_prim[i, jj, 4] - log_rho_rr = u_prim[i, jj, 5] - log_p_rr = u_prim[i, jj, 6] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_y[i, j, 1] + contravariant_vectors_y[i, jj, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_y[i, j, 2] + contravariant_vectors_y[i, jj, 2]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, 1] += factor_j * f1 - du[i, j, 2] += factor_j * f2 - du[i, j, 3] += factor_j * f3 - du[i, j, 4] += factor_j * f4 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, 1] += factor_jj * f1 - du[i, jj, 2] += factor_jj * f2 - du[i, jj, 3] += factor_jj * f3 - du[i, jj, 4] += factor_jj * f4 + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). 
+ du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations) + 2))) + + @turbo for v in indices(u_prim, 3), # v in eachvariable(equations) misses +2 logs + j in eachnode(dg), + i in eachnode(dg) + + u_prim_permuted[j, i, v] = u_prim[i, j, v] + end + fill!(du_permuted, zero(eltype(du_permuted))) + + # We must also permute the contravariant vectors. + contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) + + @turbo for j in eachnode(dg), i in eachnode(dg) + contravariant_vectors_x[j, i, 1] = contravariant_vectors[1, 1, i, j, element] + contravariant_vectors_x[j, i, 2] = contravariant_vectors[2, 1, i, j, element] + end + + # Next, we basically inline the volume flux. To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. + for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for j in eachnode(dg) + rho_ll = u_prim_permuted[j, i, 1] + v1_ll = u_prim_permuted[j, i, 2] + v2_ll = u_prim_permuted[j, i, 3] + p_ll = u_prim_permuted[j, i, 4] + log_rho_ll = u_prim_permuted[j, i, 5] + log_p_ll = u_prim_permuted[j, i, 6] + + rho_rr = u_prim_permuted[j, ii, 1] + v1_rr = u_prim_permuted[j, ii, 2] + v2_rr = u_prim_permuted[j, ii, 3] + p_rr = u_prim_permuted[j, ii, 4] + log_rho_rr = u_prim_permuted[j, ii, 5] + log_p_rr = u_prim_permuted[j, ii, 6] + + normal_direction_1 = 0.5 * (contravariant_vectors_x[j, i, 1] + + contravariant_vectors_x[j, ii, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_x[j, i, 2] + + contravariant_vectors_x[j, ii, 2]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. 
This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[j, i, 1] += factor_i * f1 + du_permuted[j, i, 2] += factor_i * f2 + du_permuted[j, i, 3] += factor_i * f3 + du_permuted[j, i, 4] += factor_i * f4 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[j, ii, 1] += factor_ii * f1 + du_permuted[j, ii, 2] += factor_ii * f2 + du_permuted[j, ii, 3] += factor_ii * f3 + du_permuted[j, ii, 4] += factor_ii * f4 + end end - end + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, element] += du[i, j, v] - end + du[i, j, v] = du_permuted[j, i, v] + end + + # y direction + # We must also permute the contravariant vectors. + contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + contravariant_vectors_y[i, j, 1] = contravariant_vectors[1, 2, i, j, element] + contravariant_vectors_y[i, j, 2] = contravariant_vectors[2, 2, i, j, element] + end + + # The memory layout is already optimal for SIMD vectorization in this loop. 
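One detail worth flagging in the hunk above: the permutation loop for `contravariant_vectors_y` in this `flux_ranocha_turbo` kernel iterates over an index `k` that never appears in the loop body, so every entry is written `nnodes(dg)` times. The same `k` is present in the removed lines, so the formatting pass merely preserved it. The result is unchanged, but the analogous loop in the `flux_shima_etal_turbo` kernel suggests the two-index form suffices (a hedged suggestion, not part of the patch):

```julia
# Same body, without the apparently unused `k` index, mirroring the
# corresponding loop in the flux_shima_etal_turbo kernel above.
@turbo for j in eachnode(dg), i in eachnode(dg)
    contravariant_vectors_y[i, j, 1] = contravariant_vectors[1, 2, i, j, element]
    contravariant_vectors_y[i, j, 2] = contravariant_vectors[2, 2, i, j, element]
end
```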
+ for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for i in eachnode(dg) + rho_ll = u_prim[i, j, 1] + v1_ll = u_prim[i, j, 2] + v2_ll = u_prim[i, j, 3] + p_ll = u_prim[i, j, 4] + log_rho_ll = u_prim[i, j, 5] + log_p_ll = u_prim[i, j, 6] + + rho_rr = u_prim[i, jj, 1] + v1_rr = u_prim[i, jj, 2] + v2_rr = u_prim[i, jj, 3] + p_rr = u_prim[i, jj, 4] + log_rho_rr = u_prim[i, jj, 5] + log_p_rr = u_prim[i, jj, 6] + + normal_direction_1 = 0.5 * (contravariant_vectors_y[i, j, 1] + + contravariant_vectors_y[i, jj, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_y[i, j, 2] + + contravariant_vectors_y[i, jj, 2]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, 1] += factor_j * f1 + du[i, j, 2] += factor_j * f2 + du[i, j, 3] += factor_j * f3 + du[i, j, 4] += factor_j * f4 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, 1] += factor_jj * f1 + du[i, jj, 2] += factor_jj * f2 + du[i, jj, 3] += factor_jj * f3 + du[i, jj, 4] += factor_jj * f4 + end + end + + # Finally, we add the temporary RHS computed here to the global RHS in the + # given `element`. + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) + + _du[v, i, j, element] += du[i, j, v] + end end diff --git a/src/solvers/dgsem_structured/dg_3d.jl b/src/solvers/dgsem_structured/dg_3d.jl index 6c27e206321..0e6bf8a2ac0 100644 --- a/src/solvers/dgsem_structured/dg_3d.jl +++ b/src/solvers/dgsem_structured/dg_3d.jl @@ -3,525 +3,592 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent function rhs!(du, u, t, mesh::StructuredMesh{3}, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache, u, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, u, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - return nothing -end + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache, u, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg) + end -@inline function weak_form_kernel!(du, u, - element, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, - nonconservative_terms::False, equations, - dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. 
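The comment about `true * [some floating point value]` refers to a Julia idiom behind the `alpha = true` default: `Bool` acts as a strong multiplicative identity/zero, so `true * x` returns `x` exactly, and constant propagation can remove the multiplication from kernels called without blending. A quick demonstration:

```julia
x = 0.1 + 0.2

# `true` is an exact multiplicative identity across number types ...
@assert true * x === x
@assert true * 1.0f0 === 1.0f0

# ... so a kernel with a default `alpha = true` pays nothing for the
# scaling, while still accepting a real blending coefficient when needed,
# e.g. for blended volume integrals.
scaled(alpha, y) = alpha * y
@assert scaled(true, x) === x
scaled(0.5, x)  # blended case
```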
- @unpack derivative_dhat = dg.basis - @unpack contravariant_vectors = cache.elements - - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - - flux1 = flux(u_node, 1, equations) - flux2 = flux(u_node, 2, equations) - flux3 = flux(u_node, 3, equations) - - # Compute the contravariant flux by taking the scalar product of the - # first contravariant vector Ja^1 and the flux vector - Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) - contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 + Ja13 * flux3 - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], contravariant_flux1, equations, dg, ii, j, k, element) + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, u, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) end - # Compute the contravariant flux by taking the scalar product of the - # second contravariant vector Ja^2 and the flux vector - Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) - contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2 + Ja23 * flux3 - for jj in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], contravariant_flux2, equations, dg, i, jj, k, element) + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) end - # Compute the contravariant flux by taking the scalar product of the - # third contravariant vector Ja^3 and the flux vector - Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) - contravariant_flux3 = Ja31 * flux1 + Ja32 * flux2 + Ja33 * flux3 - for kk in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[kk, k], contravariant_flux3, equations, dg, i, j, kk, element) + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) end - end - return nothing + return nothing end +@inline function weak_form_kernel!(du, u, + element, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + nonconservative_terms::False, equations, + dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. 
+ @unpack derivative_dhat = dg.basis + @unpack contravariant_vectors = cache.elements + + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + + flux1 = flux(u_node, 1, equations) + flux2 = flux(u_node, 2, equations) + flux3 = flux(u_node, 3, equations) + + # Compute the contravariant flux by taking the scalar product of the + # first contravariant vector Ja^1 and the flux vector + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k, + element) + contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 + Ja13 * flux3 + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], + contravariant_flux1, equations, dg, ii, j, k, + element) + end + + # Compute the contravariant flux by taking the scalar product of the + # second contravariant vector Ja^2 and the flux vector + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k, + element) + contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2 + Ja23 * flux3 + for jj in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], + contravariant_flux2, equations, dg, i, jj, k, + element) + end + + # Compute the contravariant flux by taking the scalar product of the + # third contravariant vector Ja^3 and the flux vector + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k, + element) + contravariant_flux3 = Ja31 * flux1 + Ja32 * flux2 + Ja33 * flux3 + for kk in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[kk, k], + contravariant_flux3, equations, dg, i, j, kk, + element) + end + end + + return nothing +end # flux differencing volume integral on curvilinear hexahedral elements. Averaging of the # mapping terms, stored in `contravariant_vectors`, is peeled apart from the evaluation of # the physical fluxes in each Cartesian direction @inline function flux_differencing_kernel!(du, u, - element, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + element, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, nonconservative_terms::False, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - - # Calculate volume integral in one element - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - - # pull the contravariant vectors in each coordinate direction - Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) - Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) - Ja3_node = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) - - # All diagonal entries of `derivative_split` are zero. Thus, we can skip - # the computation of the diagonal terms. In addition, we use the symmetry - # of the `volume_flux` to save half of the possible two-point flux - # computations. 
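The expression `Ja11 * flux1 + Ja12 * flux2 + Ja13 * flux3` in the kernel above is the scalar product of a contravariant vector with the tuple of Cartesian flux vectors, i.e. the flux through a reference-coordinate surface. Spelled out for a single node with made-up data (StaticArrays, as Trixi uses for node-level quantities):

```julia
using StaticArrays

# Hypothetical Cartesian fluxes of a 4-variable system at one node.
flux1 = SVector(1.0, 0.2, 0.0, 1.4)  # x-direction flux f(u)
flux2 = SVector(0.0, 0.1, 0.3, 0.2)  # y-direction flux g(u)
flux3 = SVector(0.5, 0.0, 0.1, 0.7)  # z-direction flux h(u)

# Hypothetical first contravariant vector Ja^1 at this node.
Ja11, Ja12, Ja13 = 0.9, -0.1, 0.05

# Contravariant flux: the flux "seen" along the first reference direction.
contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 + Ja13 * flux3
```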
- - # x direction - for ii in (i+1):nnodes(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) - # pull the contravariant vectors and compute the average - Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, - ii, j, k, element) - Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) - # compute the contravariant sharp flux in the direction of the - # averaged contravariant vector - fluxtilde1 = volume_flux(u_node, u_node_ii, Ja1_avg, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], fluxtilde1, equations, dg, i, j, k, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], fluxtilde1, equations, dg, ii, j, k, element) - end + volume_flux, dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_split = dg.basis + @unpack contravariant_vectors = cache.elements - # y direction - for jj in (j+1):nnodes(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) - # pull the contravariant vectors and compute the average - Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, - i, jj, k, element) - Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) - # compute the contravariant sharp flux in the direction of the - # averaged contravariant vector - fluxtilde2 = volume_flux(u_node, u_node_jj, Ja2_avg, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], fluxtilde2, equations, dg, i, j, k, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], fluxtilde2, equations, dg, i, jj, k, element) - end - - # z direction - for kk in (k+1):nnodes(dg) - u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) - # pull the contravariant vectors and compute the average - Ja3_node_kk = get_contravariant_vector(3, contravariant_vectors, - i, j, kk, element) - Ja3_avg = 0.5 * (Ja3_node + Ja3_node_kk) - # compute the contravariant sharp flux in the direction of the - # averaged contravariant vector - fluxtilde3 = volume_flux(u_node, u_node_kk, Ja3_avg, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[k, kk], fluxtilde3, equations, dg, i, j, k, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[kk, k], fluxtilde3, equations, dg, i, j, kk, element) + # Calculate volume integral in one element + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + + # pull the contravariant vectors in each coordinate direction + Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) + Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) + Ja3_node = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) + + # All diagonal entries of `derivative_split` are zero. Thus, we can skip + # the computation of the diagonal terms. In addition, we use the symmetry + # of the `volume_flux` to save half of the possible two-point flux + # computations. 
+ + # x direction + for ii in (i + 1):nnodes(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) + # pull the contravariant vectors and compute the average + Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, + ii, j, k, element) + Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) + # compute the contravariant sharp flux in the direction of the + # averaged contravariant vector + fluxtilde1 = volume_flux(u_node, u_node_ii, Ja1_avg, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], fluxtilde1, + equations, dg, i, j, k, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], fluxtilde1, + equations, dg, ii, j, k, element) + end + + # y direction + for jj in (j + 1):nnodes(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) + # pull the contravariant vectors and compute the average + Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, + i, jj, k, element) + Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) + # compute the contravariant sharp flux in the direction of the + # averaged contravariant vector + fluxtilde2 = volume_flux(u_node, u_node_jj, Ja2_avg, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], fluxtilde2, + equations, dg, i, j, k, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], fluxtilde2, + equations, dg, i, jj, k, element) + end + + # z direction + for kk in (k + 1):nnodes(dg) + u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) + # pull the contravariant vectors and compute the average + Ja3_node_kk = get_contravariant_vector(3, contravariant_vectors, + i, j, kk, element) + Ja3_avg = 0.5 * (Ja3_node + Ja3_node_kk) + # compute the contravariant sharp flux in the direction of the + # averaged contravariant vector + fluxtilde3 = volume_flux(u_node, u_node_kk, Ja3_avg, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[k, kk], fluxtilde3, + equations, dg, i, j, k, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[kk, k], fluxtilde3, + equations, dg, i, j, kk, element) + end end - end end @inline function flux_differencing_kernel!(du, u, - element, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + element, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, nonconservative_terms::True, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - symmetric_flux, nonconservative_flux = volume_flux - - # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, dg, cache, alpha) - - # Calculate the remaining volume terms using the nonsymmetric generalized flux - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - - # pull the contravariant vectors in each coordinate direction - Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) - Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) - Ja3_node = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) - - # The diagonal terms are zero since the diagonal of `derivative_split` - # is zero. We ignore this for now. - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # Thus, we need to pass both to the nonconservative flux. 
- - # x direction - integral_contribution = zero(u_node) - for ii in eachnode(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) - # pull the contravariant vectors and compute the average - Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, k, element) - Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) - # compute the contravariant nonconservative flux in the direction of the - # averaged contravariant vector - fluxtilde1 = nonconservative_flux(u_node, u_node_ii, Ja1_node, Ja1_avg, equations) - integral_contribution = integral_contribution + derivative_split[i, ii] * fluxtilde1 - end + volume_flux, dg::DGSEM, cache, alpha = true) + @unpack derivative_split = dg.basis + @unpack contravariant_vectors = cache.elements + symmetric_flux, nonconservative_flux = volume_flux - # y direction - for jj in eachnode(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) - # pull the contravariant vectors and compute the average - Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, k, element) - Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) - # compute the contravariant nonconservative flux in the direction of the - # averaged contravariant vector - fluxtilde2 = nonconservative_flux(u_node, u_node_jj, Ja2_node, Ja2_avg, equations) - integral_contribution = integral_contribution + derivative_split[j, jj] * fluxtilde2 - end + # Apply the symmetric flux as usual + flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + dg, cache, alpha) - # z direction - for kk in eachnode(dg) - u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) - # pull the contravariant vectors and compute the average - Ja3_node_kk = get_contravariant_vector(3, contravariant_vectors, i, j, kk, element) - Ja3_avg = 0.5 * (Ja3_node + Ja3_node_kk) - # compute the contravariant nonconservative flux in the direction of the - # averaged contravariant vector - fluxtilde3 = nonconservative_flux(u_node, u_node_kk, Ja3_node, Ja3_avg, equations) - integral_contribution = integral_contribution + derivative_split[k, kk] * fluxtilde3 + # Calculate the remaining volume terms using the nonsymmetric generalized flux + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + + # pull the contravariant vectors in each coordinate direction + Ja1_node = get_contravariant_vector(1, contravariant_vectors, i, j, k, element) + Ja2_node = get_contravariant_vector(2, contravariant_vectors, i, j, k, element) + Ja3_node = get_contravariant_vector(3, contravariant_vectors, i, j, k, element) + + # The diagonal terms are zero since the diagonal of `derivative_split` + # is zero. We ignore this for now. + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # Thus, we need to pass both to the nonconservative flux. 
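For the nonconservative terms there is no symmetry to exploit, since in general the generalized flux changes when its arguments are swapped; the loops therefore run over all nodes, and the accumulated contribution is scaled by 0.5 at the end to cancel the factor 2 of the flux-differencing form. A one-dimensional skeleton of this pattern (toy nonsymmetric term, random data):

```julia
n = 4
u = rand(n)
du = zeros(n)

# Stand-in for `derivative_split`; its zero diagonal makes the i == ii
# terms vanish, so the full loop below is safe.
D = rand(n, n)
for i in 1:n
    D[i, i] = 0
end

# Toy *nonsymmetric* two-point term: swapping arguments changes the value.
g(a, b) = a * (b - a)

for i in 1:n
    acc = 0.0
    for ii in 1:n  # full loop, no triangular shortcut as for symmetric fluxes
        acc += D[i, ii] * g(u[i], u[ii])
    end
    # The factor 0.5 cancels the factor 2 in the flux differencing form.
    du[i] += 0.5 * acc
end
```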
+ + # x direction + integral_contribution = zero(u_node) + for ii in eachnode(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) + # pull the contravariant vectors and compute the average + Ja1_node_ii = get_contravariant_vector(1, contravariant_vectors, ii, j, k, + element) + Ja1_avg = 0.5 * (Ja1_node + Ja1_node_ii) + # compute the contravariant nonconservative flux in the direction of the + # averaged contravariant vector + fluxtilde1 = nonconservative_flux(u_node, u_node_ii, Ja1_node, Ja1_avg, + equations) + integral_contribution = integral_contribution + + derivative_split[i, ii] * fluxtilde1 + end + + # y direction + for jj in eachnode(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) + # pull the contravariant vectors and compute the average + Ja2_node_jj = get_contravariant_vector(2, contravariant_vectors, i, jj, k, + element) + Ja2_avg = 0.5 * (Ja2_node + Ja2_node_jj) + # compute the contravariant nonconservative flux in the direction of the + # averaged contravariant vector + fluxtilde2 = nonconservative_flux(u_node, u_node_jj, Ja2_node, Ja2_avg, + equations) + integral_contribution = integral_contribution + + derivative_split[j, jj] * fluxtilde2 + end + + # z direction + for kk in eachnode(dg) + u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) + # pull the contravariant vectors and compute the average + Ja3_node_kk = get_contravariant_vector(3, contravariant_vectors, i, j, kk, + element) + Ja3_avg = 0.5 * (Ja3_node + Ja3_node_kk) + # compute the contravariant nonconservative flux in the direction of the + # averaged contravariant vector + fluxtilde3 = nonconservative_flux(u_node, u_node_kk, Ja3_node, Ja3_avg, + equations) + integral_contribution = integral_contribution + + derivative_split[k, kk] * fluxtilde3 + end + + # The factor 0.5 cancels the factor 2 in the flux differencing form + multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, + dg, i, j, k, element) end - - # The factor 0.5 cancels the factor 2 in the flux differencing form - multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, dg, i, j, k, element) - end end - # Computing the normal vector for the FV method on curvilinear subcells. 
# To fulfill free-stream preservation we use the explicit formula B.53 in Appendix B.4 # by Hennemann, Rueda-Ramirez, Hindenlang, Gassner (2020) # "A provably entropy stable subcell shock capturing approach for high order split form DG for the compressible Euler equations" # [arXiv: 2008.12044v2](https://arxiv.org/pdf/2008.12044) -@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::Union{StructuredMesh{3}, P4estMesh{3}}, nonconservative_terms::False, +@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, + fstar3_R, u, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) - @unpack contravariant_vectors = cache.elements - @unpack weights, derivative_matrix = dg.basis + @unpack contravariant_vectors = cache.elements + @unpack weights, derivative_matrix = dg.basis - # Performance improvement if the metric terms of the subcell FV method are only computed - # once at the beginning of the simulation, instead of at every Runge-Kutta stage - fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_R)) + # Performance improvement if the metric terms of the subcell FV method are only computed + # once at the beginning of the simulation, instead of at every Runge-Kutta stage + fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_R)) - for k in eachnode(dg), j in eachnode(dg) - normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, k, element) + for k in eachnode(dg), j in eachnode(dg) + normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, k, + element) - for i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) + for i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, k, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) - for m in 1:nnodes(dg) - normal_direction += weights[i-1] * derivative_matrix[i-1, m] * get_contravariant_vector(1, contravariant_vectors, m, j, k, element) - end + for m in 1:nnodes(dg) + normal_direction += weights[i - 1] * derivative_matrix[i - 1, m] * + get_contravariant_vector(1, contravariant_vectors, + m, j, k, element) + end - # Compute the contravariant flux - contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) + # Compute the contravariant flux + contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) - set_node_vars!(fstar1_L, contravariant_flux, equations, dg, i, j, k) - set_node_vars!(fstar1_R, contravariant_flux, equations, dg, i, j, k) + set_node_vars!(fstar1_L, contravariant_flux, equations, dg, i, j, k) + set_node_vars!(fstar1_R, contravariant_flux, equations, dg, i, j, k) + end end - end - fstar2_L[:, :, 1 , :] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 , :] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_R)) + fstar2_L[:, :, 1, :] .= zero(eltype(fstar2_L)) + fstar2_L[:, :, nnodes(dg) + 1, :] .= zero(eltype(fstar2_L)) + fstar2_R[:, :, 1, :] .= zero(eltype(fstar2_R)) + fstar2_R[:, :, nnodes(dg) + 1, :] 
.= zero(eltype(fstar2_R)) - for k in eachnode(dg), i in eachnode(dg) - normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, k, element) + for k in eachnode(dg), i in eachnode(dg) + normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, k, + element) - for j in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) + for j in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i, j - 1, k, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) - for m in 1:nnodes(dg) - normal_direction += weights[j-1] * derivative_matrix[j-1, m] * get_contravariant_vector(2, contravariant_vectors, i, m, k, element) - end + for m in 1:nnodes(dg) + normal_direction += weights[j - 1] * derivative_matrix[j - 1, m] * + get_contravariant_vector(2, contravariant_vectors, + i, m, k, element) + end - # Compute the contravariant flux - contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) + # Compute the contravariant flux + contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) - set_node_vars!(fstar2_L, contravariant_flux, equations, dg, i, j, k) - set_node_vars!(fstar2_R, contravariant_flux, equations, dg, i, j, k) + set_node_vars!(fstar2_L, contravariant_flux, equations, dg, i, j, k) + set_node_vars!(fstar2_R, contravariant_flux, equations, dg, i, j, k) + end end - end - fstar3_L[:, :, :, 1 ] .= zero(eltype(fstar3_L)) - fstar3_L[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_L)) - fstar3_R[:, :, :, 1 ] .= zero(eltype(fstar3_R)) - fstar3_R[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_R)) + fstar3_L[:, :, :, 1] .= zero(eltype(fstar3_L)) + fstar3_L[:, :, :, nnodes(dg) + 1] .= zero(eltype(fstar3_L)) + fstar3_R[:, :, :, 1] .= zero(eltype(fstar3_R)) + fstar3_R[:, :, :, nnodes(dg) + 1] .= zero(eltype(fstar3_R)) - for j in eachnode(dg), i in eachnode(dg) - normal_direction = get_contravariant_vector(3, contravariant_vectors, i, j, 1, element) + for j in eachnode(dg), i in eachnode(dg) + normal_direction = get_contravariant_vector(3, contravariant_vectors, i, j, 1, + element) - for k in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i, j, k-1, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) + for k in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i, j, k - 1, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) - for m in 1:nnodes(dg) - normal_direction += weights[k-1] * derivative_matrix[k-1, m] * get_contravariant_vector(3, contravariant_vectors, i, j, m, element) - end + for m in 1:nnodes(dg) + normal_direction += weights[k - 1] * derivative_matrix[k - 1, m] * + get_contravariant_vector(3, contravariant_vectors, + i, j, m, element) + end - # Compute the contravariant flux - contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) + # Compute the contravariant flux + contravariant_flux = volume_flux_fv(u_ll, u_rr, normal_direction, equations) - set_node_vars!(fstar3_L, contravariant_flux, equations, dg, i, j, k) - set_node_vars!(fstar3_R, contravariant_flux, equations, dg, i, j, k) + set_node_vars!(fstar3_L, contravariant_flux, equations, dg, i, j, k) + set_node_vars!(fstar3_R, contravariant_flux, equations, dg, i, j, k) + end end - end - return nothing + return nothing end # # Calculate the finite volume fluxes inside curvilinear elements (**with non-conservative terms**). 
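For orientation, the metric-term handling in the hunk above can be read in isolation as the following minimal sketch, with plain arrays standing in for Trixi.jl's basis and element data (`subcell_normals`, `Ja1`, `weights`, `D` are hypothetical names, not the package API). Starting from the contravariant vector Ja^1 at the first node, the normal at each inner subcell interface is accumulated with the quadrature weights and the derivative matrix, following formula B.53 of Hennemann et al. (2020):

function subcell_normals(Ja1::Matrix{Float64},     # 3 x n: Ja^1 at the n nodes of one line
                         weights::Vector{Float64}, # quadrature weights
                         D::Matrix{Float64})       # n x n derivative matrix
    n = length(weights)
    normals = zeros(3, n - 1)  # one normal per inner subcell interface
    normal = Ja1[:, 1]         # start from the contravariant vector at the first node
    for i in 2:n
        # accumulate the metric-term update for interface i - 1/2
        for m in 1:n
            normal += weights[i - 1] * D[i - 1, m] * Ja1[:, m]
        end
        normals[:, i - 1] = normal
    end
    return normals
end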
-@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::Union{StructuredMesh{3}, P4estMesh{3}}, nonconservative_terms::True, +@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, + fstar3_R, u, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) - @unpack contravariant_vectors = cache.elements - @unpack weights, derivative_matrix = dg.basis - - volume_flux, nonconservative_flux = volume_flux_fv - - # Performance improvement if the metric terms of the subcell FV method are only computed - # once at the beginning of the simulation, instead of at every Runge-Kutta stage - fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_R)) - - for k in eachnode(dg), j in eachnode(dg) - normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, k, element) - - for i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) + @unpack contravariant_vectors = cache.elements + @unpack weights, derivative_matrix = dg.basis - for m in eachnode(dg) - normal_direction += weights[i-1] * derivative_matrix[i-1, m] * get_contravariant_vector(1, contravariant_vectors, m, j, k, element) - end + volume_flux, nonconservative_flux = volume_flux_fv - # Compute the contravariant conservative flux - ftilde = volume_flux(u_ll, u_rr, normal_direction, equations) + # Performance improvement if the metric terms of the subcell FV method are only computed + # once at the beginning of the simulation, instead of at every Runge-Kutta stage + fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_R)) - # Compute and add in the nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - ftilde_L = ftilde + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - ftilde_R = ftilde + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - set_node_vars!(fstar1_L, ftilde_L, equations, dg, i, j, k) - set_node_vars!(fstar1_R, ftilde_R, equations, dg, i, j, k) + for k in eachnode(dg), j in eachnode(dg) + normal_direction = get_contravariant_vector(1, contravariant_vectors, 1, j, k, + element) + + for i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, k, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) + + for m in eachnode(dg) + normal_direction += weights[i - 1] * derivative_matrix[i - 1, m] * + get_contravariant_vector(1, contravariant_vectors, + m, j, k, element) + end + + # Compute the contravariant conservative flux + ftilde = volume_flux(u_ll, u_rr, normal_direction, equations) + + # Compute and add in the nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + ftilde_L = ftilde + + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + ftilde_R = ftilde + + 0.5 * 
nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + set_node_vars!(fstar1_L, ftilde_L, equations, dg, i, j, k) + set_node_vars!(fstar1_R, ftilde_R, equations, dg, i, j, k) + end end - end - - fstar2_L[:, :, 1 , :] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 , :] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_R)) - - for k in eachnode(dg), i in eachnode(dg) - normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, k, element) - - for j in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) - for m in eachnode(dg) - normal_direction += weights[j-1] * derivative_matrix[j-1, m] * get_contravariant_vector(2, contravariant_vectors, i, m, k, element) - end + fstar2_L[:, :, 1, :] .= zero(eltype(fstar2_L)) + fstar2_L[:, :, nnodes(dg) + 1, :] .= zero(eltype(fstar2_L)) + fstar2_R[:, :, 1, :] .= zero(eltype(fstar2_R)) + fstar2_R[:, :, nnodes(dg) + 1, :] .= zero(eltype(fstar2_R)) - # Compute the contravariant conservative flux - ftilde = volume_flux(u_ll, u_rr, normal_direction, equations) - - # Compute and add in the nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - ftilde_L = ftilde + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - ftilde_R = ftilde + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - set_node_vars!(fstar2_L, ftilde_L, equations, dg, i, j, k) - set_node_vars!(fstar2_R, ftilde_R, equations, dg, i, j, k) + for k in eachnode(dg), i in eachnode(dg) + normal_direction = get_contravariant_vector(2, contravariant_vectors, i, 1, k, + element) + + for j in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i, j - 1, k, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) + + for m in eachnode(dg) + normal_direction += weights[j - 1] * derivative_matrix[j - 1, m] * + get_contravariant_vector(2, contravariant_vectors, + i, m, k, element) + end + + # Compute the contravariant conservative flux + ftilde = volume_flux(u_ll, u_rr, normal_direction, equations) + + # Compute and add in the nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + ftilde_L = ftilde + + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + ftilde_R = ftilde + + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + set_node_vars!(fstar2_L, ftilde_L, equations, dg, i, j, k) + set_node_vars!(fstar2_R, ftilde_R, equations, dg, i, j, k) + end end - end - - fstar3_L[:, :, :, 1 ] .= zero(eltype(fstar3_L)) - fstar3_L[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_L)) - fstar3_R[:, :, :, 1 ] .= zero(eltype(fstar3_R)) - fstar3_R[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_R)) - - for j in eachnode(dg), i in eachnode(dg) - normal_direction = get_contravariant_vector(3, contravariant_vectors, i, j, 1, element) - for k in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i, j, k-1, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) + fstar3_L[:, :, :, 1] .= zero(eltype(fstar3_L)) + fstar3_L[:, :, :, nnodes(dg) + 1] .= 
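Stripped of the index bookkeeping, the two-sided subcell flux computed in this hunk amounts to the sketch below; `volume_flux` and `nonconservative_flux` are placeholder callables with the same argument convention as above, and `two_sided_fv_flux` is a hypothetical name:

# Sketch: symmetric part plus one-sided nonconservative parts. The factor 0.5
# reflects the interpretation of global SBP operators coupled discontinuously
# via central fluxes/SATs, as noted in the comments above.
function two_sided_fv_flux(u_ll, u_rr, normal, volume_flux, nonconservative_flux,
                           equations)
    ftilde = volume_flux(u_ll, u_rr, normal, equations)
    ftilde_L = ftilde + 0.5 * nonconservative_flux(u_ll, u_rr, normal, normal, equations)
    ftilde_R = ftilde + 0.5 * nonconservative_flux(u_rr, u_ll, normal, normal, equations)
    return ftilde_L, ftilde_R
end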
zero(eltype(fstar3_L)) + fstar3_R[:, :, :, 1] .= zero(eltype(fstar3_R)) + fstar3_R[:, :, :, nnodes(dg) + 1] .= zero(eltype(fstar3_R)) - for m in eachnode(dg) - normal_direction += weights[k-1] * derivative_matrix[k-1, m] * get_contravariant_vector(3, contravariant_vectors, i, j, m, element) - end - - # Compute the contravariant conservative flux - ftilde = volume_flux(u_ll, u_rr, normal_direction, equations) - - # Compute and add in the nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - ftilde_L = ftilde + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - ftilde_R = ftilde + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - set_node_vars!(fstar3_L, ftilde_L, equations, dg, i, j, k) - set_node_vars!(fstar3_R, ftilde_R, equations, dg, i, j, k) + for j in eachnode(dg), i in eachnode(dg) + normal_direction = get_contravariant_vector(3, contravariant_vectors, i, j, 1, + element) + + for k in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i, j, k - 1, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) + + for m in eachnode(dg) + normal_direction += weights[k - 1] * derivative_matrix[k - 1, m] * + get_contravariant_vector(3, contravariant_vectors, + i, j, m, element) + end + + # Compute the contravariant conservative flux + ftilde = volume_flux(u_ll, u_rr, normal_direction, equations) + + # Compute and add in the nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + ftilde_L = ftilde + + 0.5 * nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + ftilde_R = ftilde + + 0.5 * nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + set_node_vars!(fstar3_L, ftilde_L, equations, dg, i, j, k) + set_node_vars!(fstar3_R, ftilde_R, equations, dg, i, j, k) + end end - end - return nothing + return nothing end - function calc_interface_flux!(cache, u, mesh::StructuredMesh{3}, nonconservative_terms, # can be True/False equations, surface_integral, dg::DG) - @unpack elements = cache - - @threaded for element in eachelement(dg, cache) - # Interfaces in negative directions - # Faster version of "for orientation in (1, 2, 3)" - - # Interfaces in x-direction (`orientation` = 1) - calc_interface_flux!(elements.surface_flux_values, - elements.left_neighbors[1, element], - element, 1, u, mesh, - nonconservative_terms, equations, - surface_integral, dg, cache) - - # Interfaces in y-direction (`orientation` = 2) - calc_interface_flux!(elements.surface_flux_values, - elements.left_neighbors[2, element], - element, 2, u, mesh, - nonconservative_terms, equations, - surface_integral, dg, cache) - - # Interfaces in z-direction (`orientation` = 3) - calc_interface_flux!(elements.surface_flux_values, - elements.left_neighbors[3, element], - element, 3, u, mesh, - nonconservative_terms, equations, - surface_integral, dg, cache) - end - - return nothing -end + @unpack elements = cache + + @threaded for element in eachelement(dg, cache) + # Interfaces in negative directions + # Faster version of "for orientation in (1, 2, 3)" + + # Interfaces in x-direction (`orientation` = 1) + calc_interface_flux!(elements.surface_flux_values, + elements.left_neighbors[1, element], + element, 1, u, 
mesh, + nonconservative_terms, equations, + surface_integral, dg, cache) + + # Interfaces in y-direction (`orientation` = 2) + calc_interface_flux!(elements.surface_flux_values, + elements.left_neighbors[2, element], + element, 2, u, mesh, + nonconservative_terms, equations, + surface_integral, dg, cache) + + # Interfaces in z-direction (`orientation` = 3) + calc_interface_flux!(elements.surface_flux_values, + elements.left_neighbors[3, element], + element, 3, u, mesh, + nonconservative_terms, equations, + surface_integral, dg, cache) + end + return nothing +end @inline function calc_interface_flux!(surface_flux_values, left_element, right_element, orientation, u, mesh::StructuredMesh{3}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache) - # This is slow for LSA, but for some reason faster for Euler (see #519) - if left_element <= 0 # left_element = 0 at boundaries - return surface_flux_values - end - - @unpack surface_flux = surface_integral - @unpack contravariant_vectors, inverse_jacobian = cache.elements - - right_direction = 2 * orientation - left_direction = right_direction - 1 - - for j in eachnode(dg), i in eachnode(dg) - if orientation == 1 - u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, j, left_element) - u_rr = get_node_vars(u, equations, dg, 1, i, j, right_element) - - # If the mapping is orientation-reversing, the contravariant vectors' orientation - # is reversed as well. The normal vector must be oriented in the direction - # from `left_element` to `right_element`, or the numerical flux will be computed - # incorrectly (downwind direction). - sign_jacobian = sign(inverse_jacobian[1, i, j, right_element]) - - # First contravariant vector Ja^1 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(1, contravariant_vectors, - 1, i, j, right_element) - elseif orientation == 2 - u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), j, left_element) - u_rr = get_node_vars(u, equations, dg, i, 1, j, right_element) - - # See above - sign_jacobian = sign(inverse_jacobian[i, 1, j, right_element]) - - # Second contravariant vector Ja^2 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(2, contravariant_vectors, - i, 1, j, right_element) - else # orientation == 3 - u_ll = get_node_vars(u, equations, dg, i, j, nnodes(dg), left_element) - u_rr = get_node_vars(u, equations, dg, i, j, 1, right_element) - - # See above - sign_jacobian = sign(inverse_jacobian[i, j, 1, right_element]) - - # Third contravariant vector Ja^3 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(3, contravariant_vectors, - i, j, 1, right_element) + # This is slow for LSA, but for some reason faster for Euler (see #519) + if left_element <= 0 # left_element = 0 at boundaries + return surface_flux_values end - # If the mapping is orientation-reversing, the normal vector will be reversed (see above). - # However, the flux now has the wrong sign, since we need the physical flux in normal direction. 
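The sign handling in this interface routine condenses to a short pattern. The sketch below uses hypothetical names (`oriented_interface_flux`, `Ja`) and assumes `surface_flux` follows the same calling convention as in the hunk:

# Sketch: for an orientation-reversing mapping (negative Jacobian), flip the
# contravariant vector so the normal points from the left to the right element,
# then flip the resulting flux back to recover the physical flux in normal direction.
function oriented_interface_flux(u_ll, u_rr, Ja, inverse_jacobian, surface_flux,
                                 equations)
    sign_jacobian = sign(inverse_jacobian)
    normal_direction = sign_jacobian * Ja
    return sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations)
end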
- flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) + @unpack surface_flux = surface_integral + @unpack contravariant_vectors, inverse_jacobian = cache.elements - for v in eachvariable(equations) - surface_flux_values[v, i, j, right_direction, left_element] = flux[v] - surface_flux_values[v, i, j, left_direction, right_element] = flux[v] + right_direction = 2 * orientation + left_direction = right_direction - 1 + + for j in eachnode(dg), i in eachnode(dg) + if orientation == 1 + u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, j, left_element) + u_rr = get_node_vars(u, equations, dg, 1, i, j, right_element) + + # If the mapping is orientation-reversing, the contravariant vectors' orientation + # is reversed as well. The normal vector must be oriented in the direction + # from `left_element` to `right_element`, or the numerical flux will be computed + # incorrectly (downwind direction). + sign_jacobian = sign(inverse_jacobian[1, i, j, right_element]) + + # First contravariant vector Ja^1 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(1, contravariant_vectors, + 1, i, j, right_element) + elseif orientation == 2 + u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), j, left_element) + u_rr = get_node_vars(u, equations, dg, i, 1, j, right_element) + + # See above + sign_jacobian = sign(inverse_jacobian[i, 1, j, right_element]) + + # Second contravariant vector Ja^2 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(2, contravariant_vectors, + i, 1, j, right_element) + else # orientation == 3 + u_ll = get_node_vars(u, equations, dg, i, j, nnodes(dg), left_element) + u_rr = get_node_vars(u, equations, dg, i, j, 1, right_element) + + # See above + sign_jacobian = sign(inverse_jacobian[i, j, 1, right_element]) + + # Third contravariant vector Ja^3 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(3, contravariant_vectors, + i, j, 1, right_element) + end + + # If the mapping is orientation-reversing, the normal vector will be reversed (see above). + # However, the flux now has the wrong sign, since we need the physical flux in normal direction. + flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) + + for v in eachvariable(equations) + surface_flux_values[v, i, j, right_direction, left_element] = flux[v] + surface_flux_values[v, i, j, left_direction, right_element] = flux[v] + end end - end - return nothing + return nothing end @inline function calc_interface_flux!(surface_flux_values, left_element, right_element, @@ -529,181 +596,198 @@ end mesh::StructuredMesh{3}, nonconservative_terms::True, equations, surface_integral, dg::DG, cache) - # See comment on `calc_interface_flux!` with `nonconservative_terms::False` - if left_element <= 0 # left_element = 0 at boundaries - return surface_flux_values - end - - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack contravariant_vectors, inverse_jacobian = cache.elements - - right_direction = 2 * orientation - left_direction = right_direction - 1 - - for j in eachnode(dg), i in eachnode(dg) - if orientation == 1 - u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, j, left_element) - u_rr = get_node_vars(u, equations, dg, 1, i, j, right_element) - - # If the mapping is orientation-reversing, the contravariant vectors' orientation - # is reversed as well. 
The normal vector must be oriented in the direction - # from `left_element` to `right_element`, or the numerical flux will be computed - # incorrectly (downwind direction). - sign_jacobian = sign(inverse_jacobian[1, i, j, right_element]) - - # First contravariant vector Ja^1 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(1, contravariant_vectors, - 1, i, j, right_element) - elseif orientation == 2 - u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), j, left_element) - u_rr = get_node_vars(u, equations, dg, i, 1, j, right_element) - - # See above - sign_jacobian = sign(inverse_jacobian[i, 1, j, right_element]) - - # Second contravariant vector Ja^2 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(2, contravariant_vectors, - i, 1, j, right_element) - else # orientation == 3 - u_ll = get_node_vars(u, equations, dg, i, j, nnodes(dg), left_element) - u_rr = get_node_vars(u, equations, dg, i, j, 1, right_element) - - # See above - sign_jacobian = sign(inverse_jacobian[i, j, 1, right_element]) - - # Third contravariant vector Ja^3 as SVector - normal_direction = sign_jacobian * get_contravariant_vector(3, contravariant_vectors, - i, j, 1, right_element) + # See comment on `calc_interface_flux!` with `nonconservative_terms::False` + if left_element <= 0 # left_element = 0 at boundaries + return surface_flux_values end - # If the mapping is orientation-reversing, the normal vector will be reversed (see above). - # However, the flux now has the wrong sign, since we need the physical flux in normal direction. - flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) - - # Compute both nonconservative fluxes - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # However, both are the same at watertight interfaces, so we pass the - # `normal_direction` twice. - # Scale with sign_jacobian to ensure that the normal_direction matches that - # from the flux above - noncons_left = sign_jacobian * nonconservative_flux(u_ll, u_rr, normal_direction, normal_direction, equations) - noncons_right = sign_jacobian * nonconservative_flux(u_rr, u_ll, normal_direction, normal_direction, equations) - - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, i, j, right_direction, left_element] = flux[v] + 0.5 * noncons_left[v] - surface_flux_values[v, i, j, left_direction, right_element] = flux[v] + 0.5 * noncons_right[v] + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack contravariant_vectors, inverse_jacobian = cache.elements + + right_direction = 2 * orientation + left_direction = right_direction - 1 + + for j in eachnode(dg), i in eachnode(dg) + if orientation == 1 + u_ll = get_node_vars(u, equations, dg, nnodes(dg), i, j, left_element) + u_rr = get_node_vars(u, equations, dg, 1, i, j, right_element) + + # If the mapping is orientation-reversing, the contravariant vectors' orientation + # is reversed as well. The normal vector must be oriented in the direction + # from `left_element` to `right_element`, or the numerical flux will be computed + # incorrectly (downwind direction). 
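For the nonconservative variant, each side additionally receives half of "its" one-sided nonconservative flux. A minimal sketch with a hypothetical helper name, using the same argument convention as the hunk:

# Sketch: the normal direction is passed twice because the node-local and the
# averaged metric terms coincide at watertight interfaces.
function interface_fluxes(u_ll, u_rr, normal, sign_jacobian,
                          surface_flux, nonconservative_flux, equations)
    flux = sign_jacobian * surface_flux(u_ll, u_rr, normal, equations)
    noncons_left = sign_jacobian *
                   nonconservative_flux(u_ll, u_rr, normal, normal, equations)
    noncons_right = sign_jacobian *
                    nonconservative_flux(u_rr, u_ll, normal, normal, equations)
    return flux + 0.5 * noncons_left, flux + 0.5 * noncons_right
end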
+ sign_jacobian = sign(inverse_jacobian[1, i, j, right_element]) + + # First contravariant vector Ja^1 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(1, contravariant_vectors, + 1, i, j, right_element) + elseif orientation == 2 + u_ll = get_node_vars(u, equations, dg, i, nnodes(dg), j, left_element) + u_rr = get_node_vars(u, equations, dg, i, 1, j, right_element) + + # See above + sign_jacobian = sign(inverse_jacobian[i, 1, j, right_element]) + + # Second contravariant vector Ja^2 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(2, contravariant_vectors, + i, 1, j, right_element) + else # orientation == 3 + u_ll = get_node_vars(u, equations, dg, i, j, nnodes(dg), left_element) + u_rr = get_node_vars(u, equations, dg, i, j, 1, right_element) + + # See above + sign_jacobian = sign(inverse_jacobian[i, j, 1, right_element]) + + # Third contravariant vector Ja^3 as SVector + normal_direction = sign_jacobian * + get_contravariant_vector(3, contravariant_vectors, + i, j, 1, right_element) + end + + # If the mapping is orientation-reversing, the normal vector will be reversed (see above). + # However, the flux now has the wrong sign, since we need the physical flux in normal direction. + flux = sign_jacobian * surface_flux(u_ll, u_rr, normal_direction, equations) + + # Compute both nonconservative fluxes + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # However, both are the same at watertight interfaces, so we pass the + # `normal_direction` twice. + # Scale with sign_jacobian to ensure that the normal_direction matches that + # from the flux above + noncons_left = sign_jacobian * + nonconservative_flux(u_ll, u_rr, normal_direction, + normal_direction, equations) + noncons_right = sign_jacobian * + nonconservative_flux(u_rr, u_ll, normal_direction, + normal_direction, equations) + + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, i, j, right_direction, left_element] = flux[v] + + 0.5 * + noncons_left[v] + surface_flux_values[v, i, j, left_direction, right_element] = flux[v] + + 0.5 * + noncons_right[v] + end end - end - return nothing + return nothing end - # TODO: Taal dimension agnostic function calc_boundary_flux!(cache, u, t, boundary_condition::BoundaryConditionPeriodic, - mesh::StructuredMesh{3}, equations, surface_integral, dg::DG) - @assert isperiodic(mesh) + mesh::StructuredMesh{3}, equations, surface_integral, + dg::DG) + @assert isperiodic(mesh) end function calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, - mesh::StructuredMesh{3}, equations, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - linear_indices = LinearIndices(size(mesh)) - - for cell_z in axes(mesh, 3), cell_y in axes(mesh, 2) - # Negative x-direction - direction = 1 - element = linear_indices[begin, cell_y, cell_z] - - for k in eachnode(dg), j in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (1, j, k), (j, k), element) + mesh::StructuredMesh{3}, equations, surface_integral, + dg::DG) + @unpack surface_flux_values = cache.elements + linear_indices = LinearIndices(size(mesh)) + + for cell_z in axes(mesh, 3), cell_y in 
axes(mesh, 2) + # Negative x-direction + direction = 1 + element = linear_indices[begin, cell_y, cell_z] + + for k in eachnode(dg), j in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (1, j, k), (j, k), element) + end + + # Positive x-direction + direction = 2 + element = linear_indices[end, cell_y, cell_z] + + for k in eachnode(dg), j in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (nnodes(dg), j, k), (j, k), + element) + end end - # Positive x-direction - direction = 2 - element = linear_indices[end, cell_y, cell_z] - - for k in eachnode(dg), j in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 1, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (nnodes(dg), j, k), (j, k), element) + for cell_z in axes(mesh, 3), cell_x in axes(mesh, 1) + # Negative y-direction + direction = 3 + element = linear_indices[cell_x, begin, cell_z] + + for k in eachnode(dg), i in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (i, 1, k), (i, k), element) + end + + # Positive y-direction + direction = 4 + element = linear_indices[cell_x, end, cell_z] + + for k in eachnode(dg), i in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (i, nnodes(dg), k), (i, k), + element) + end end - end - - for cell_z in axes(mesh, 3), cell_x in axes(mesh, 1) - # Negative y-direction - direction = 3 - element = linear_indices[cell_x, begin, cell_z] - for k in eachnode(dg), i in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (i, 1, k), (i, k), element) - end - - # Positive y-direction - direction = 4 - element = linear_indices[cell_x, end, cell_z] - - for k in eachnode(dg), i in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 2, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (i, nnodes(dg), k), (i, k), element) + for cell_y in axes(mesh, 2), cell_x in axes(mesh, 1) + # Negative z-direction + direction = 5 + element = linear_indices[cell_x, cell_y, begin] + + for j in eachnode(dg), i in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 3, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (i, j, 1), (i, j), element) + end + + # Positive z-direction + direction = 6 + element = linear_indices[cell_x, cell_y, end] + + for j in eachnode(dg), i in eachnode(dg) + calc_boundary_flux_by_direction!(surface_flux_values, u, t, 3, + boundary_conditions[direction], + mesh, equations, surface_integral, dg, + cache, + direction, (i, j, nnodes(dg)), (i, j), + element) + end end - end - - for cell_y in axes(mesh, 2), cell_x in axes(mesh, 1) - # Negative z-direction - direction = 5 - element = linear_indices[cell_x, cell_y, begin] - - for j in eachnode(dg), i in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 3, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, 
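The `direction` values 1 to 6 used in these boundary loops encode the (-x, +x, -y, +y, -z, +z) faces. As a small illustrative helper (hypothetical name, not part of the package):

# direction = 2 * orientation - 1 for the negative side, 2 * orientation for
# the positive side; hence orientation = cld(direction, 2).
orientation_and_side(direction) = (cld(direction, 2),
                                   isodd(direction) ? :negative : :positive)

orientation_and_side(5)  # == (3, :negative), i.e. the -z boundary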
cache, - direction, (i, j, 1), (i, j), element) - end - - # Positive z-direction - direction = 6 - element = linear_indices[cell_x, cell_y, end] - - for j in eachnode(dg), i in eachnode(dg) - calc_boundary_flux_by_direction!(surface_flux_values, u, t, 3, - boundary_conditions[direction], - mesh, equations, surface_integral, dg, cache, - direction, (i, j, nnodes(dg)), (i, j), element) - end - end end - function apply_jacobian!(du, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, equations, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - factor = -cache.elements.inverse_jacobian[i, j, k, element] - - for v in eachvariable(equations) - du[v, i, j, k, element] *= factor - end + @threaded for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + factor = -cache.elements.inverse_jacobian[i, j, k, element] + + for v in eachvariable(equations) + du[v, i, j, k, element] *= factor + end + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl index 8caff2eff6b..64a3456b940 100644 --- a/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl +++ b/src/solvers/dgsem_structured/dg_3d_compressible_euler.jl @@ -14,733 +14,774 @@ # We do not wrap this code in `@muladd begin ... end` block. Optimizations like # this are handled automatically by LoopVectorization.jl. - # We specialize on `PtrArray` since these will be returned by `Trixi.wrap_array` # if LoopVectorization.jl can handle the array types. This ensures that `@turbo` # works efficiently here. @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + element, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, k, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. - u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, k, element] - rho_v1 = u_cons[2, i, j, k, element] - rho_v2 = u_cons[3, i, j, k, element] - rho_v3 = u_cons[4, i, j, k, element] - rho_e = u_cons[5, i, j, k, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - - u_prim[i, j, k, 1] = rho - u_prim[i, j, k, 2] = v1 - u_prim[i, j, k, 3] = v2 - u_prim[i, j, k, 4] = v3 - u_prim[i, j, k, 5] = p - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). 
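The permuted temporaries introduced in this kernel can be illustrated with plain `Array`s instead of `StrideArray`s. A sketch of the x-direction layout change (hypothetical function name):

# Fuse the (j, k) node indices into one contiguous index jk = j + n * (k - 1)
# so that SIMD instructions can run along the first (contiguous) dimension.
function permute_for_x(u_prim::Array{Float64, 4})  # size (n, n, n, nvars)
    n = size(u_prim, 1)
    nvars = size(u_prim, 4)
    u_perm = Array{Float64}(undef, n^2, n, nvars)
    for v in 1:nvars, k in 1:n, j in 1:n, i in 1:n
        jk = j + n * (k - 1)
        u_perm[jk, i, v] = u_prim[i, j, k, v]
    end
    return u_perm
end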
- du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # We must also permute the contravariant vectors. - contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - contravariant_vectors_x[jk, i, 1] = contravariant_vectors[1, 1, i, j, k, element] - contravariant_vectors_x[jk, i, 2] = contravariant_vectors[2, 1, i, j, k, element] - contravariant_vectors_x[jk, i, 3] = contravariant_vectors[3, 1, i, j, k, element] - end - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. - for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for jk in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_permuted[jk, i, 1] - v1_ll = u_prim_permuted[jk, i, 2] - v2_ll = u_prim_permuted[jk, i, 3] - v3_ll = u_prim_permuted[jk, i, 4] - p_ll = u_prim_permuted[jk, i, 5] - - rho_rr = u_prim_permuted[jk, ii, 1] - v1_rr = u_prim_permuted[jk, ii, 2] - v2_rr = u_prim_permuted[jk, ii, 3] - v3_rr = u_prim_permuted[jk, ii, 4] - p_rr = u_prim_permuted[jk, ii, 5] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_x[jk, i, 1] + contravariant_vectors_x[jk, ii, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_x[jk, i, 2] + contravariant_vectors_x[jk, ii, 2]) - normal_direction_3 = 0.5 * ( - contravariant_vectors_x[jk, i, 3] + contravariant_vectors_x[jk, ii, 3]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v3_ll * normal_direction_3 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + v3_rr * normal_direction_3 - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = f1 * v3_avg + p_avg * normal_direction_3 - f5 = ( f1 * velocity_square_avg + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[jk, i, 1] += factor_i * f1 - du_permuted[jk, i, 2] += factor_i * f2 - du_permuted[jk, i, 3] += factor_i * f3 - du_permuted[jk, i, 4] += factor_i * f4 - du_permuted[jk, i, 5] += factor_i * f5 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[jk, ii, 1] += factor_ii * f1 - du_permuted[jk, ii, 2] += factor_ii * f2 - du_permuted[jk, ii, 3] += factor_ii * f3 - du_permuted[jk, ii, 4] += factor_ii * f4 - du_permuted[jk, ii, 5] += factor_ii * f5 + 
@unpack derivative_split = dg.basis + @unpack contravariant_vectors = cache.elements + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, k, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. + u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations)))) + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, k, element] + rho_v1 = u_cons[2, i, j, k, element] + rho_v2 = u_cons[3, i, j, k, element] + rho_v3 = u_cons[4, i, j, k, element] + rho_e = u_cons[5, i, j, k, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + + u_prim[i, j, k, 1] = rho + u_prim[i, j, k, 2] = v1 + u_prim[i, j, k, 3] = v2 + u_prim[i, j, k, 4] = v3 + u_prim[i, j, k, 5] = p end - end - - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - du[i, j, k, v] = du_permuted[jk, i, v] - end - - - # y direction - # We must also permute the contravariant vectors. - contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - contravariant_vectors_y[i, j, k, 1] = contravariant_vectors[1, 2, i, j, k, element] - contravariant_vectors_y[i, j, k, 2] = contravariant_vectors[2, 2, i, j, k, element] - contravariant_vectors_y[i, j, k, 3] = contravariant_vectors[3, 2, i, j, k, element] - end - - # A possible permutation of array dimensions with improved opportunities for - # SIMD vectorization appeared to be slower than the direct version used here - # in preliminary numerical experiments on an AVX2 system. 
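The node-local conversion from conserved to primitive variables performed above is, in isolation (hypothetical helper name; `gamma` is the ratio of specific heats):

function cons2prim_euler3d_sketch(u, gamma)
    rho, rho_v1, rho_v2, rho_v3, rho_e = u
    v1, v2, v3 = rho_v1 / rho, rho_v2 / rho, rho_v3 / rho
    # pressure from the ideal-gas law, subtracting the kinetic energy
    p = (gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3))
    return (rho, v1, v2, v3, p)
end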
- for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for k in eachnode(dg), i in eachnode(dg) - rho_ll = u_prim[i, j, k, 1] - v1_ll = u_prim[i, j, k, 2] - v2_ll = u_prim[i, j, k, 3] - v3_ll = u_prim[i, j, k, 4] - p_ll = u_prim[i, j, k, 5] - - rho_rr = u_prim[i, jj, k, 1] - v1_rr = u_prim[i, jj, k, 2] - v2_rr = u_prim[i, jj, k, 3] - v3_rr = u_prim[i, jj, k, 4] - p_rr = u_prim[i, jj, k, 5] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_y[i, j, k, 1] + contravariant_vectors_y[i, jj, k, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_y[i, j, k, 2] + contravariant_vectors_y[i, jj, k, 2]) - normal_direction_3 = 0.5 * ( - contravariant_vectors_y[i, j, k, 3] + contravariant_vectors_y[i, jj, k, 3]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v3_ll * normal_direction_3 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + v3_rr * normal_direction_3 - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = f1 * v3_avg + p_avg * normal_direction_3 - f5 = ( f1 * velocity_square_avg + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, k, 1] += factor_j * f1 - du[i, j, k, 2] += factor_j * f2 - du[i, j, k, 3] += factor_j * f3 - du[i, j, k, 4] += factor_j * f4 - du[i, j, k, 5] += factor_j * f5 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, k, 1] += factor_jj * f1 - du[i, jj, k, 2] += factor_jj * f2 - du[i, jj, k, 3] += factor_jj * f3 - du[i, jj, k, 4] += factor_jj * f4 - du[i, jj, k, 5] += factor_jj * f5 + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). + du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + jk = j + nnodes(dg) * (k - 1) + u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] + end + fill!(du_permuted, zero(eltype(du_permuted))) + + # We must also permute the contravariant vectors. + contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + jk = j + nnodes(dg) * (k - 1) + contravariant_vectors_x[jk, i, 1] = contravariant_vectors[1, 1, i, j, k, element] + contravariant_vectors_x[jk, i, 2] = contravariant_vectors[2, 1, i, j, k, element] + contravariant_vectors_x[jk, i, 3] = contravariant_vectors[3, 1, i, j, k, element] end - end + # Next, we basically inline the volume flux. 
To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. + for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for jk in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_permuted[jk, i, 1] + v1_ll = u_prim_permuted[jk, i, 2] + v2_ll = u_prim_permuted[jk, i, 3] + v3_ll = u_prim_permuted[jk, i, 4] + p_ll = u_prim_permuted[jk, i, 5] + + rho_rr = u_prim_permuted[jk, ii, 1] + v1_rr = u_prim_permuted[jk, ii, 2] + v2_rr = u_prim_permuted[jk, ii, 3] + v3_rr = u_prim_permuted[jk, ii, 4] + p_rr = u_prim_permuted[jk, ii, 5] + + normal_direction_1 = 0.5 * (contravariant_vectors_x[jk, i, 1] + + contravariant_vectors_x[jk, ii, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_x[jk, i, 2] + + contravariant_vectors_x[jk, ii, 2]) + normal_direction_3 = 0.5 * (contravariant_vectors_x[jk, i, 3] + + contravariant_vectors_x[jk, ii, 3]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + + v3_ll * normal_direction_3 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + v3_rr * normal_direction_3 + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = f1 * v3_avg + p_avg * normal_direction_3 + f5 = (f1 * velocity_square_avg + + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[jk, i, 1] += factor_i * f1 + du_permuted[jk, i, 2] += factor_i * f2 + du_permuted[jk, i, 3] += factor_i * f3 + du_permuted[jk, i, 4] += factor_i * f4 + du_permuted[jk, i, 5] += factor_i * f5 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[jk, ii, 1] += factor_ii * f1 + du_permuted[jk, ii, 2] += factor_ii * f2 + du_permuted[jk, ii, 3] += factor_ii * f3 + du_permuted[jk, ii, 4] += factor_ii * f4 + du_permuted[jk, ii, 5] += factor_ii * f5 + end + end - # z direction - # The memory layout is already optimal for SIMD vectorization in this loop. - # We just squeeze the first two dimensions to make the code slightly faster. - GC.@preserve u_prim begin - u_prim_reshaped = PtrArray(pointer(u_prim), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) + @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) - du_reshaped = PtrArray(pointer(du), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) + jk = j + nnodes(dg) * (k - 1) + du[i, j, k, v] = du_permuted[jk, i, v] + end + # y direction # We must also permute the contravariant vectors. 
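The triangular loop above exploits the symmetry of the two-point flux: f(i, ii) == f(ii, i), so each unordered pair is evaluated once and scattered to both rows with the two (generally different) entries of the split-form derivative matrix. A minimal sketch with plain arrays and hypothetical names:

# du: n x nvars accumulator, D: n x n split-form derivative matrix,
# f(i, ii): symmetric two-point flux returning an nvars-vector.
function symmetric_flux_update!(du::Matrix{Float64}, D::Matrix{Float64}, f)
    n = size(D, 1)
    for i in 1:n, ii in (i + 1):n
        fij = f(i, ii)               # computed once per unordered pair
        du[i, :] .+= D[i, ii] .* fij
        du[ii, :] .+= D[ii, i] .* fij
    end
    return du
end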
- contravariant_vectors_z = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - ij = i + nnodes(dg) * (j- 1) - contravariant_vectors_z[ij, k, 1] = contravariant_vectors[1, 3, i, j, k, element] - contravariant_vectors_z[ij, k, 2] = contravariant_vectors[2, 3, i, j, k, element] - contravariant_vectors_z[ij, k, 3] = contravariant_vectors[3, 3, i, j, k, element] + contravariant_vectors_y[i, j, k, 1] = contravariant_vectors[1, 2, i, j, k, element] + contravariant_vectors_y[i, j, k, 2] = contravariant_vectors[2, 2, i, j, k, element] + contravariant_vectors_y[i, j, k, 3] = contravariant_vectors[3, 2, i, j, k, element] end - for k in eachnode(dg), kk in (k+1):nnodes(dg) - @turbo for ij in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_reshaped[ij, k, 1] - v1_ll = u_prim_reshaped[ij, k, 2] - v2_ll = u_prim_reshaped[ij, k, 3] - v3_ll = u_prim_reshaped[ij, k, 4] - p_ll = u_prim_reshaped[ij, k, 5] - - rho_rr = u_prim_reshaped[ij, kk, 1] - v1_rr = u_prim_reshaped[ij, kk, 2] - v2_rr = u_prim_reshaped[ij, kk, 3] - v3_rr = u_prim_reshaped[ij, kk, 4] - p_rr = u_prim_reshaped[ij, kk, 5] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_z[ij, k, 1] + contravariant_vectors_z[ij, kk, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_z[ij, k, 2] + contravariant_vectors_z[ij, kk, 2]) - normal_direction_3 = 0.5 * ( - contravariant_vectors_z[ij, k, 3] + contravariant_vectors_z[ij, kk, 3]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v3_ll * normal_direction_3 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + v3_rr * normal_direction_3 - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_avg * v_dot_n_avg - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = f1 * v3_avg + p_avg * normal_direction_3 - f5 = ( f1 * velocity_square_avg + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_k = alpha * derivative_split[k, kk] - du_reshaped[ij, k, 1] += factor_k * f1 - du_reshaped[ij, k, 2] += factor_k * f2 - du_reshaped[ij, k, 3] += factor_k * f3 - du_reshaped[ij, k, 4] += factor_k * f4 - du_reshaped[ij, k, 5] += factor_k * f5 - - factor_kk = alpha * derivative_split[kk, k] - du_reshaped[ij, kk, 1] += factor_kk * f1 - du_reshaped[ij, kk, 2] += factor_kk * f2 - du_reshaped[ij, kk, 3] += factor_kk * f3 - du_reshaped[ij, kk, 4] += factor_kk * f4 - du_reshaped[ij, kk, 5] += factor_kk * f5 - end + # A possible permutation of array dimensions with improved opportunities for + # SIMD vectorization appeared to be slower than the direct version used here + # in preliminary numerical experiments on an AVX2 system. 
+ for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for k in eachnode(dg), i in eachnode(dg) + rho_ll = u_prim[i, j, k, 1] + v1_ll = u_prim[i, j, k, 2] + v2_ll = u_prim[i, j, k, 3] + v3_ll = u_prim[i, j, k, 4] + p_ll = u_prim[i, j, k, 5] + + rho_rr = u_prim[i, jj, k, 1] + v1_rr = u_prim[i, jj, k, 2] + v2_rr = u_prim[i, jj, k, 3] + v3_rr = u_prim[i, jj, k, 4] + p_rr = u_prim[i, jj, k, 5] + + normal_direction_1 = 0.5 * (contravariant_vectors_y[i, j, k, 1] + + contravariant_vectors_y[i, jj, k, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_y[i, j, k, 2] + + contravariant_vectors_y[i, jj, k, 2]) + normal_direction_3 = 0.5 * (contravariant_vectors_y[i, j, k, 3] + + contravariant_vectors_y[i, jj, k, 3]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + + v3_ll * normal_direction_3 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + v3_rr * normal_direction_3 + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = f1 * v3_avg + p_avg * normal_direction_3 + f5 = (f1 * velocity_square_avg + + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, k, 1] += factor_j * f1 + du[i, j, k, 2] += factor_j * f2 + du[i, j, k, 3] += factor_j * f3 + du[i, j, k, 4] += factor_j * f4 + du[i, j, k, 5] += factor_j * f5 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, k, 1] += factor_jj * f1 + du[i, jj, k, 2] += factor_jj * f2 + du[i, jj, k, 3] += factor_jj * f3 + du[i, jj, k, 4] += factor_jj * f4 + du[i, jj, k, 5] += factor_jj * f5 + end end - end # GC.@preserve u_prim begin - - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, k, element] += du[i, j, k, v] - end + # z direction + # The memory layout is already optimal for SIMD vectorization in this loop. + # We just squeeze the first two dimensions to make the code slightly faster. + GC.@preserve u_prim begin + u_prim_reshaped = PtrArray(pointer(u_prim), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + du_reshaped = PtrArray(pointer(du), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + # We must also permute the contravariant vectors. 
+ contravariant_vectors_z = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + ij = i + nnodes(dg) * (j - 1) + contravariant_vectors_z[ij, k, 1] = contravariant_vectors[1, 3, i, j, k, + element] + contravariant_vectors_z[ij, k, 2] = contravariant_vectors[2, 3, i, j, k, + element] + contravariant_vectors_z[ij, k, 3] = contravariant_vectors[3, 3, i, j, k, + element] + end + + for k in eachnode(dg), kk in (k + 1):nnodes(dg) + @turbo for ij in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_reshaped[ij, k, 1] + v1_ll = u_prim_reshaped[ij, k, 2] + v2_ll = u_prim_reshaped[ij, k, 3] + v3_ll = u_prim_reshaped[ij, k, 4] + p_ll = u_prim_reshaped[ij, k, 5] + + rho_rr = u_prim_reshaped[ij, kk, 1] + v1_rr = u_prim_reshaped[ij, kk, 2] + v2_rr = u_prim_reshaped[ij, kk, 3] + v3_rr = u_prim_reshaped[ij, kk, 4] + p_rr = u_prim_reshaped[ij, kk, 5] + + normal_direction_1 = 0.5 * (contravariant_vectors_z[ij, k, 1] + + contravariant_vectors_z[ij, kk, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_z[ij, k, 2] + + contravariant_vectors_z[ij, kk, 2]) + normal_direction_3 = 0.5 * (contravariant_vectors_z[ij, k, 3] + + contravariant_vectors_z[ij, kk, 3]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + + v3_ll * normal_direction_3 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + v3_rr * normal_direction_3 + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + v_dot_n_avg = 0.5 * (v_dot_n_ll + v_dot_n_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_avg * v_dot_n_avg + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = f1 * v3_avg + p_avg * normal_direction_3 + f5 = (f1 * velocity_square_avg + + p_avg * v_dot_n_avg * equations.inv_gamma_minus_one + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_k = alpha * derivative_split[k, kk] + du_reshaped[ij, k, 1] += factor_k * f1 + du_reshaped[ij, k, 2] += factor_k * f2 + du_reshaped[ij, k, 3] += factor_k * f3 + du_reshaped[ij, k, 4] += factor_k * f4 + du_reshaped[ij, k, 5] += factor_k * f5 + + factor_kk = alpha * derivative_split[kk, k] + du_reshaped[ij, kk, 1] += factor_kk * f1 + du_reshaped[ij, kk, 2] += factor_kk * f2 + du_reshaped[ij, kk, 3] += factor_kk * f3 + du_reshaped[ij, kk, 4] += factor_kk * f4 + du_reshaped[ij, kk, 5] += factor_kk * f5 + end + end + end # GC.@preserve u_prim begin + + # Finally, we add the temporary RHS computed here to the global RHS in the + # given `element`. 
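The pointer-based reshape in the z sweep above can be mimicked with plain `reshape`, which also shares memory with its argument; a sketch with a hypothetical function name:

# View an (n, n, n, nvars) buffer as (n^2, n, nvars) without copying, giving
# the z-direction loop the same contiguous-first-index layout as the x sweep.
function squeeze_for_z(u_prim::Array{Float64, 4})
    n = size(u_prim, 1)
    nvars = size(u_prim, 4)
    return reshape(u_prim, n^2, n, nvars)  # shares memory with u_prim
end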
+ @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + _du[v, i, j, k, element] += du[i, j, k, v] + end end - - @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, - element, mesh::Union{StructuredMesh{3}, P4estMesh{3}}, + element, + mesh::Union{StructuredMesh{3}, P4estMesh{3}}, nonconservative_terms::False, equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - @unpack contravariant_vectors = cache.elements - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, k, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. In addition - # to the usual primitive variables, we also compute logarithms of the density - # and pressure to increase the performance of the required logarithmic mean - # values. - u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, k, element] - rho_v1 = u_cons[2, i, j, k, element] - rho_v2 = u_cons[3, i, j, k, element] - rho_v3 = u_cons[4, i, j, k, element] - rho_e = u_cons[5, i, j, k, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - - u_prim[i, j, k, 1] = rho - u_prim[i, j, k, 2] = v1 - u_prim[i, j, k, 3] = v2 - u_prim[i, j, k, 4] = v3 - u_prim[i, j, k, 5] = p - u_prim[i, j, k, 6] = log(rho) - u_prim[i, j, k, 7] = log(p) - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). - du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations) + 2))) - - @turbo for v in indices(u_prim, 4), # v in eachvariable(equations) misses +2 logs - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # We must also permute the contravariant vectors. - contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - contravariant_vectors_x[jk, i, 1] = contravariant_vectors[1, 1, i, j, k, element] - contravariant_vectors_x[jk, i, 2] = contravariant_vectors[2, 1, i, j, k, element] - contravariant_vectors_x[jk, i, 3] = contravariant_vectors[3, 1, i, j, k, element] - end - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. 
- for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for jk in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_permuted[jk, i, 1] - v1_ll = u_prim_permuted[jk, i, 2] - v2_ll = u_prim_permuted[jk, i, 3] - v3_ll = u_prim_permuted[jk, i, 4] - p_ll = u_prim_permuted[jk, i, 5] - log_rho_ll = u_prim_permuted[jk, i, 6] - log_p_ll = u_prim_permuted[jk, i, 7] - - rho_rr = u_prim_permuted[jk, ii, 1] - v1_rr = u_prim_permuted[jk, ii, 2] - v2_rr = u_prim_permuted[jk, ii, 3] - v3_rr = u_prim_permuted[jk, ii, 4] - p_rr = u_prim_permuted[jk, ii, 5] - log_rho_rr = u_prim_permuted[jk, ii, 6] - log_p_rr = u_prim_permuted[jk, ii, 7] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_x[jk, i, 1] + contravariant_vectors_x[jk, ii, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_x[jk, i, 2] + contravariant_vectors_x[jk, ii, 2]) - normal_direction_3 = 0.5 * ( - contravariant_vectors_x[jk, i, 3] + contravariant_vectors_x[jk, ii, 3]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v3_ll * normal_direction_3 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + v3_rr * normal_direction_3 - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr + v3_ll*v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = f1 * v3_avg + p_avg * normal_direction_3 - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[jk, i, 1] += factor_i * f1 - du_permuted[jk, i, 2] += factor_i * f2 - du_permuted[jk, i, 3] += factor_i * f3 - du_permuted[jk, i, 4] += factor_i * f4 - du_permuted[jk, i, 5] += factor_i * f5 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[jk, ii, 1] += factor_ii * f1 - du_permuted[jk, ii, 2] += factor_ii * f2 - du_permuted[jk, ii, 3] += factor_ii * f3 - du_permuted[jk, ii, 4] += factor_ii * f4 - du_permuted[jk, ii, 5] += factor_ii * f5 + @unpack derivative_split = dg.basis + @unpack contravariant_vectors = 
cache.elements + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, k, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. In addition + # to the usual primitive variables, we also compute logarithms of the density + # and pressure to increase the performance of the required logarithmic mean + # values. + u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, k, element] + rho_v1 = u_cons[2, i, j, k, element] + rho_v2 = u_cons[3, i, j, k, element] + rho_v3 = u_cons[4, i, j, k, element] + rho_e = u_cons[5, i, j, k, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + + u_prim[i, j, k, 1] = rho + u_prim[i, j, k, 2] = v1 + u_prim[i, j, k, 3] = v2 + u_prim[i, j, k, 4] = v3 + u_prim[i, j, k, 5] = p + u_prim[i, j, k, 6] = log(rho) + u_prim[i, j, k, 7] = log(p) end - end - - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - du[i, j, k, v] = du_permuted[jk, i, v] - end - - - # y direction - # We must also permute the contravariant vectors. - contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - contravariant_vectors_y[i, j, k, 1] = contravariant_vectors[1, 2, i, j, k, element] - contravariant_vectors_y[i, j, k, 2] = contravariant_vectors[2, 2, i, j, k, element] - contravariant_vectors_y[i, j, k, 3] = contravariant_vectors[3, 2, i, j, k, element] - end - - # A possible permutation of array dimensions with improved opportunities for - # SIMD vectorization appeared to be slower than the direct version used here - # in preliminary numerical experiments on an AVX2 system. 
- for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for k in eachnode(dg), i in eachnode(dg) - rho_ll = u_prim[i, j, k, 1] - v1_ll = u_prim[i, j, k, 2] - v2_ll = u_prim[i, j, k, 3] - v3_ll = u_prim[i, j, k, 4] - p_ll = u_prim[i, j, k, 5] - log_rho_ll = u_prim[i, j, k, 6] - log_p_ll = u_prim[i, j, k, 7] - - rho_rr = u_prim[i, jj, k, 1] - v1_rr = u_prim[i, jj, k, 2] - v2_rr = u_prim[i, jj, k, 3] - v3_rr = u_prim[i, jj, k, 4] - p_rr = u_prim[i, jj, k, 5] - log_rho_rr = u_prim[i, jj, k, 6] - log_p_rr = u_prim[i, jj, k, 7] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_y[i, j, k, 1] + contravariant_vectors_y[i, jj, k, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_y[i, j, k, 2] + contravariant_vectors_y[i, jj, k, 2]) - normal_direction_3 = 0.5 * ( - contravariant_vectors_y[i, j, k, 3] + contravariant_vectors_y[i, jj, k, 3]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v3_ll * normal_direction_3 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + v3_rr * normal_direction_3 - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr + v3_ll*v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = f1 * v3_avg + p_avg * normal_direction_3 - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, k, 1] += factor_j * f1 - du[i, j, k, 2] += factor_j * f2 - du[i, j, k, 3] += factor_j * f3 - du[i, j, k, 4] += factor_j * f4 - du[i, j, k, 5] += factor_j * f5 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, k, 1] += factor_jj * f1 - du[i, jj, k, 2] += factor_jj * f2 - du[i, jj, k, 3] += factor_jj * f3 - du[i, jj, k, 4] += factor_jj * f4 - du[i, jj, k, 5] += factor_jj * f5 + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). 
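The layout permutation introduced next can be illustrated with plain `Array`s and made-up data (the kernel itself uses statically sized `StrideArray`s to avoid heap allocations):

```julia
# Sketch only: collapse (j, k) into one contiguous index jk so that inner
# loops can run along the first (contiguous) array dimension.
n = 4
u = rand(n, n, n)           # layout [i, j, k]
u_permuted = zeros(n^2, n)  # layout [jk, i]
for k in 1:n, j in 1:n, i in 1:n
    jk = j + n * (k - 1)
    u_permuted[jk, i] = u[i, j, k]
end
@assert u_permuted[2 + n * (3 - 1), 1] == u[1, 2, 3]
```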
+ du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations) + 2))) + + @turbo for v in indices(u_prim, 4), # v in eachvariable(equations) misses +2 logs + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + jk = j + nnodes(dg) * (k - 1) + u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] end - end + fill!(du_permuted, zero(eltype(du_permuted))) + # We must also permute the contravariant vectors. + contravariant_vectors_x = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) - # z direction - # The memory layout is already optimal for SIMD vectorization in this loop. - # We just squeeze the first two dimensions to make the code slightly faster. - GC.@preserve u_prim begin - u_prim_reshaped = PtrArray(pointer(u_prim), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations) + 2))) + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + jk = j + nnodes(dg) * (k - 1) + contravariant_vectors_x[jk, i, 1] = contravariant_vectors[1, 1, i, j, k, element] + contravariant_vectors_x[jk, i, 2] = contravariant_vectors[2, 1, i, j, k, element] + contravariant_vectors_x[jk, i, 3] = contravariant_vectors[3, 1, i, j, k, element] + end - du_reshaped = PtrArray(pointer(du), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) + # Next, we basically inline the volume flux. To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. + for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for jk in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_permuted[jk, i, 1] + v1_ll = u_prim_permuted[jk, i, 2] + v2_ll = u_prim_permuted[jk, i, 3] + v3_ll = u_prim_permuted[jk, i, 4] + p_ll = u_prim_permuted[jk, i, 5] + log_rho_ll = u_prim_permuted[jk, i, 6] + log_p_ll = u_prim_permuted[jk, i, 7] + + rho_rr = u_prim_permuted[jk, ii, 1] + v1_rr = u_prim_permuted[jk, ii, 2] + v2_rr = u_prim_permuted[jk, ii, 3] + v3_rr = u_prim_permuted[jk, ii, 4] + p_rr = u_prim_permuted[jk, ii, 5] + log_rho_rr = u_prim_permuted[jk, ii, 6] + log_p_rr = u_prim_permuted[jk, ii, 7] + + normal_direction_1 = 0.5 * (contravariant_vectors_x[jk, i, 1] + + contravariant_vectors_x[jk, ii, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_x[jk, i, 2] + + contravariant_vectors_x[jk, ii, 2]) + normal_direction_3 = 0.5 * (contravariant_vectors_x[jk, i, 3] + + contravariant_vectors_x[jk, ii, 3]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + + v3_ll * normal_direction_3 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + v3_rr * normal_direction_3 + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. 
This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = f1 * v3_avg + p_avg * normal_direction_3 + f5 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[jk, i, 1] += factor_i * f1 + du_permuted[jk, i, 2] += factor_i * f2 + du_permuted[jk, i, 3] += factor_i * f3 + du_permuted[jk, i, 4] += factor_i * f4 + du_permuted[jk, i, 5] += factor_i * f5 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[jk, ii, 1] += factor_ii * f1 + du_permuted[jk, ii, 2] += factor_ii * f2 + du_permuted[jk, ii, 3] += factor_ii * f3 + du_permuted[jk, ii, 4] += factor_ii * f4 + du_permuted[jk, ii, 5] += factor_ii * f5 + end + end + + @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + jk = j + nnodes(dg) * (k - 1) + du[i, j, k, v] = du_permuted[jk, i, v] + end + # y direction # We must also permute the contravariant vectors. 
- contravariant_vectors_z = StrideArray{eltype(contravariant_vectors)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(ndims(mesh)))) + contravariant_vectors_y = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - ij = i + nnodes(dg) * (j- 1) - contravariant_vectors_z[ij, k, 1] = contravariant_vectors[1, 3, i, j, k, element] - contravariant_vectors_z[ij, k, 2] = contravariant_vectors[2, 3, i, j, k, element] - contravariant_vectors_z[ij, k, 3] = contravariant_vectors[3, 3, i, j, k, element] + contravariant_vectors_y[i, j, k, 1] = contravariant_vectors[1, 2, i, j, k, element] + contravariant_vectors_y[i, j, k, 2] = contravariant_vectors[2, 2, i, j, k, element] + contravariant_vectors_y[i, j, k, 3] = contravariant_vectors[3, 2, i, j, k, element] end - for k in eachnode(dg), kk in (k+1):nnodes(dg) - @turbo for ij in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_reshaped[ij, k, 1] - v1_ll = u_prim_reshaped[ij, k, 2] - v2_ll = u_prim_reshaped[ij, k, 3] - v3_ll = u_prim_reshaped[ij, k, 4] - p_ll = u_prim_reshaped[ij, k, 5] - log_rho_ll = u_prim_reshaped[ij, k, 6] - log_p_ll = u_prim_reshaped[ij, k, 7] - - rho_rr = u_prim_reshaped[ij, kk, 1] - v1_rr = u_prim_reshaped[ij, kk, 2] - v2_rr = u_prim_reshaped[ij, kk, 3] - v3_rr = u_prim_reshaped[ij, kk, 4] - p_rr = u_prim_reshaped[ij, kk, 5] - log_rho_rr = u_prim_reshaped[ij, kk, 6] - log_p_rr = u_prim_reshaped[ij, kk, 7] - - normal_direction_1 = 0.5 * ( - contravariant_vectors_z[ij, k, 1] + contravariant_vectors_z[ij, kk, 1]) - normal_direction_2 = 0.5 * ( - contravariant_vectors_z[ij, k, 2] + contravariant_vectors_z[ij, kk, 2]) - normal_direction_3 = 0.5 * ( - contravariant_vectors_z[ij, k, 3] + contravariant_vectors_z[ij, kk, 3]) - - v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + v3_ll * normal_direction_3 - v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + v3_rr * normal_direction_3 - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. 
This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * (p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll*v1_rr + v2_ll*v2_rr + v3_ll*v3_rr) - - # Calculate fluxes depending on normal_direction - f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) - f2 = f1 * v1_avg + p_avg * normal_direction_1 - f3 = f1 * v2_avg + p_avg * normal_direction_2 - f4 = f1 * v3_avg + p_avg * normal_direction_3 - f5 = ( f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) - + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll) ) - - # Add scaled fluxes to RHS - factor_k = alpha * derivative_split[k, kk] - du_reshaped[ij, k, 1] += factor_k * f1 - du_reshaped[ij, k, 2] += factor_k * f2 - du_reshaped[ij, k, 3] += factor_k * f3 - du_reshaped[ij, k, 4] += factor_k * f4 - du_reshaped[ij, k, 5] += factor_k * f5 - - factor_kk = alpha * derivative_split[kk, k] - du_reshaped[ij, kk, 1] += factor_kk * f1 - du_reshaped[ij, kk, 2] += factor_kk * f2 - du_reshaped[ij, kk, 3] += factor_kk * f3 - du_reshaped[ij, kk, 4] += factor_kk * f4 - du_reshaped[ij, kk, 5] += factor_kk * f5 - end + # A possible permutation of array dimensions with improved opportunities for + # SIMD vectorization appeared to be slower than the direct version used here + # in preliminary numerical experiments on an AVX2 system. 
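The inlined logarithmic-mean branches used throughout these kernels correspond to the following scalar helper (a sketch of the underlying idea; Trixi.jl ships its own `ln_mean`):

```julia
# Sketch only: logarithmic mean (y - x) / (log(y) - log(x)), which becomes
# ill-conditioned for x ≈ y and is therefore replaced by a truncated series.
function ln_mean_sketch(x, y)
    z = (y - x)^2 / (x + y)^2
    if z < 1.0e-4
        # Nearly equal arguments: series expansion, no cancellation.
        return (x + y) / (2 + z * (2 / 3 + z * (2 / 5 + 2 / 7 * z)))
    else
        # Well-separated arguments: direct evaluation.
        return (y - x) / (log(y) - log(x))
    end
end

ln_mean_sketch(1.0, 1.0 + 1.0e-8) # ≈ 1.0, without catastrophic cancellation
```

In the kernels, the branch is expressed via `ifelse` on precomputed logarithms, so both paths are evaluated branch-free and `@turbo` can still vectorize the loop.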
+ for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for k in eachnode(dg), i in eachnode(dg) + rho_ll = u_prim[i, j, k, 1] + v1_ll = u_prim[i, j, k, 2] + v2_ll = u_prim[i, j, k, 3] + v3_ll = u_prim[i, j, k, 4] + p_ll = u_prim[i, j, k, 5] + log_rho_ll = u_prim[i, j, k, 6] + log_p_ll = u_prim[i, j, k, 7] + + rho_rr = u_prim[i, jj, k, 1] + v1_rr = u_prim[i, jj, k, 2] + v2_rr = u_prim[i, jj, k, 3] + v3_rr = u_prim[i, jj, k, 4] + p_rr = u_prim[i, jj, k, 5] + log_rho_rr = u_prim[i, jj, k, 6] + log_p_rr = u_prim[i, jj, k, 7] + + normal_direction_1 = 0.5 * (contravariant_vectors_y[i, j, k, 1] + + contravariant_vectors_y[i, jj, k, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_y[i, j, k, 2] + + contravariant_vectors_y[i, jj, k, 2]) + normal_direction_3 = 0.5 * (contravariant_vectors_y[i, j, k, 3] + + contravariant_vectors_y[i, jj, k, 3]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + + v3_ll * normal_direction_3 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + v3_rr * normal_direction_3 + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on normal_direction + f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr) + f2 = f1 * v1_avg + p_avg * normal_direction_1 + f3 = f1 * v2_avg + p_avg * normal_direction_2 + f4 = f1 * v3_avg + p_avg * normal_direction_3 + f5 = (f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + + 0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll)) + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, k, 1] += factor_j * f1 + du[i, j, k, 2] += factor_j * f2 + du[i, j, k, 3] += factor_j * f3 + du[i, j, k, 4] += factor_j * f4 + du[i, j, k, 5] += factor_j * f5 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, k, 1] += factor_jj * f1 + du[i, jj, k, 2] += factor_jj * f2 + du[i, jj, k, 3] += factor_jj * f3 + du[i, jj, k, 4] += factor_jj * f4 + du[i, jj, k, 5] += factor_jj * f5 + end end - end # GC.@preserve u_prim begin - - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. 
- @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, k, element] += du[i, j, k, v] - end + # z direction + # The memory layout is already optimal for SIMD vectorization in this loop. + # We just squeeze the first two dimensions to make the code slightly faster. + GC.@preserve u_prim begin + u_prim_reshaped = PtrArray(pointer(u_prim), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations) + 2))) + + du_reshaped = PtrArray(pointer(du), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + # We must also permute the contravariant vectors. + contravariant_vectors_z = StrideArray{eltype(contravariant_vectors)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(ndims(mesh)))) + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + ij = i + nnodes(dg) * (j - 1) + contravariant_vectors_z[ij, k, 1] = contravariant_vectors[1, 3, i, j, k, + element] + contravariant_vectors_z[ij, k, 2] = contravariant_vectors[2, 3, i, j, k, + element] + contravariant_vectors_z[ij, k, 3] = contravariant_vectors[3, 3, i, j, k, + element] + end + + for k in eachnode(dg), kk in (k + 1):nnodes(dg) + @turbo for ij in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_reshaped[ij, k, 1] + v1_ll = u_prim_reshaped[ij, k, 2] + v2_ll = u_prim_reshaped[ij, k, 3] + v3_ll = u_prim_reshaped[ij, k, 4] + p_ll = u_prim_reshaped[ij, k, 5] + log_rho_ll = u_prim_reshaped[ij, k, 6] + log_p_ll = u_prim_reshaped[ij, k, 7] + + rho_rr = u_prim_reshaped[ij, kk, 1] + v1_rr = u_prim_reshaped[ij, kk, 2] + v2_rr = u_prim_reshaped[ij, kk, 3] + v3_rr = u_prim_reshaped[ij, kk, 4] + p_rr = u_prim_reshaped[ij, kk, 5] + log_rho_rr = u_prim_reshaped[ij, kk, 6] + log_p_rr = u_prim_reshaped[ij, kk, 7] + + normal_direction_1 = 0.5 * (contravariant_vectors_z[ij, k, 1] + + contravariant_vectors_z[ij, kk, 1]) + normal_direction_2 = 0.5 * (contravariant_vectors_z[ij, k, 2] + + contravariant_vectors_z[ij, kk, 2]) + normal_direction_3 = 0.5 * (contravariant_vectors_z[ij, k, 3] + + contravariant_vectors_z[ij, kk, 3]) + + v_dot_n_ll = v1_ll * normal_direction_1 + v2_ll * normal_direction_2 + + v3_ll * normal_direction_3 + v_dot_n_rr = v1_rr * normal_direction_1 + v2_rr * normal_direction_2 + + v3_rr * normal_direction_3 + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. 
This is equivalent to
+                # rho_mean = ln_mean(rho_ll, rho_rr)
+                x1 = rho_ll
+                log_x1 = log_rho_ll
+                y1 = rho_rr
+                log_y1 = log_rho_rr
+                x1_plus_y1 = x1 + y1
+                y1_minus_x1 = y1 - x1
+                z1 = y1_minus_x1^2 / x1_plus_y1^2
+                special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1)))
+                regular_path1 = y1_minus_x1 / (log_y1 - log_x1)
+                rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1)
+
+                # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)`
+                # in exact arithmetic since
+                #     log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ)
+                #   = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ)
+                # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll)
+                x2 = rho_ll * p_rr
+                log_x2 = log_rho_ll + log_p_rr
+                y2 = rho_rr * p_ll
+                log_y2 = log_rho_rr + log_p_ll
+                x2_plus_y2 = x2 + y2
+                y2_minus_x2 = y2 - x2
+                z2 = y2_minus_x2^2 / x2_plus_y2^2
+                special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2
+                regular_path2 = (log_y2 - log_x2) / y2_minus_x2
+                inv_rho_p_mean = p_ll * p_rr *
+                                 ifelse(z2 < 1.0e-4, special_path2, regular_path2)
+
+                v1_avg = 0.5 * (v1_ll + v1_rr)
+                v2_avg = 0.5 * (v2_ll + v2_rr)
+                v3_avg = 0.5 * (v3_ll + v3_rr)
+                p_avg = 0.5 * (p_ll + p_rr)
+                velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr)
+
+                # Calculate fluxes depending on normal_direction
+                f1 = rho_mean * 0.5 * (v_dot_n_ll + v_dot_n_rr)
+                f2 = f1 * v1_avg + p_avg * normal_direction_1
+                f3 = f1 * v2_avg + p_avg * normal_direction_2
+                f4 = f1 * v3_avg + p_avg * normal_direction_3
+                f5 = (f1 *
+                      (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one)
+                      +
+                      0.5 * (p_ll * v_dot_n_rr + p_rr * v_dot_n_ll))
+
+                # Add scaled fluxes to RHS
+                factor_k = alpha * derivative_split[k, kk]
+                du_reshaped[ij, k, 1] += factor_k * f1
+                du_reshaped[ij, k, 2] += factor_k * f2
+                du_reshaped[ij, k, 3] += factor_k * f3
+                du_reshaped[ij, k, 4] += factor_k * f4
+                du_reshaped[ij, k, 5] += factor_k * f5
+
+                factor_kk = alpha * derivative_split[kk, k]
+                du_reshaped[ij, kk, 1] += factor_kk * f1
+                du_reshaped[ij, kk, 2] += factor_kk * f2
+                du_reshaped[ij, kk, 3] += factor_kk * f3
+                du_reshaped[ij, kk, 4] += factor_kk * f4
+                du_reshaped[ij, kk, 5] += factor_kk * f5
+            end
+        end
+    end # GC.@preserve u_prim begin
+
+    # Finally, we add the temporary RHS computed here to the global RHS in the
+    # given `element`.
+    @turbo for v in eachvariable(equations),
+               k in eachnode(dg),
+               j in eachnode(dg),
+               i in eachnode(dg)
+
+        _du[v, i, j, k, element] += du[i, j, k, v]
+    end
 end
diff --git a/src/solvers/dgsem_structured/indicators_1d.jl b/src/solvers/dgsem_structured/indicators_1d.jl
index 9bb66d34e9e..4299ec603dd 100644
--- a/src/solvers/dgsem_structured/indicators_1d.jl
+++ b/src/solvers/dgsem_structured/indicators_1d.jl
@@ -3,24 +3,24 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
+#! format: noindent
 
 function apply_smoothing!(mesh::StructuredMesh{1}, alpha, alpha_tmp, dg, cache)
-  # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
-  # Copy alpha values such that smoothing is independent of the element access order
-  alpha_tmp .= alpha
+    # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
+    # Copy alpha values such that smoothing is independent of the element access order
+    alpha_tmp .= alpha
 
-  # So far, alpha smoothing doesn't work for non-periodic initial conditions for structured meshes.
-  @assert isperiodic(mesh) "alpha smoothing for structured meshes works only with periodic initial conditions so far"
+    # So far, alpha smoothing doesn't work for non-periodic initial conditions for structured meshes.
+    @assert isperiodic(mesh) "alpha smoothing for structured meshes works only with periodic initial conditions so far"
 
-  # Loop over elements, because there is no interface container
-  for element in eachelement(dg,cache)
-    # Get neighboring element ids
-    left = cache.elements.left_neighbors[1, element]
+    # Loop over elements, because there is no interface container
+    for element in eachelement(dg, cache)
+        # Get neighboring element ids
+        left = cache.elements.left_neighbors[1, element]
 
-    # Apply smoothing
-    alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[element], alpha[left])
-    alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[left], alpha[element])
-  end
+        # Apply smoothing
+        alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[element], alpha[left])
+        alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[left], alpha[element])
+    end
 end
-
 end # @muladd
diff --git a/src/solvers/dgsem_structured/indicators_2d.jl b/src/solvers/dgsem_structured/indicators_2d.jl
index abc054b572a..f4b07b70cb8 100644
--- a/src/solvers/dgsem_structured/indicators_2d.jl
+++ b/src/solvers/dgsem_structured/indicators_2d.jl
@@ -3,28 +3,28 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
+#! format: noindent
 
 function apply_smoothing!(mesh::StructuredMesh{2}, alpha, alpha_tmp, dg, cache)
-  # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
-  # Copy alpha values such that smoothing is independent of the element access order
-  alpha_tmp .= alpha
+    # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
+    # Copy alpha values such that smoothing is independent of the element access order
+    alpha_tmp .= alpha
 
-  # So far, alpha smoothing doesn't work for non-periodic initial conditions for structured meshes.
-  @assert isperiodic(mesh) "alpha smoothing for structured meshes works only with periodic initial conditions so far"
+    # So far, alpha smoothing doesn't work for non-periodic initial conditions for structured meshes.
+    @assert isperiodic(mesh) "alpha smoothing for structured meshes works only with periodic initial conditions so far"
 
-  # Loop over elements, because there is no interface container
-  for element in eachelement(dg,cache)
-    # Get neighboring element ids
-    left = cache.elements.left_neighbors[1, element]
-    lower = cache.elements.left_neighbors[2, element]
+    # Loop over elements, because there is no interface container
+    for element in eachelement(dg, cache)
+        # Get neighboring element ids
+        left = cache.elements.left_neighbors[1, element]
+        lower = cache.elements.left_neighbors[2, element]
 
-    # Apply smoothing
-    alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[element], alpha[left])
-    alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[left], alpha[element])
+        # Apply smoothing
+        alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[element], alpha[left])
+        alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[left], alpha[element])
 
-    alpha[lower] = max(alpha_tmp[lower], 0.5 * alpha_tmp[element], alpha[lower])
-    alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[lower], alpha[element])
-  end
+        alpha[lower] = max(alpha_tmp[lower], 0.5 * alpha_tmp[element], alpha[lower])
+        alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[lower], alpha[element])
+    end
 end
-
 end # @muladd
diff --git a/src/solvers/dgsem_structured/indicators_3d.jl b/src/solvers/dgsem_structured/indicators_3d.jl
index 4d3c4df278b..155bf50dc68 100644
--- a/src/solvers/dgsem_structured/indicators_3d.jl
+++ b/src/solvers/dgsem_structured/indicators_3d.jl
@@ -3,32 +3,32 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
+#! format: noindent
 
 function apply_smoothing!(mesh::StructuredMesh{3}, alpha, alpha_tmp, dg, cache)
-  # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
-  # Copy alpha values such that smoothing is independent of the element access order
-  alpha_tmp .= alpha
+    # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
+    # Copy alpha values such that smoothing is independent of the element access order
+    alpha_tmp .= alpha
 
-  # So far, alpha smoothing doesn't work for non-periodic initial conditions for structured meshes.
-  @assert isperiodic(mesh) "alpha smoothing for structured meshes works only with periodic initial conditions so far"
+    # So far, alpha smoothing doesn't work for non-periodic initial conditions for structured meshes.
+ @assert isperiodic(mesh) "alpha smoothing for structured meshes works only with periodic initial conditions so far" - # Loop over elements, because there is no interface container - for element in eachelement(dg,cache) - # Get neighboring element ids - left = cache.elements.left_neighbors[1, element] - lower = cache.elements.left_neighbors[2, element] - front = cache.elements.left_neighbors[3, element] + # Loop over elements, because there is no interface container + for element in eachelement(dg, cache) + # Get neighboring element ids + left = cache.elements.left_neighbors[1, element] + lower = cache.elements.left_neighbors[2, element] + front = cache.elements.left_neighbors[3, element] - # Apply smoothing - alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[element], alpha[left]) - alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[left], alpha[element]) + # Apply smoothing + alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[element], alpha[left]) + alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[left], alpha[element]) - alpha[lower] = max(alpha_tmp[lower], 0.5 * alpha_tmp[element], alpha[lower]) - alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[lower], alpha[element]) - - alpha[front] = max(alpha_tmp[front], 0.5 * alpha_tmp[element], alpha[front]) - alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[front], alpha[element]) - end -end + alpha[lower] = max(alpha_tmp[lower], 0.5 * alpha_tmp[element], alpha[lower]) + alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[lower], alpha[element]) + alpha[front] = max(alpha_tmp[front], 0.5 * alpha_tmp[element], alpha[front]) + alpha[element] = max(alpha_tmp[element], 0.5 * alpha_tmp[front], alpha[element]) + end +end end # @muladd diff --git a/src/solvers/dgsem_tree/containers.jl b/src/solvers/dgsem_tree/containers.jl index 92bce0ce830..bba8b83b23a 100644 --- a/src/solvers/dgsem_tree/containers.jl +++ b/src/solvers/dgsem_tree/containers.jl @@ -3,58 +3,55 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Dimension independent code related to containers of the DG solver # with the mesh type TreeMesh function reinitialize_containers!(mesh::TreeMesh, equations, dg::DGSEM, cache) - # Get new list of leaf cells - leaf_cell_ids = local_leaf_cells(mesh.tree) - - # re-initialize elements container - @unpack elements = cache - resize!(elements, length(leaf_cell_ids)) - init_elements!(elements, leaf_cell_ids, mesh, dg.basis) - - # re-initialize interfaces container - @unpack interfaces = cache - resize!(interfaces, count_required_interfaces(mesh, leaf_cell_ids)) - init_interfaces!(interfaces, elements, mesh) - - # re-initialize boundaries container - @unpack boundaries = cache - resize!(boundaries, count_required_boundaries(mesh, leaf_cell_ids)) - init_boundaries!(boundaries, elements, mesh) - - # re-initialize mortars container - @unpack mortars = cache - resize!(mortars, count_required_mortars(mesh, leaf_cell_ids)) - init_mortars!(mortars, elements, mesh) - - if mpi_isparallel() - # re-initialize mpi_interfaces container - @unpack mpi_interfaces = cache - resize!(mpi_interfaces, count_required_mpi_interfaces(mesh, leaf_cell_ids)) - init_mpi_interfaces!(mpi_interfaces, elements, mesh) - - # re-initialize mpi_mortars container - @unpack mpi_mortars = cache - resize!(mpi_mortars, count_required_mpi_mortars(mesh, leaf_cell_ids)) - init_mpi_mortars!(mpi_mortars, elements, mesh) - - # re-initialize mpi cache - @unpack mpi_cache = cache - init_mpi_cache!(mpi_cache, mesh, elements, mpi_interfaces, mpi_mortars, - nvariables(equations), nnodes(dg), eltype(elements)) - end + # Get new list of leaf cells + leaf_cell_ids = local_leaf_cells(mesh.tree) + + # re-initialize elements container + @unpack elements = cache + resize!(elements, length(leaf_cell_ids)) + init_elements!(elements, leaf_cell_ids, mesh, dg.basis) + + # re-initialize interfaces container + @unpack interfaces = cache + resize!(interfaces, count_required_interfaces(mesh, leaf_cell_ids)) + init_interfaces!(interfaces, elements, mesh) + + # re-initialize boundaries container + @unpack boundaries = cache + resize!(boundaries, count_required_boundaries(mesh, leaf_cell_ids)) + init_boundaries!(boundaries, elements, mesh) + + # re-initialize mortars container + @unpack mortars = cache + resize!(mortars, count_required_mortars(mesh, leaf_cell_ids)) + init_mortars!(mortars, elements, mesh) + + if mpi_isparallel() + # re-initialize mpi_interfaces container + @unpack mpi_interfaces = cache + resize!(mpi_interfaces, count_required_mpi_interfaces(mesh, leaf_cell_ids)) + init_mpi_interfaces!(mpi_interfaces, elements, mesh) + + # re-initialize mpi_mortars container + @unpack mpi_mortars = cache + resize!(mpi_mortars, count_required_mpi_mortars(mesh, leaf_cell_ids)) + init_mpi_mortars!(mpi_mortars, elements, mesh) + + # re-initialize mpi cache + @unpack mpi_cache = cache + init_mpi_cache!(mpi_cache, mesh, elements, mpi_interfaces, mpi_mortars, + nvariables(equations), nnodes(dg), eltype(elements)) + end end - # Dimension-specific implementations include("containers_1d.jl") include("containers_2d.jl") include("containers_3d.jl") - - end # @muladd diff --git a/src/solvers/dgsem_tree/containers_1d.jl b/src/solvers/dgsem_tree/containers_1d.jl index 10718fb2e55..ecbcc1c4d9a 100644 --- a/src/solvers/dgsem_tree/containers_1d.jl +++ b/src/solvers/dgsem_tree/containers_1d.jl @@ -3,17 +3,17 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Container data structure (structure-of-arrays style) for DG elements -mutable struct ElementContainer1D{RealT<:Real, uEltype<:Real} <: AbstractContainer - inverse_jacobian::Vector{RealT} # [elements] - node_coordinates::Array{RealT, 3} # [orientation, i, elements] - surface_flux_values::Array{uEltype, 3} # [variables, direction, elements] - cell_ids::Vector{Int} # [elements] - # internal `resize!`able storage - _node_coordinates::Vector{RealT} - _surface_flux_values::Vector{uEltype} +mutable struct ElementContainer1D{RealT <: Real, uEltype <: Real} <: AbstractContainer + inverse_jacobian::Vector{RealT} # [elements] + node_coordinates::Array{RealT, 3} # [orientation, i, elements] + surface_flux_values::Array{uEltype, 3} # [variables, direction, elements] + cell_ids::Vector{Int} # [elements] + # internal `resize!`able storage + _node_coordinates::Vector{RealT} + _surface_flux_values::Vector{uEltype} end nvariables(elements::ElementContainer1D) = size(elements.surface_flux_values, 1) @@ -26,50 +26,50 @@ Base.eltype(elements::ElementContainer1D) = eltype(elements.surface_flux_values) # `unsafe_wrap`ping multi-dimensional `Array`s around the # internal storage. function Base.resize!(elements::ElementContainer1D, capacity) - n_nodes = nnodes(elements) - n_variables = nvariables(elements) - @unpack _node_coordinates, _surface_flux_values, - inverse_jacobian, cell_ids = elements + n_nodes = nnodes(elements) + n_variables = nvariables(elements) + @unpack _node_coordinates, _surface_flux_values, + inverse_jacobian, cell_ids = elements - resize!(inverse_jacobian, capacity) + resize!(inverse_jacobian, capacity) - resize!(_node_coordinates, 1 * n_nodes * capacity) - elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (1, n_nodes, capacity)) + resize!(_node_coordinates, 1 * n_nodes * capacity) + elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (1, n_nodes, capacity)) - resize!(_surface_flux_values, n_variables * 2 * 1 * capacity) - elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (n_variables, 2 * 1, capacity)) + resize!(_surface_flux_values, n_variables * 2 * 1 * capacity) + elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (n_variables, 2 * 1, capacity)) - resize!(cell_ids, capacity) + resize!(cell_ids, capacity) - return nothing + return nothing end +function ElementContainer1D{RealT, uEltype}(capacity::Integer, n_variables, + n_nodes) where {RealT <: Real, + uEltype <: Real} + nan_RealT = convert(RealT, NaN) + nan_uEltype = convert(uEltype, NaN) -function ElementContainer1D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - # Initialize fields with defaults - inverse_jacobian = fill(nan_RealT, capacity) + # Initialize fields with defaults + inverse_jacobian = fill(nan_RealT, capacity) - _node_coordinates = fill(nan_RealT, 1 * n_nodes * capacity) - node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (1, n_nodes, capacity)) + _node_coordinates = fill(nan_RealT, 1 * n_nodes * capacity) + node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (1, n_nodes, capacity)) - _surface_flux_values = fill(nan_uEltype, n_variables * 2 * 1 * capacity) - surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (n_variables, 2 * 1, capacity)) + _surface_flux_values = fill(nan_uEltype, n_variables * 2 * 1 * capacity) + 
surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (n_variables, 2 * 1, capacity)) - cell_ids = fill(typemin(Int), capacity) + cell_ids = fill(typemin(Int), capacity) - return ElementContainer1D{RealT, uEltype}( - inverse_jacobian, node_coordinates, surface_flux_values, cell_ids, - _node_coordinates, _surface_flux_values) + return ElementContainer1D{RealT, uEltype}(inverse_jacobian, node_coordinates, + surface_flux_values, cell_ids, + _node_coordinates, _surface_flux_values) end - # Return number of elements @inline nelements(elements::ElementContainer1D) = length(elements.cell_ids) # TODO: Taal performance, 1:nelements(elements) vs. Base.OneTo(nelements(elements)) @@ -83,68 +83,68 @@ In particular, not the elements themselves are returned. @inline eachelement(elements::ElementContainer1D) = Base.OneTo(nelements(elements)) @inline Base.real(elements::ElementContainer1D) = eltype(elements.node_coordinates) - # Create element container and initialize element data function init_elements(cell_ids, mesh::TreeMesh1D, equations::AbstractEquations{1}, - basis, ::Type{RealT}, ::Type{uEltype}) where {RealT<:Real, uEltype<:Real} - # Initialize container - n_elements = length(cell_ids) - elements = ElementContainer1D{RealT, uEltype}( - n_elements, nvariables(equations), nnodes(basis)) - - init_elements!(elements, cell_ids, mesh, basis) - return elements + basis, ::Type{RealT}, + ::Type{uEltype}) where {RealT <: Real, uEltype <: Real} + # Initialize container + n_elements = length(cell_ids) + elements = ElementContainer1D{RealT, uEltype}(n_elements, nvariables(equations), + nnodes(basis)) + + init_elements!(elements, cell_ids, mesh, basis) + return elements end function init_elements!(elements, cell_ids, mesh::TreeMesh1D, basis) - nodes = get_nodes(basis) - # Compute the length of the 1D reference interval by integrating - # the function with constant value unity on the corresponding - # element data type (using \circ) - reference_length = integrate(one ∘ eltype, nodes, basis) - # Compute the offset of the midpoint of the 1D reference interval - # (its difference from zero) - reference_offset = (first(nodes) + last(nodes)) / 2 - - # Store cell ids - elements.cell_ids .= cell_ids - - # Calculate inverse Jacobian and node coordinates - for element in eachelement(elements) - # Get cell id - cell_id = cell_ids[element] - - # Get cell length - dx = length_at_cell(mesh.tree, cell_id) - - # Calculate inverse Jacobian - jacobian = dx / reference_length - elements.inverse_jacobian[element] = inv(jacobian) - - # Calculate node coordinates - # Note that the `tree_coordinates` are the midpoints of the cells. - # Hence, we need to add an offset for `nodes` with a midpoint - # different from zero. 
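As a concrete illustration of the affine mapping computed in the loop below (made-up numbers, assuming nodes on the reference interval [-1, 1], i.e. `reference_length == 2` and `reference_offset == 0`):

```julia
# Sketch only: map reference nodes to physical node coordinates of one cell.
nodes = [-1.0, 0.0, 1.0]        # reference coordinates
center = 2.5                    # cell midpoint stored in the tree
dx = 0.5                        # physical cell length
jacobian = dx / 2               # the reference interval has length 2
x = center .+ jacobian .* nodes # == [2.25, 2.5, 2.75]
```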
- for i in eachnode(basis) - elements.node_coordinates[1, i, element] = ( - mesh.tree.coordinates[1, cell_id] + jacobian * (nodes[i] - reference_offset)) - end - end - - return elements -end - + nodes = get_nodes(basis) + # Compute the length of the 1D reference interval by integrating + # the function with constant value unity on the corresponding + # element data type (using \circ) + reference_length = integrate(one ∘ eltype, nodes, basis) + # Compute the offset of the midpoint of the 1D reference interval + # (its difference from zero) + reference_offset = (first(nodes) + last(nodes)) / 2 + + # Store cell ids + elements.cell_ids .= cell_ids + + # Calculate inverse Jacobian and node coordinates + for element in eachelement(elements) + # Get cell id + cell_id = cell_ids[element] + + # Get cell length + dx = length_at_cell(mesh.tree, cell_id) + + # Calculate inverse Jacobian + jacobian = dx / reference_length + elements.inverse_jacobian[element] = inv(jacobian) + + # Calculate node coordinates + # Note that the `tree_coordinates` are the midpoints of the cells. + # Hence, we need to add an offset for `nodes` with a midpoint + # different from zero. + for i in eachnode(basis) + elements.node_coordinates[1, i, element] = (mesh.tree.coordinates[1, + cell_id] + + jacobian * + (nodes[i] - reference_offset)) + end + end + return elements +end # Container data structure (structure-of-arrays style) for DG interfaces -mutable struct InterfaceContainer1D{uEltype<:Real} <: AbstractContainer - u::Array{uEltype, 3} # [leftright, variables, interfaces] - neighbor_ids::Matrix{Int} # [leftright, interfaces] - orientations::Vector{Int} # [interfaces] - # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} +mutable struct InterfaceContainer1D{uEltype <: Real} <: AbstractContainer + u::Array{uEltype, 3} # [leftright, variables, interfaces] + neighbor_ids::Matrix{Int} # [leftright, interfaces] + orientations::Vector{Int} # [interfaces] + # internal `resize!`able storage + _u::Vector{uEltype} + _neighbor_ids::Vector{Int} end nvariables(interfaces::InterfaceContainer1D) = size(interfaces.u, 2) @@ -152,151 +152,148 @@ Base.eltype(interfaces::InterfaceContainer1D) = eltype(interfaces.u) # See explanation of Base.resize! 
for the element container function Base.resize!(interfaces::InterfaceContainer1D, capacity) - n_variables = nvariables(interfaces) - @unpack _u, _neighbor_ids, orientations = interfaces + n_variables = nvariables(interfaces) + @unpack _u, _neighbor_ids, orientations = interfaces - resize!(_u, 2 * n_variables * capacity) - interfaces.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, capacity)) + resize!(_u, 2 * n_variables * capacity) + interfaces.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, capacity)) - resize!(_neighbor_ids, 2 * capacity) - interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (2, capacity)) + resize!(_neighbor_ids, 2 * capacity) + interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2, capacity)) - resize!(orientations, capacity) + resize!(orientations, capacity) - return nothing + return nothing end +function InterfaceContainer1D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan = convert(uEltype, NaN) -function InterfaceContainer1D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - nan = convert(uEltype, NaN) + # Initialize fields with defaults + _u = fill(nan, 2 * n_variables * capacity) + u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, capacity)) - # Initialize fields with defaults - _u = fill(nan, 2 * n_variables * capacity) - u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, capacity)) + _neighbor_ids = fill(typemin(Int), 2 * capacity) + neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2, capacity)) - _neighbor_ids = fill(typemin(Int), 2 * capacity) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (2, capacity)) + orientations = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) - - return InterfaceContainer1D{uEltype}( - u, neighbor_ids, orientations, - _u, _neighbor_ids) + return InterfaceContainer1D{uEltype}(u, neighbor_ids, orientations, + _u, _neighbor_ids) end - # Return number of interfaces @inline ninterfaces(interfaces::InterfaceContainer1D) = length(interfaces.orientations) - # Create interface container and initialize interface data in `elements`. 
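All of these containers follow the same storage idiom: the data lives in flat, `resize!`able `Vector`s, and the multi-dimensional fields are `unsafe_wrap`ped views that must be re-created after every `resize!` because the underlying buffer may move. A condensed sketch of the idiom (generic names, not the actual container API):

```julia
# Sketch only: flat storage plus a multi-dimensional view that is rewrapped
# whenever the storage is resized.
mutable struct MiniContainer{T <: Real}
    u::Array{T, 2} # view with layout [variables, entries]
    _u::Vector{T}  # internal resizable storage
end

function MiniContainer{T}(n_variables, capacity) where {T <: Real}
    _u = fill(convert(T, NaN), n_variables * capacity)
    u = unsafe_wrap(Array, pointer(_u), (n_variables, capacity))
    return MiniContainer{T}(u, _u)
end

function Base.resize!(c::MiniContainer, capacity)
    n_variables = size(c.u, 1)
    resize!(c._u, n_variables * capacity)
    # resize! may reallocate, so the stale wrapper must be replaced.
    c.u = unsafe_wrap(Array, pointer(c._u), (n_variables, capacity))
    return nothing
end
```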
function init_interfaces(cell_ids, mesh::TreeMesh1D, elements::ElementContainer1D) - # Initialize container - n_interfaces = count_required_interfaces(mesh, cell_ids) - interfaces = InterfaceContainer1D{eltype(elements)}( - n_interfaces, nvariables(elements), nnodes(elements)) - - # Connect elements with interfaces - init_interfaces!(interfaces, elements, mesh) - return interfaces + # Initialize container + n_interfaces = count_required_interfaces(mesh, cell_ids) + interfaces = InterfaceContainer1D{eltype(elements)}(n_interfaces, + nvariables(elements), + nnodes(elements)) + + # Connect elements with interfaces + init_interfaces!(interfaces, elements, mesh) + return interfaces end # Count the number of interfaces that need to be created function count_required_interfaces(mesh::TreeMesh1D, cell_ids) - count = 0 - - # Iterate over all cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # Only count interfaces in positive direction to avoid double counting - if direction == 1 - continue - end - - # Skip if no neighbor exists - if !has_any_neighbor(mesh.tree, cell_id, direction) - continue - end - - count += 1 + count = 0 + + # Iterate over all cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # Only count interfaces in positive direction to avoid double counting + if direction == 1 + continue + end + + # Skip if no neighbor exists + if !has_any_neighbor(mesh.tree, cell_id, direction) + continue + end + + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and interfaces function init_interfaces!(interfaces, elements, mesh::TreeMesh1D) - # Construct cell -> element mapping for easier algorithm implementation - tree = mesh.tree - c2e = zeros(Int, length(tree)) - for element in eachelement(elements) - c2e[elements.cell_ids[element]] = element - end - - # Reset interface count - count = 0 + # Construct cell -> element mapping for easier algorithm implementation + tree = mesh.tree + c2e = zeros(Int, length(tree)) + for element in eachelement(elements) + c2e[elements.cell_ids[element]] = element + end - # Iterate over all elements to find neighbors and to connect via interfaces - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] + # Reset interface count + count = 0 - # Loop over directions - for direction in eachdirection(mesh.tree) - # Only create interfaces in positive direction - if direction == 1 - continue - end - - # Skip if no neighbor exists and current cell is not small - if !has_any_neighbor(mesh.tree, cell_id, direction) - continue - end - - count += 1 - - if has_neighbor(mesh.tree, cell_id, direction) - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) # Cell has small neighbor - interfaces.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[1, neighbor_cell_id]] - else # Cell has same refinement level neighbor - interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] + # Iterate over all elements to find neighbors and to connect via interfaces + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + # Loop over directions + for direction in eachdirection(mesh.tree) + # Only create interfaces in positive direction + if direction == 1 + continue + end + + # Skip if no neighbor exists and current cell is not small + if !has_any_neighbor(mesh.tree, cell_id, direction) + continue + end + + count += 1 + + if has_neighbor(mesh.tree, cell_id, 
direction) + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) # Cell has small neighbor + interfaces.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[1, + neighbor_cell_id]] + else # Cell has same refinement level neighbor + interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] + end + else # Cell is small and has large neighbor + parent_id = mesh.tree.parent_ids[cell_id] + neighbor_cell_id = mesh.tree.neighbor_ids[direction, parent_id] + interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] + end + + interfaces.neighbor_ids[1, count] = element + # Set orientation (x -> 1) + interfaces.orientations[count] = 1 end - else # Cell is small and has large neighbor - parent_id = mesh.tree.parent_ids[cell_id] - neighbor_cell_id = mesh.tree.neighbor_ids[direction, parent_id] - interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] - end - - interfaces.neighbor_ids[1, count] = element - # Set orientation (x -> 1) - interfaces.orientations[count] = 1 end - end - @assert count == ninterfaces(interfaces) ("Actual interface count ($count) does not match " * + @assert count==ninterfaces(interfaces) ("Actual interface count ($count) does not match "* "expectations $(ninterfaces(interfaces))") end - - # Container data structure (structure-of-arrays style) for DG boundaries -mutable struct BoundaryContainer1D{RealT<:Real, uEltype<:Real} <: AbstractContainer - u::Array{uEltype, 3} # [leftright, variables, boundaries] - neighbor_ids::Vector{Int} # [boundaries] - orientations::Vector{Int} # [boundaries] - neighbor_sides::Vector{Int} # [boundaries] - node_coordinates::Array{RealT, 2} # [orientation, elements] - n_boundaries_per_direction::SVector{2, Int} # [direction] - # internal `resize!`able storage - _u::Vector{uEltype} - _node_coordinates::Vector{RealT} +mutable struct BoundaryContainer1D{RealT <: Real, uEltype <: Real} <: AbstractContainer + u::Array{uEltype, 3} # [leftright, variables, boundaries] + neighbor_ids::Vector{Int} # [boundaries] + orientations::Vector{Int} # [boundaries] + neighbor_sides::Vector{Int} # [boundaries] + node_coordinates::Array{RealT, 2} # [orientation, elements] + n_boundaries_per_direction::SVector{2, Int} # [direction] + # internal `resize!`able storage + _u::Vector{uEltype} + _node_coordinates::Vector{RealT} end nvariables(boundaries::BoundaryContainer1D) = size(boundaries.u, 2) @@ -304,163 +301,162 @@ Base.eltype(boundaries::BoundaryContainer1D) = eltype(boundaries.u) # See explanation of Base.resize! 
for the element container function Base.resize!(boundaries::BoundaryContainer1D, capacity) - n_variables = nvariables(boundaries) - @unpack _u, _node_coordinates, - neighbor_ids, orientations, neighbor_sides = boundaries + n_variables = nvariables(boundaries) + @unpack _u, _node_coordinates, + neighbor_ids, orientations, neighbor_sides = boundaries - resize!(_u, 2 * n_variables * capacity) - boundaries.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, capacity)) + resize!(_u, 2 * n_variables * capacity) + boundaries.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, capacity)) - resize!(_node_coordinates, 1 * capacity) - boundaries.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (1, capacity)) + resize!(_node_coordinates, 1 * capacity) + boundaries.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (1, capacity)) - resize!(neighbor_ids, capacity) + resize!(neighbor_ids, capacity) - resize!(orientations, capacity) + resize!(orientations, capacity) - resize!(neighbor_sides, capacity) + resize!(neighbor_sides, capacity) - return nothing + return nothing end +function BoundaryContainer1D{RealT, uEltype}(capacity::Integer, n_variables, + n_nodes) where {RealT <: Real, + uEltype <: Real} + nan_RealT = convert(RealT, NaN) + nan_uEltype = convert(uEltype, NaN) -function BoundaryContainer1D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - # Initialize fields with defaults - _u = fill(nan_uEltype, 2 * n_variables * capacity) - u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, capacity)) + # Initialize fields with defaults + _u = fill(nan_uEltype, 2 * n_variables * capacity) + u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, capacity)) - neighbor_ids = fill(typemin(Int), capacity) + neighbor_ids = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) - neighbor_sides = fill(typemin(Int), capacity) + neighbor_sides = fill(typemin(Int), capacity) - _node_coordinates = fill(nan_RealT, 1 * capacity) - node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (1, capacity)) + _node_coordinates = fill(nan_RealT, 1 * capacity) + node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (1, capacity)) - n_boundaries_per_direction = SVector(0, 0) + n_boundaries_per_direction = SVector(0, 0) - return BoundaryContainer1D{RealT, uEltype}( - u, neighbor_ids, orientations, neighbor_sides, - node_coordinates, n_boundaries_per_direction, - _u, _node_coordinates) + return BoundaryContainer1D{RealT, uEltype}(u, neighbor_ids, orientations, + neighbor_sides, + node_coordinates, + n_boundaries_per_direction, + _u, _node_coordinates) end - # Return number of boundaries nboundaries(boundaries::BoundaryContainer1D) = length(boundaries.orientations) - # Create boundaries container and initialize boundary data in `elements`. 
function init_boundaries(cell_ids, mesh::TreeMesh1D, elements::ElementContainer1D) - # Initialize container - n_boundaries = count_required_boundaries(mesh, cell_ids) - boundaries = BoundaryContainer1D{real(elements), eltype(elements)}( - n_boundaries, nvariables(elements), nnodes(elements)) - - # Connect elements with boundaries - init_boundaries!(boundaries, elements, mesh) - return boundaries + # Initialize container + n_boundaries = count_required_boundaries(mesh, cell_ids) + boundaries = BoundaryContainer1D{real(elements), eltype(elements)}(n_boundaries, + nvariables(elements), + nnodes(elements)) + + # Connect elements with boundaries + init_boundaries!(boundaries, elements, mesh) + return boundaries end # Count the number of boundaries that need to be created function count_required_boundaries(mesh::TreeMesh1D, cell_ids) - count = 0 - - # Iterate over all cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # If neighbor exists, current cell is not at a boundary - if has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If coarse neighbor exists, current cell is not at a boundary - if has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # No neighbor exists in this direction -> must be a boundary - count += 1 + count = 0 + + # Iterate over all cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # If neighbor exists, current cell is not at a boundary + if has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If coarse neighbor exists, current cell is not at a boundary + if has_coarse_neighbor(mesh.tree, cell_id, direction) + continue + end + + # No neighbor exists in this direction -> must be a boundary + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and boundaries function init_boundaries!(boundaries, elements, mesh::TreeMesh1D) - # Reset boundaries count - count = 0 - - # Initialize boundary counts - counts_per_direction = MVector(0, 0) - - # OBS! Iterate over directions first, then over elements, and count boundaries in each direction - # Rationale: This way the boundaries are internally sorted by the directions -x, +x, -y etc., - # obviating the need to store the boundary condition to be applied explicitly. 
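Reduced to its core, the counting logic above says: a cell face is a physical boundary exactly when there is neither a same-level nor a coarser neighbor in that direction. A toy check under the direction convention assumed here (1 -> -x, 2 -> +x); the `neighbors` matrix is made-up data that stands in for the two `has_neighbor`/`has_coarse_neighbor` tests.

```julia
# Rows: direction (1 -> -x, 2 -> +x); columns: cells of a toy 1D mesh.
neighbors = [false true true;    # -x: only cell 1 has no left neighbor
             true true false]    # +x: only cell 3 has no right neighbor

function count_toy_boundaries(neighbors)
    count = 0
    for cell in axes(neighbors, 2), direction in axes(neighbors, 1)
        neighbors[direction, cell] && continue  # neighbor exists -> no boundary
        count += 1
    end
    return count
end

@assert count_toy_boundaries(neighbors) == 2  # a connected 1D mesh: two boundaries
```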
- # Loop over directions - for direction in eachdirection(mesh.tree) - # Iterate over all elements to find missing neighbors and to connect to boundaries - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] - - # If neighbor exists, current cell is not at a boundary - if has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If coarse neighbor exists, current cell is not at a boundary - if has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Create boundary - count += 1 - counts_per_direction[direction] += 1 - - # Set neighbor element id - boundaries.neighbor_ids[count] = element - - # Set neighbor side, which denotes the direction (1 -> negative, 2 -> positive) of the element - if direction == 2 - boundaries.neighbor_sides[count] = 1 - else - boundaries.neighbor_sides[count] = 2 - end - - # Set orientation (x -> 1) - boundaries.orientations[count] = 1 - - # Store node coordinates - enc = elements.node_coordinates - if direction == 1 # -x direction - boundaries.node_coordinates[:, count] .= enc[:, 1, element] - elseif direction == 2 # +x direction - boundaries.node_coordinates[:, count] .= enc[:, end, element] - else - error("should not happen") - end + # Reset boundaries count + count = 0 + + # Initialize boundary counts + counts_per_direction = MVector(0, 0) + + # OBS! Iterate over directions first, then over elements, and count boundaries in each direction + # Rationale: This way the boundaries are internally sorted by the directions -x, +x, -y etc., + # obviating the need to store the boundary condition to be applied explicitly. + # Loop over directions + for direction in eachdirection(mesh.tree) + # Iterate over all elements to find missing neighbors and to connect to boundaries + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + # If neighbor exists, current cell is not at a boundary + if has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If coarse neighbor exists, current cell is not at a boundary + if has_coarse_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Create boundary + count += 1 + counts_per_direction[direction] += 1 + + # Set neighbor element id + boundaries.neighbor_ids[count] = element + + # Set neighbor side, which denotes the direction (1 -> negative, 2 -> positive) of the element + if direction == 2 + boundaries.neighbor_sides[count] = 1 + else + boundaries.neighbor_sides[count] = 2 + end + + # Set orientation (x -> 1) + boundaries.orientations[count] = 1 + + # Store node coordinates + enc = elements.node_coordinates + if direction == 1 # -x direction + boundaries.node_coordinates[:, count] .= enc[:, 1, element] + elseif direction == 2 # +x direction + boundaries.node_coordinates[:, count] .= enc[:, end, element] + else + error("should not happen") + end + end end - end - @assert count == nboundaries(boundaries) ("Actual boundaries count ($count) does not match " * + @assert count==nboundaries(boundaries) ("Actual boundaries count ($count) does not match "* "expectations $(nboundaries(boundaries))") - @assert sum(counts_per_direction) == count + @assert sum(counts_per_direction) == count - boundaries.n_boundaries_per_direction = SVector(counts_per_direction) + boundaries.n_boundaries_per_direction = SVector(counts_per_direction) - return boundaries.n_boundaries_per_direction + return boundaries.n_boundaries_per_direction end - - end # @muladd diff --git a/src/solvers/dgsem_tree/containers_2d.jl 
b/src/solvers/dgsem_tree/containers_2d.jl index c0ece1f8c1a..5cf256d3499 100644 --- a/src/solvers/dgsem_tree/containers_2d.jl +++ b/src/solvers/dgsem_tree/containers_2d.jl @@ -3,17 +3,17 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Container data structure (structure-of-arrays style) for DG elements -mutable struct ElementContainer2D{RealT<:Real, uEltype<:Real} <: AbstractContainer - inverse_jacobian::Vector{RealT} # [elements] - node_coordinates::Array{RealT, 4} # [orientation, i, j, elements] - surface_flux_values::Array{uEltype, 4} # [variables, i, direction, elements] - cell_ids::Vector{Int} # [elements] - # internal `resize!`able storage - _node_coordinates::Vector{RealT} - _surface_flux_values::Vector{uEltype} +mutable struct ElementContainer2D{RealT <: Real, uEltype <: Real} <: AbstractContainer + inverse_jacobian::Vector{RealT} # [elements] + node_coordinates::Array{RealT, 4} # [orientation, i, j, elements] + surface_flux_values::Array{uEltype, 4} # [variables, i, direction, elements] + cell_ids::Vector{Int} # [elements] + # internal `resize!`able storage + _node_coordinates::Vector{RealT} + _surface_flux_values::Vector{uEltype} end nvariables(elements::ElementContainer2D) = size(elements.surface_flux_values, 1) @@ -26,51 +26,50 @@ Base.eltype(elements::ElementContainer2D) = eltype(elements.surface_flux_values) # `unsafe_wrap`ping multi-dimensional `Array`s around the # internal storage. function Base.resize!(elements::ElementContainer2D, capacity) - n_nodes = nnodes(elements) - n_variables = nvariables(elements) - @unpack _node_coordinates, _surface_flux_values, - inverse_jacobian, cell_ids = elements + n_nodes = nnodes(elements) + n_variables = nvariables(elements) + @unpack _node_coordinates, _surface_flux_values, + inverse_jacobian, cell_ids = elements - resize!(inverse_jacobian, capacity) + resize!(inverse_jacobian, capacity) - resize!(_node_coordinates, 2 * n_nodes * n_nodes * capacity) - elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (2, n_nodes, n_nodes, capacity)) + resize!(_node_coordinates, 2 * n_nodes * n_nodes * capacity) + elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (2, n_nodes, n_nodes, capacity)) - resize!(_surface_flux_values, n_variables * n_nodes * 2 * 2 * capacity) - elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (n_variables, n_nodes, 2 * 2, capacity)) + resize!(_surface_flux_values, n_variables * n_nodes * 2 * 2 * capacity) + elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (n_variables, n_nodes, 2 * 2, capacity)) - resize!(cell_ids, capacity) + resize!(cell_ids, capacity) - return nothing + return nothing end +function ElementContainer2D{RealT, uEltype}(capacity::Integer, n_variables, + n_nodes) where {RealT <: Real, + uEltype <: Real} + nan_RealT = convert(RealT, NaN) + nan_uEltype = convert(uEltype, NaN) -function ElementContainer2D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - # Initialize fields with defaults - inverse_jacobian = fill(nan_RealT, capacity) + # Initialize fields with defaults + inverse_jacobian = fill(nan_RealT, capacity) - _node_coordinates = fill(nan_RealT, 2 * n_nodes * n_nodes * capacity) - node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (2, n_nodes, n_nodes, 
capacity)) + _node_coordinates = fill(nan_RealT, 2 * n_nodes * n_nodes * capacity) + node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (2, n_nodes, n_nodes, capacity)) - _surface_flux_values = fill(nan_uEltype, n_variables * n_nodes * 2 * 2 * capacity) - surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (n_variables, n_nodes, 2 * 2, capacity)) + _surface_flux_values = fill(nan_uEltype, n_variables * n_nodes * 2 * 2 * capacity) + surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (n_variables, n_nodes, 2 * 2, capacity)) - cell_ids = fill(typemin(Int), capacity) + cell_ids = fill(typemin(Int), capacity) - - return ElementContainer2D{RealT, uEltype}( - inverse_jacobian, node_coordinates, surface_flux_values, cell_ids, - _node_coordinates, _surface_flux_values) + return ElementContainer2D{RealT, uEltype}(inverse_jacobian, node_coordinates, + surface_flux_values, cell_ids, + _node_coordinates, _surface_flux_values) end - # Return number of elements @inline nelements(elements::ElementContainer2D) = length(elements.cell_ids) # TODO: Taal performance, 1:nelements(elements) vs. Base.OneTo(nelements(elements)) @@ -84,70 +83,72 @@ In particular, not the elements themselves are returned. @inline eachelement(elements::ElementContainer2D) = Base.OneTo(nelements(elements)) @inline Base.real(elements::ElementContainer2D) = eltype(elements.node_coordinates) - # Create element container and initialize element data function init_elements(cell_ids, mesh::TreeMesh2D, equations::AbstractEquations{2}, - basis, ::Type{RealT}, ::Type{uEltype}) where {RealT<:Real, uEltype<:Real} - # Initialize container - n_elements = length(cell_ids) - elements = ElementContainer2D{RealT, uEltype}( - n_elements, nvariables(equations), nnodes(basis)) - - init_elements!(elements, cell_ids, mesh, basis) - return elements + basis, ::Type{RealT}, + ::Type{uEltype}) where {RealT <: Real, uEltype <: Real} + # Initialize container + n_elements = length(cell_ids) + elements = ElementContainer2D{RealT, uEltype}(n_elements, nvariables(equations), + nnodes(basis)) + + init_elements!(elements, cell_ids, mesh, basis) + return elements end function init_elements!(elements, cell_ids, mesh::TreeMesh2D, basis) - nodes = get_nodes(basis) - # Compute the length of the 1D reference interval by integrating - # the function with constant value unity on the corresponding - # element data type (using \circ) - reference_length = integrate(one ∘ eltype, nodes, basis) - # Compute the offset of the midpoint of the 1D reference interval - # (its difference from zero) - reference_offset = (first(nodes) + last(nodes)) / 2 - - # Store cell ids - elements.cell_ids .= cell_ids - - # Calculate inverse Jacobian and node coordinates - for element in eachelement(elements) - # Get cell id - cell_id = cell_ids[element] - - # Get cell length - dx = length_at_cell(mesh.tree, cell_id) - - # Calculate inverse Jacobian - jacobian = dx / reference_length - elements.inverse_jacobian[element] = inv(jacobian) - - # Calculate node coordinates - # Note that the `tree_coordinates` are the midpoints of the cells. - # Hence, we need to add an offset for `nodes` with a midpoint - # different from zero. 
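The node-coordinate loop being re-indented in this hunk implements an affine map from reference to physical coordinates: each node ξ lands at `midpoint + jacobian * (ξ - reference_offset)`. A small numerical check of that formula with made-up values (the node set and cell data below are illustrative only):

```julia
nodes = [-1.0, 0.0, 1.0]             # e.g. Gauss-Lobatto nodes on [-1, 1]
reference_length = 2.0               # length of the reference interval
reference_offset = (first(nodes) + last(nodes)) / 2  # 0 for symmetric nodes

dx = 0.5                             # physical cell length
midpoint = 1.25                      # cell midpoint from the tree
jacobian = dx / reference_length     # 0.25

x = [midpoint + jacobian * (ξ - reference_offset) for ξ in nodes]
@assert x == [1.0, 1.25, 1.5]        # nodes span exactly the cell [1.0, 1.5]
```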
- for j in eachnode(basis), i in eachnode(basis) - elements.node_coordinates[1, i, j, element] = ( - mesh.tree.coordinates[1, cell_id] + jacobian * (nodes[i] - reference_offset)) - elements.node_coordinates[2, i, j, element] = ( - mesh.tree.coordinates[2, cell_id] + jacobian * (nodes[j] - reference_offset)) + nodes = get_nodes(basis) + # Compute the length of the 1D reference interval by integrating + # the function with constant value unity on the corresponding + # element data type (using \circ) + reference_length = integrate(one ∘ eltype, nodes, basis) + # Compute the offset of the midpoint of the 1D reference interval + # (its difference from zero) + reference_offset = (first(nodes) + last(nodes)) / 2 + + # Store cell ids + elements.cell_ids .= cell_ids + + # Calculate inverse Jacobian and node coordinates + for element in eachelement(elements) + # Get cell id + cell_id = cell_ids[element] + + # Get cell length + dx = length_at_cell(mesh.tree, cell_id) + + # Calculate inverse Jacobian + jacobian = dx / reference_length + elements.inverse_jacobian[element] = inv(jacobian) + + # Calculate node coordinates + # Note that the `tree_coordinates` are the midpoints of the cells. + # Hence, we need to add an offset for `nodes` with a midpoint + # different from zero. + for j in eachnode(basis), i in eachnode(basis) + elements.node_coordinates[1, i, j, element] = (mesh.tree.coordinates[1, + cell_id] + + jacobian * + (nodes[i] - reference_offset)) + elements.node_coordinates[2, i, j, element] = (mesh.tree.coordinates[2, + cell_id] + + jacobian * + (nodes[j] - reference_offset)) + end end - end - return elements + return elements end - - # Container data structure (structure-of-arrays style) for DG interfaces -mutable struct InterfaceContainer2D{uEltype<:Real} <: AbstractContainer - u::Array{uEltype, 4} # [leftright, variables, i, interfaces] - neighbor_ids::Array{Int, 2} # [leftright, interfaces] - orientations::Vector{Int} # [interfaces] - # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} +mutable struct InterfaceContainer2D{uEltype <: Real} <: AbstractContainer + u::Array{uEltype, 4} # [leftright, variables, i, interfaces] + neighbor_ids::Array{Int, 2} # [leftright, interfaces] + orientations::Vector{Int} # [interfaces] + # internal `resize!`able storage + _u::Vector{uEltype} + _neighbor_ids::Vector{Int} end nvariables(interfaces::InterfaceContainer2D) = size(interfaces.u, 2) @@ -156,169 +157,164 @@ Base.eltype(interfaces::InterfaceContainer2D) = eltype(interfaces.u) # See explanation of Base.resize! 
for the element container function Base.resize!(interfaces::InterfaceContainer2D, capacity) - n_nodes = nnodes(interfaces) - n_variables = nvariables(interfaces) - @unpack _u, _neighbor_ids, orientations = interfaces + n_nodes = nnodes(interfaces) + n_variables = nvariables(interfaces) + @unpack _u, _neighbor_ids, orientations = interfaces - resize!(_u, 2 * n_variables * n_nodes * capacity) - interfaces.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, capacity)) + resize!(_u, 2 * n_variables * n_nodes * capacity) + interfaces.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, capacity)) - resize!(_neighbor_ids, 2 * capacity) - interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (2, capacity)) + resize!(_neighbor_ids, 2 * capacity) + interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2, capacity)) - resize!(orientations, capacity) + resize!(orientations, capacity) - return nothing + return nothing end +function InterfaceContainer2D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan = convert(uEltype, NaN) -function InterfaceContainer2D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - nan = convert(uEltype, NaN) - - # Initialize fields with defaults - _u = fill(nan, 2 * n_variables * n_nodes * capacity) - u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, capacity)) - - _neighbor_ids = fill(typemin(Int), 2 * capacity) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (2, capacity)) + # Initialize fields with defaults + _u = fill(nan, 2 * n_variables * n_nodes * capacity) + u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, capacity)) - orientations = fill(typemin(Int), capacity) + _neighbor_ids = fill(typemin(Int), 2 * capacity) + neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2, capacity)) + orientations = fill(typemin(Int), capacity) - return InterfaceContainer2D{uEltype}( - u, neighbor_ids, orientations, - _u, _neighbor_ids) + return InterfaceContainer2D{uEltype}(u, neighbor_ids, orientations, + _u, _neighbor_ids) end - # Return number of interfaces @inline ninterfaces(interfaces::InterfaceContainer2D) = length(interfaces.orientations) - # Create interface container and initialize interface data in `elements`. 
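The interface-counting function that follows visits only even (positive) directions. Since each conforming interface is shared by exactly two cells, visiting both sides would count it twice; restricting to one side counts it exactly once. A toy version of that argument, with a single `cell == n_cells` test standing in for the real `has_neighbor`/`has_children` checks:

```julia
function count_toy_interfaces(n_cells)
    count = 0
    for cell in 1:n_cells, direction in 1:2  # 1 -> -x, 2 -> +x
        direction % 2 == 1 && continue       # positive directions only
        cell == n_cells && continue          # rightmost cell: no +x neighbor
        count += 1
    end
    return count
end

@assert count_toy_interfaces(3) == 2  # 3 cells in a row share 2 interfaces
```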
function init_interfaces(cell_ids, mesh::TreeMesh2D, elements::ElementContainer2D) - # Initialize container - n_interfaces = count_required_interfaces(mesh, cell_ids) - interfaces = InterfaceContainer2D{eltype(elements)}( - n_interfaces, nvariables(elements), nnodes(elements)) - - # Connect elements with interfaces - init_interfaces!(interfaces, elements, mesh) - return interfaces + # Initialize container + n_interfaces = count_required_interfaces(mesh, cell_ids) + interfaces = InterfaceContainer2D{eltype(elements)}(n_interfaces, + nvariables(elements), + nnodes(elements)) + + # Connect elements with interfaces + init_interfaces!(interfaces, elements, mesh) + return interfaces end # Count the number of interfaces that need to be created function count_required_interfaces(mesh::TreeMesh2D, cell_ids) - count = 0 - - # Iterate over all cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # Only count interfaces in positive direction to avoid double counting - if direction % 2 == 1 - continue - end - - # If no neighbor exists, current cell is small or at boundary and thus we need a mortar - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if neighbor has children - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Skip if neighbor is on different rank -> create MPI interface instead - if mpi_isparallel() && !is_own_cell(mesh.tree, neighbor_cell_id) - continue - end - - count += 1 + count = 0 + + # Iterate over all cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # Only count interfaces in positive direction to avoid double counting + if direction % 2 == 1 + continue + end + + # If no neighbor exists, current cell is small or at boundary and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on different rank -> create MPI interface instead + if mpi_isparallel() && !is_own_cell(mesh.tree, neighbor_cell_id) + continue + end + + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and interfaces function init_interfaces!(interfaces, elements, mesh::TreeMesh2D) - # Exit early if there are no interfaces to initialize - if ninterfaces(interfaces) == 0 - return nothing - end - - # Construct cell -> element mapping for easier algorithm implementation - tree = mesh.tree - c2e = zeros(Int, length(tree)) - for element in eachelement(elements) - c2e[elements.cell_ids[element]] = element - end + # Exit early if there are no interfaces to initialize + if ninterfaces(interfaces) == 0 + return nothing + end - # Reset interface count - count = 0 + # Construct cell -> element mapping for easier algorithm implementation + tree = mesh.tree + c2e = zeros(Int, length(tree)) + for element in eachelement(elements) + c2e[elements.cell_ids[element]] = element + end - # Iterate over all elements to find neighbors and to connect via interfaces - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] + # Reset interface count + count = 0 - # Loop over directions - for direction in eachdirection(mesh.tree) - # Only create interfaces in positive direction - if direction % 2 == 1 - continue - end - - # If no neighbor exists, current cell is small and thus 
we need a mortar - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if neighbor has children - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Skip if neighbor is on different rank -> create MPI interface instead - if mpi_isparallel() && !is_own_cell(mesh.tree, neighbor_cell_id) - continue - end - - # Create interface between elements (1 -> "left" of interface, 2 -> "right" of interface) - count += 1 - interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] - interfaces.neighbor_ids[1, count] = element - - # Set orientation (x -> 1, y -> 2) - interfaces.orientations[count] = div(direction, 2) + # Iterate over all elements to find neighbors and to connect via interfaces + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + # Loop over directions + for direction in eachdirection(mesh.tree) + # Only create interfaces in positive direction + if direction % 2 == 1 + continue + end + + # If no neighbor exists, current cell is small and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on different rank -> create MPI interface instead + if mpi_isparallel() && !is_own_cell(mesh.tree, neighbor_cell_id) + continue + end + + # Create interface between elements (1 -> "left" of interface, 2 -> "right" of interface) + count += 1 + interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] + interfaces.neighbor_ids[1, count] = element + + # Set orientation (x -> 1, y -> 2) + interfaces.orientations[count] = div(direction, 2) + end end - end - @assert count == ninterfaces(interfaces) ("Actual interface count ($count) does not match " * + @assert count==ninterfaces(interfaces) ("Actual interface count ($count) does not match "* "expectations $(ninterfaces(interfaces))") end - - # Container data structure (structure-of-arrays style) for DG boundaries -mutable struct BoundaryContainer2D{RealT<:Real, uEltype<:Real} <: AbstractContainer - u::Array{uEltype, 4} # [leftright, variables, i, boundaries] - neighbor_ids::Vector{Int} # [boundaries] - orientations::Vector{Int} # [boundaries] - neighbor_sides::Vector{Int} # [boundaries] - node_coordinates::Array{RealT, 3} # [orientation, i, elements] - n_boundaries_per_direction::SVector{4, Int} # [direction] - # internal `resize!`able storage - _u::Vector{uEltype} - _node_coordinates::Vector{RealT} +mutable struct BoundaryContainer2D{RealT <: Real, uEltype <: Real} <: AbstractContainer + u::Array{uEltype, 4} # [leftright, variables, i, boundaries] + neighbor_ids::Vector{Int} # [boundaries] + orientations::Vector{Int} # [boundaries] + neighbor_sides::Vector{Int} # [boundaries] + node_coordinates::Array{RealT, 3} # [orientation, i, elements] + n_boundaries_per_direction::SVector{4, Int} # [direction] + # internal `resize!`able storage + _u::Vector{uEltype} + _node_coordinates::Vector{RealT} end nvariables(boundaries::BoundaryContainer2D) = size(boundaries.u, 2) @@ -327,180 +323,179 @@ Base.eltype(boundaries::BoundaryContainer2D) = eltype(boundaries.u) # See explanation of Base.resize! 
for the element container function Base.resize!(boundaries::BoundaryContainer2D, capacity) - n_nodes = nnodes(boundaries) - n_variables = nvariables(boundaries) - @unpack _u, _node_coordinates, - neighbor_ids, orientations, neighbor_sides = boundaries + n_nodes = nnodes(boundaries) + n_variables = nvariables(boundaries) + @unpack _u, _node_coordinates, + neighbor_ids, orientations, neighbor_sides = boundaries - resize!(_u, 2 * n_variables * n_nodes * capacity) - boundaries.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, capacity)) + resize!(_u, 2 * n_variables * n_nodes * capacity) + boundaries.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, capacity)) - resize!(_node_coordinates, 2 * n_nodes * capacity) - boundaries.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (2, n_nodes, capacity)) + resize!(_node_coordinates, 2 * n_nodes * capacity) + boundaries.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (2, n_nodes, capacity)) - resize!(neighbor_ids, capacity) + resize!(neighbor_ids, capacity) - resize!(orientations, capacity) + resize!(orientations, capacity) - resize!(neighbor_sides, capacity) + resize!(neighbor_sides, capacity) - return nothing + return nothing end +function BoundaryContainer2D{RealT, uEltype}(capacity::Integer, n_variables, + n_nodes) where {RealT <: Real, + uEltype <: Real} + nan_RealT = convert(RealT, NaN) + nan_uEltype = convert(uEltype, NaN) -function BoundaryContainer2D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - # Initialize fields with defaults - _u = fill(nan_uEltype, 2 * n_variables * n_nodes * capacity) - u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, capacity)) + # Initialize fields with defaults + _u = fill(nan_uEltype, 2 * n_variables * n_nodes * capacity) + u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, capacity)) - neighbor_ids = fill(typemin(Int), capacity) + neighbor_ids = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) - neighbor_sides = fill(typemin(Int), capacity) + neighbor_sides = fill(typemin(Int), capacity) - _node_coordinates = fill(nan_RealT, 2 * n_nodes * capacity) - node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (2, n_nodes, capacity)) + _node_coordinates = fill(nan_RealT, 2 * n_nodes * capacity) + node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (2, n_nodes, capacity)) - n_boundaries_per_direction = SVector(0, 0, 0, 0) + n_boundaries_per_direction = SVector(0, 0, 0, 0) - return BoundaryContainer2D{RealT, uEltype}( - u, neighbor_ids, orientations, neighbor_sides, - node_coordinates, n_boundaries_per_direction, - _u, _node_coordinates) + return BoundaryContainer2D{RealT, uEltype}(u, neighbor_ids, orientations, + neighbor_sides, + node_coordinates, + n_boundaries_per_direction, + _u, _node_coordinates) end - # Return number of boundaries @inline nboundaries(boundaries::BoundaryContainer2D) = length(boundaries.orientations) - # Create boundaries container and initialize boundary data in `elements`. 
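Note how the constructors consistently fill fresh storage with sentinels: NaN for solution data and `typemin(Int)` for connectivity. This makes a read of an entry that was never properly initialized detectable instead of silently producing plausible zeros. A short illustration:

```julia
u = fill(NaN, 2, 3)                   # solution data: NaN propagates loudly
neighbor_ids = fill(typemin(Int), 3)  # index data: an obviously invalid id

@assert all(isnan, u)
@assert all(==(typemin(Int)), neighbor_ids)
# A NaN surviving into results, or a typemin(Int) used as an index
# (raising a BoundsError), points directly to a connectivity bug.
```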
function init_boundaries(cell_ids, mesh::TreeMesh2D, elements::ElementContainer2D) - # Initialize container - n_boundaries = count_required_boundaries(mesh, cell_ids) - boundaries = BoundaryContainer2D{real(elements), eltype(elements)}( - n_boundaries, nvariables(elements), nnodes(elements)) - - # Connect elements with boundaries - init_boundaries!(boundaries, elements, mesh) - return boundaries + # Initialize container + n_boundaries = count_required_boundaries(mesh, cell_ids) + boundaries = BoundaryContainer2D{real(elements), eltype(elements)}(n_boundaries, + nvariables(elements), + nnodes(elements)) + + # Connect elements with boundaries + init_boundaries!(boundaries, elements, mesh) + return boundaries end # Count the number of boundaries that need to be created function count_required_boundaries(mesh::TreeMesh2D, cell_ids) - count = 0 - - # Iterate over all cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # If neighbor exists, current cell is not at a boundary - if has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If coarse neighbor exists, current cell is not at a boundary - if has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # No neighbor exists in this direction -> must be a boundary - count += 1 + count = 0 + + # Iterate over all cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # If neighbor exists, current cell is not at a boundary + if has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If coarse neighbor exists, current cell is not at a boundary + if has_coarse_neighbor(mesh.tree, cell_id, direction) + continue + end + + # No neighbor exists in this direction -> must be a boundary + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and boundaries function init_boundaries!(boundaries, elements, mesh::TreeMesh2D) - # Exit early if there are no boundaries to initialize - if nboundaries(boundaries) == 0 - return nothing - end + # Exit early if there are no boundaries to initialize + if nboundaries(boundaries) == 0 + return nothing + end - # Reset boundaries count - count = 0 + # Reset boundaries count + count = 0 - # Initialize boundary counts - counts_per_direction = MVector(0, 0, 0, 0) + # Initialize boundary counts + counts_per_direction = MVector(0, 0, 0, 0) - # OBS! Iterate over directions first, then over elements, and count boundaries in each direction - # Rationale: This way the boundaries are internally sorted by the directions -x, +x, -y etc., - # obviating the need to store the boundary condition to be applied explicitly. 
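The direction handling in this function relies on the 2D encoding 1 -> -x, 2 -> +x, 3 -> -y, 4 -> +y: odd directions are negative, even ones positive, and integer division recovers the coordinate axis. A compact restatement of that convention as inferred from the code:

```julia
orientation(direction) = div(direction + 1, 2)  # 1 for x, 2 for y
is_positive(direction) = iseven(direction)

@assert orientation.(1:4) == [1, 1, 2, 2]
@assert is_positive.(1:4) == [false, true, false, true]
# The interface code uses div(direction, 2) instead, which agrees on the
# even (positive) directions that are the only ones it ever visits.
```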
- # Loop over directions - for direction in eachdirection(mesh.tree) - # Iterate over all elements to find missing neighbors and to connect to boundaries - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] - - # If neighbor exists, current cell is not at a boundary - if has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If coarse neighbor exists, current cell is not at a boundary - if has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Create boundary - count += 1 - counts_per_direction[direction] += 1 - - # Set neighbor element id - boundaries.neighbor_ids[count] = element - - # Set neighbor side, which denotes the direction (1 -> negative, 2 -> positive) of the element - if iseven(direction) - boundaries.neighbor_sides[count] = 1 - else - boundaries.neighbor_sides[count] = 2 - end - - # Set orientation (x -> 1, y -> 2) - if direction in (1, 2) - boundaries.orientations[count] = 1 - else - boundaries.orientations[count] = 2 - end - - # Store node coordinates - enc = elements.node_coordinates - if direction == 1 # -x direction - boundaries.node_coordinates[:, :, count] .= enc[:, 1, :, element] - elseif direction == 2 # +x direction - boundaries.node_coordinates[:, :, count] .= enc[:, end, :, element] - elseif direction == 3 # -y direction - boundaries.node_coordinates[:, :, count] .= enc[:, :, 1, element] - elseif direction == 4 # +y direction - boundaries.node_coordinates[:, :, count] .= enc[:, :, end, element] - else - error("should not happen") - end + # OBS! Iterate over directions first, then over elements, and count boundaries in each direction + # Rationale: This way the boundaries are internally sorted by the directions -x, +x, -y etc., + # obviating the need to store the boundary condition to be applied explicitly. 
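Because boundaries are created direction by direction, they end up stored contiguously per direction, and the per-direction counts alone suffice to recover each direction's slice later. The index arithmetic below illustrates that property with made-up counts; it is not code from this patch:

```julia
counts_per_direction = [2, 2, 3, 1]                    # -x, +x, -y, +y
last_index = cumsum(counts_per_direction)              # [2, 4, 7, 8]
first_index = last_index .- counts_per_direction .+ 1  # [1, 3, 5, 8]
@assert (first_index[3]:last_index[3]) == 5:7          # all -y boundaries
```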
+ # Loop over directions + for direction in eachdirection(mesh.tree) + # Iterate over all elements to find missing neighbors and to connect to boundaries + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + # If neighbor exists, current cell is not at a boundary + if has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If coarse neighbor exists, current cell is not at a boundary + if has_coarse_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Create boundary + count += 1 + counts_per_direction[direction] += 1 + + # Set neighbor element id + boundaries.neighbor_ids[count] = element + + # Set neighbor side, which denotes the direction (1 -> negative, 2 -> positive) of the element + if iseven(direction) + boundaries.neighbor_sides[count] = 1 + else + boundaries.neighbor_sides[count] = 2 + end + + # Set orientation (x -> 1, y -> 2) + if direction in (1, 2) + boundaries.orientations[count] = 1 + else + boundaries.orientations[count] = 2 + end + + # Store node coordinates + enc = elements.node_coordinates + if direction == 1 # -x direction + boundaries.node_coordinates[:, :, count] .= enc[:, 1, :, element] + elseif direction == 2 # +x direction + boundaries.node_coordinates[:, :, count] .= enc[:, end, :, element] + elseif direction == 3 # -y direction + boundaries.node_coordinates[:, :, count] .= enc[:, :, 1, element] + elseif direction == 4 # +y direction + boundaries.node_coordinates[:, :, count] .= enc[:, :, end, element] + else + error("should not happen") + end + end end - end - @assert count == nboundaries(boundaries) ("Actual boundaries count ($count) does not match " * + @assert count==nboundaries(boundaries) ("Actual boundaries count ($count) does not match "* "expectations $(nboundaries(boundaries))") - @assert sum(counts_per_direction) == count + @assert sum(counts_per_direction) == count - boundaries.n_boundaries_per_direction = SVector(counts_per_direction) + boundaries.n_boundaries_per_direction = SVector(counts_per_direction) - return boundaries.n_boundaries_per_direction + return boundaries.n_boundaries_per_direction end - - # Container data structure (structure-of-arrays style) for DG L2 mortars # Positions/directions for orientations = 1, large_sides = 2: # mortar is orthogonal to x-axis, large side is in positive coordinate direction wrt mortar @@ -511,17 +506,17 @@ end # | | # lower = 1 | | # | | -mutable struct L2MortarContainer2D{uEltype<:Real} <: AbstractContainer - u_upper::Array{uEltype, 4} # [leftright, variables, i, mortars] - u_lower::Array{uEltype, 4} # [leftright, variables, i, mortars] - neighbor_ids::Array{Int, 2} # [position, mortars] - # Large sides: left -> 1, right -> 2 - large_sides::Vector{Int} # [mortars] - orientations::Vector{Int} # [mortars] - # internal `resize!`able storage - _u_upper::Vector{uEltype} - _u_lower::Vector{uEltype} - _neighbor_ids::Vector{Int} +mutable struct L2MortarContainer2D{uEltype <: Real} <: AbstractContainer + u_upper::Array{uEltype, 4} # [leftright, variables, i, mortars] + u_lower::Array{uEltype, 4} # [leftright, variables, i, mortars] + neighbor_ids::Array{Int, 2} # [position, mortars] + # Large sides: left -> 1, right -> 2 + large_sides::Vector{Int} # [mortars] + orientations::Vector{Int} # [mortars] + # internal `resize!`able storage + _u_upper::Vector{uEltype} + _u_lower::Vector{uEltype} + _neighbor_ids::Vector{Int} end nvariables(mortars::L2MortarContainer2D) = size(mortars.u_upper, 2) @@ -530,249 +525,251 @@ Base.eltype(mortars::L2MortarContainer2D) = 
eltype(mortars.u_upper) # See explanation of Base.resize! for the element container function Base.resize!(mortars::L2MortarContainer2D, capacity) - n_nodes = nnodes(mortars) - n_variables = nvariables(mortars) - @unpack _u_upper, _u_lower, _neighbor_ids, - large_sides, orientations = mortars + n_nodes = nnodes(mortars) + n_variables = nvariables(mortars) + @unpack _u_upper, _u_lower, _neighbor_ids, + large_sides, orientations = mortars - resize!(_u_upper, 2 * n_variables * n_nodes * capacity) - mortars.u_upper = unsafe_wrap(Array, pointer(_u_upper), - (2, n_variables, n_nodes, capacity)) + resize!(_u_upper, 2 * n_variables * n_nodes * capacity) + mortars.u_upper = unsafe_wrap(Array, pointer(_u_upper), + (2, n_variables, n_nodes, capacity)) - resize!(_u_lower, 2 * n_variables * n_nodes * capacity) - mortars.u_lower = unsafe_wrap(Array, pointer(_u_lower), - (2, n_variables, n_nodes, capacity)) + resize!(_u_lower, 2 * n_variables * n_nodes * capacity) + mortars.u_lower = unsafe_wrap(Array, pointer(_u_lower), + (2, n_variables, n_nodes, capacity)) - resize!(_neighbor_ids, 3 * capacity) - mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (3, capacity)) + resize!(_neighbor_ids, 3 * capacity) + mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (3, capacity)) - resize!(large_sides, capacity) + resize!(large_sides, capacity) - resize!(orientations, capacity) + resize!(orientations, capacity) - return nothing + return nothing end +function L2MortarContainer2D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan = convert(uEltype, NaN) -function L2MortarContainer2D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - nan = convert(uEltype, NaN) - - # Initialize fields with defaults - _u_upper = fill(nan, 2 * n_variables * n_nodes * capacity) - u_upper = unsafe_wrap(Array, pointer(_u_upper), - (2, n_variables, n_nodes, capacity)) + # Initialize fields with defaults + _u_upper = fill(nan, 2 * n_variables * n_nodes * capacity) + u_upper = unsafe_wrap(Array, pointer(_u_upper), + (2, n_variables, n_nodes, capacity)) - _u_lower = fill(nan, 2 * n_variables * n_nodes * capacity) - u_lower = unsafe_wrap(Array, pointer(_u_lower), - (2, n_variables, n_nodes, capacity)) + _u_lower = fill(nan, 2 * n_variables * n_nodes * capacity) + u_lower = unsafe_wrap(Array, pointer(_u_lower), + (2, n_variables, n_nodes, capacity)) - _neighbor_ids = fill(typemin(Int), 3 * capacity) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (3, capacity)) + _neighbor_ids = fill(typemin(Int), 3 * capacity) + neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (3, capacity)) - large_sides = fill(typemin(Int), capacity) + large_sides = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) - return L2MortarContainer2D{uEltype}( - u_upper, u_lower, neighbor_ids, large_sides, orientations, - _u_upper, _u_lower, _neighbor_ids) + return L2MortarContainer2D{uEltype}(u_upper, u_lower, neighbor_ids, large_sides, + orientations, + _u_upper, _u_lower, _neighbor_ids) end - # Return number of L2 mortars @inline nmortars(l2mortars::L2MortarContainer2D) = length(l2mortars.orientations) - # Allow printing container contents function Base.show(io::IO, ::MIME"text/plain", c::L2MortarContainer2D) - @nospecialize c # reduce precompilation time - - println(io, '*'^20) - for idx in CartesianIndices(c.u_upper) - println(io, "c.u_upper[$idx] = $(c.u_upper[idx])") - end - for idx in 
CartesianIndices(c.u_lower) - println(io, "c.u_lower[$idx] = $(c.u_lower[idx])") - end - println(io, "transpose(c.neighbor_ids) = $(transpose(c.neighbor_ids))") - println(io, "c.large_sides = $(c.large_sides)") - println(io, "c.orientations = $(c.orientations)") - print(io, '*'^20) -end + @nospecialize c # reduce precompilation time + println(io, '*'^20) + for idx in CartesianIndices(c.u_upper) + println(io, "c.u_upper[$idx] = $(c.u_upper[idx])") + end + for idx in CartesianIndices(c.u_lower) + println(io, "c.u_lower[$idx] = $(c.u_lower[idx])") + end + println(io, "transpose(c.neighbor_ids) = $(transpose(c.neighbor_ids))") + println(io, "c.large_sides = $(c.large_sides)") + println(io, "c.orientations = $(c.orientations)") + print(io, '*'^20) +end # Create mortar container and initialize mortar data in `elements`. function init_mortars(cell_ids, mesh::TreeMesh2D, elements::ElementContainer2D, ::LobattoLegendreMortarL2) - # Initialize containers - n_mortars = count_required_mortars(mesh, cell_ids) - mortars = L2MortarContainer2D{eltype(elements)}( - n_mortars, nvariables(elements), nnodes(elements)) - - # Connect elements with mortars - init_mortars!(mortars, elements, mesh) - return mortars + # Initialize containers + n_mortars = count_required_mortars(mesh, cell_ids) + mortars = L2MortarContainer2D{eltype(elements)}(n_mortars, nvariables(elements), + nnodes(elements)) + + # Connect elements with mortars + init_mortars!(mortars, elements, mesh) + return mortars end # Count the number of mortars that need to be created function count_required_mortars(mesh::TreeMesh2D, cell_ids) - count = 0 - - # Iterate over all cells and count mortars from perspective of coarse cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # If no neighbor exists, cell is small with large neighbor or at boundary -> do nothing - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If neighbor has no children, this is a conforming interface -> do nothing - neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] - if !has_children(mesh.tree, neighbor_id) - continue - end - - # Skip if one of the small cells is on different rank -> create mpi mortar instead - # (the coarse cell is always on the local rank) - if mpi_isparallel() - if direction == 1 # small cells left, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[2, neighbor_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_id] - elseif direction == 2 # small cells right, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_id] - upper_cell_id = mesh.tree.child_ids[3, neighbor_id] - elseif direction == 3 # small cells left, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[3, neighbor_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_id] - else # direction == 4, small cells right, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_id] - upper_cell_id = mesh.tree.child_ids[2, neighbor_id] + count = 0 + + # Iterate over all cells and count mortars from perspective of coarse cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # If no neighbor exists, cell is small with large neighbor or at boundary -> do nothing + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If neighbor has no children, this is a conforming interface -> do nothing + neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] + if !has_children(mesh.tree, neighbor_id) + continue + end + + # Skip if one of the small cells is on different rank -> 
create mpi mortar instead + # (the coarse cell is always on the local rank) + if mpi_isparallel() + if direction == 1 # small cells left, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[2, neighbor_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_id] + elseif direction == 2 # small cells right, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_id] + upper_cell_id = mesh.tree.child_ids[3, neighbor_id] + elseif direction == 3 # small cells left, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[3, neighbor_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_id] + else # direction == 4, small cells right, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_id] + upper_cell_id = mesh.tree.child_ids[2, neighbor_id] + end + small_cell_ids = (lower_cell_id, upper_cell_id) + if any(cell -> !is_own_cell(mesh.tree, cell), small_cell_ids) + continue + end + end + + count += 1 end - small_cell_ids = (lower_cell_id, upper_cell_id) - if any(cell -> !is_own_cell(mesh.tree, cell), small_cell_ids) - continue - end - end - - count +=1 end - end - return count + return count end # Initialize connectivity between elements and mortars function init_mortars!(mortars, elements, mesh::TreeMesh2D) - # Exit early if there are no mortars to initialize - if nmortars(mortars) == 0 - return nothing - end - - # Construct cell -> element mapping for easier algorithm implementation - tree = mesh.tree - c2e = zeros(Int, length(tree)) - for element in eachelement(elements) - c2e[elements.cell_ids[element]] = element - end + # Exit early if there are no mortars to initialize + if nmortars(mortars) == 0 + return nothing + end - # Reset interface count - count = 0 + # Construct cell -> element mapping for easier algorithm implementation + tree = mesh.tree + c2e = zeros(Int, length(tree)) + for element in eachelement(elements) + c2e[elements.cell_ids[element]] = element + end - # Iterate over all elements to find neighbors and to connect via interfaces - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] + # Reset interface count + count = 0 - for direction in eachdirection(mesh.tree) - # If no neighbor exists, cell is small with large neighbor -> do nothing - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If neighbor has no children, this is a conforming interface -> do nothing - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if !has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Skip if one of the small cells is on different rank -> create mpi mortar instead - # (the coarse cell is always on the local rank) - if mpi_isparallel() - if direction == 1 # small cells left, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] - elseif direction == 2 # small cells right, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] - elseif direction == 3 # small cells left, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] - else # direction == 4, small cells right, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] - end - small_cell_ids = (lower_cell_id, upper_cell_id) - if any(cell -> !is_own_cell(mesh.tree, cell), 
small_cell_ids) - continue + # Iterate over all elements to find neighbors and to connect via interfaces + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + for direction in eachdirection(mesh.tree) + # If no neighbor exists, cell is small with large neighbor -> do nothing + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If neighbor has no children, this is a conforming interface -> do nothing + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if !has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if one of the small cells is on different rank -> create mpi mortar instead + # (the coarse cell is always on the local rank) + if mpi_isparallel() + if direction == 1 # small cells left, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] + elseif direction == 2 # small cells right, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] + elseif direction == 3 # small cells left, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] + else # direction == 4, small cells right, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] + end + small_cell_ids = (lower_cell_id, upper_cell_id) + if any(cell -> !is_own_cell(mesh.tree, cell), small_cell_ids) + continue + end + end + + # Create mortar between elements: + # 1 -> small element in negative coordinate direction + # 2 -> small element in positive coordinate direction + # 3 -> large element + count += 1 + mortars.neighbor_ids[3, count] = element + if direction == 1 + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[2, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, + neighbor_cell_id]] + elseif direction == 2 + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[3, + neighbor_cell_id]] + elseif direction == 3 + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[3, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, + neighbor_cell_id]] + elseif direction == 4 + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[2, + neighbor_cell_id]] + else + error("should not happen") + end + + # Set large side, which denotes the direction (1 -> negative, 2 -> positive) of the large side + if iseven(direction) + mortars.large_sides[count] = 1 + else + mortars.large_sides[count] = 2 + end + + # Set orientation (x -> 1, y -> 2) + if direction in (1, 2) + mortars.orientations[count] = 1 + else + mortars.orientations[count] = 2 + end end - end - - - # Create mortar between elements: - # 1 -> small element in negative coordinate direction - # 2 -> small element in positive coordinate direction - # 3 -> large element - count += 1 - mortars.neighbor_ids[3, count] = element - if direction == 1 - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[2, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, neighbor_cell_id]] - elseif direction == 2 - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, neighbor_cell_id]] - 
mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[3, neighbor_cell_id]] - elseif direction == 3 - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[3, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, neighbor_cell_id]] - elseif direction == 4 - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[2, neighbor_cell_id]] - else - error("should not happen") - end - - # Set large side, which denotes the direction (1 -> negative, 2 -> positive) of the large side - if iseven(direction) - mortars.large_sides[count] = 1 - else - mortars.large_sides[count] = 2 - end - - # Set orientation (x -> 1, y -> 2) - if direction in (1, 2) - mortars.orientations[count] = 1 - else - mortars.orientations[count] = 2 - end end - end - @assert count == nmortars(mortars) ("Actual mortar count ($count) does not match " * + @assert count==nmortars(mortars) ("Actual mortar count ($count) does not match "* "expectations $(nmortars(mortars))") end - - # Container data structure (structure-of-arrays style) for DG MPI interfaces -mutable struct MPIInterfaceContainer2D{uEltype<:Real} <: AbstractContainer - u::Array{uEltype, 4} # [leftright, variables, i, interfaces] - local_neighbor_ids::Vector{Int} # [interfaces] - orientations::Vector{Int} # [interfaces] - remote_sides::Vector{Int} # [interfaces] - # internal `resize!`able storage - _u::Vector{uEltype} +mutable struct MPIInterfaceContainer2D{uEltype <: Real} <: AbstractContainer + u::Array{uEltype, 4} # [leftright, variables, i, interfaces] + local_neighbor_ids::Vector{Int} # [interfaces] + orientations::Vector{Int} # [interfaces] + remote_sides::Vector{Int} # [interfaces] + # internal `resize!`able storage + _u::Vector{uEltype} end nvariables(mpi_interfaces::MPIInterfaceContainer2D) = size(mpi_interfaces.u, 2) @@ -781,154 +778,154 @@ Base.eltype(mpi_interfaces::MPIInterfaceContainer2D) = eltype(mpi_interfaces.u) # See explanation of Base.resize! 
for the element container function Base.resize!(mpi_interfaces::MPIInterfaceContainer2D, capacity) - n_nodes = nnodes(mpi_interfaces) - n_variables = nvariables(mpi_interfaces) - @unpack _u, local_neighbor_ids, orientations, remote_sides = mpi_interfaces + n_nodes = nnodes(mpi_interfaces) + n_variables = nvariables(mpi_interfaces) + @unpack _u, local_neighbor_ids, orientations, remote_sides = mpi_interfaces - resize!(_u, 2 * n_variables * n_nodes * capacity) - mpi_interfaces.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, capacity)) + resize!(_u, 2 * n_variables * n_nodes * capacity) + mpi_interfaces.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, capacity)) - resize!(local_neighbor_ids, capacity) + resize!(local_neighbor_ids, capacity) - resize!(orientations, capacity) + resize!(orientations, capacity) - resize!(remote_sides, capacity) + resize!(remote_sides, capacity) - return nothing + return nothing end +function MPIInterfaceContainer2D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan = convert(uEltype, NaN) -function MPIInterfaceContainer2D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - nan = convert(uEltype, NaN) - - # Initialize fields with defaults - _u = fill(nan, 2 * n_variables * n_nodes * capacity) - u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, capacity)) + # Initialize fields with defaults + _u = fill(nan, 2 * n_variables * n_nodes * capacity) + u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, capacity)) - local_neighbor_ids = fill(typemin(Int), capacity) + local_neighbor_ids = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) - remote_sides = fill(typemin(Int), capacity) + remote_sides = fill(typemin(Int), capacity) - return MPIInterfaceContainer2D{uEltype}( - u, local_neighbor_ids, orientations, remote_sides, - _u) + return MPIInterfaceContainer2D{uEltype}(u, local_neighbor_ids, orientations, + remote_sides, + _u) end - # TODO: Taal, rename to ninterfaces? # Return number of interfaces -nmpiinterfaces(mpi_interfaces::MPIInterfaceContainer2D) = length(mpi_interfaces.orientations) - +function nmpiinterfaces(mpi_interfaces::MPIInterfaceContainer2D) + length(mpi_interfaces.orientations) +end # Create MPI interface container and initialize MPI interface data in `elements`. 
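Taken together, the counting functions in this file partition every cell face into boundary/mortar, conforming local interface, or conforming MPI interface, based on three tests applied in order. A condensed sketch of that decision chain; `face_kind` and its arguments are hypothetical names, not Trixi API:

```julia
function face_kind(has_neighbor, neighbor_has_children, neighbor_is_local)
    has_neighbor || return :boundary_or_mortar  # small cell or physical boundary
    neighbor_has_children && return :mortar     # non-conforming face
    return neighbor_is_local ? :interface : :mpi_interface
end

@assert face_kind(false, false, true) == :boundary_or_mortar
@assert face_kind(true, true, true) == :mortar
@assert face_kind(true, false, true) == :interface
@assert face_kind(true, false, false) == :mpi_interface
```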
function init_mpi_interfaces(cell_ids, mesh::TreeMesh2D, elements::ElementContainer2D) - # Initialize container - n_mpi_interfaces = count_required_mpi_interfaces(mesh, cell_ids) - mpi_interfaces = MPIInterfaceContainer2D{eltype(elements)}( - n_mpi_interfaces, nvariables(elements), nnodes(elements)) - - # Connect elements with interfaces - init_mpi_interfaces!(mpi_interfaces, elements, mesh) - return mpi_interfaces + # Initialize container + n_mpi_interfaces = count_required_mpi_interfaces(mesh, cell_ids) + mpi_interfaces = MPIInterfaceContainer2D{eltype(elements)}(n_mpi_interfaces, + nvariables(elements), + nnodes(elements)) + + # Connect elements with interfaces + init_mpi_interfaces!(mpi_interfaces, elements, mesh) + return mpi_interfaces end # Count the number of MPI interfaces that need to be created function count_required_mpi_interfaces(mesh::TreeMesh2D, cell_ids) - # No MPI interfaces needed if MPI is not used - if !mpi_isparallel() - return 0 - end + # No MPI interfaces needed if MPI is not used + if !mpi_isparallel() + return 0 + end - count = 0 + count = 0 - # Iterate over all cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # If no neighbor exists, current cell is small or at boundary and thus we need a mortar - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if neighbor has children - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Skip if neighbor is on this rank -> create regular interface instead - if is_own_cell(mesh.tree, neighbor_cell_id) - continue - end - - count += 1 + # Iterate over all cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # If no neighbor exists, current cell is small or at boundary and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on this rank -> create regular interface instead + if is_own_cell(mesh.tree, neighbor_cell_id) + continue + end + + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and interfaces function init_mpi_interfaces!(mpi_interfaces, elements, mesh::TreeMesh2D) - # Exit early if there are no MPI interfaces to initialize - if nmpiinterfaces(mpi_interfaces) == 0 - return nothing - end - - # Reset interface count - count = 0 + # Exit early if there are no MPI interfaces to initialize + if nmpiinterfaces(mpi_interfaces) == 0 + return nothing + end - # Iterate over all elements to find neighbors and to connect via mpi_interfaces - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] + # Reset interface count + count = 0 - # Loop over directions - for direction in eachdirection(mesh.tree) - # If no neighbor exists, current cell is small and thus we need a mortar - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if neighbor has children - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Skip if neighbor is on this MPI rank -> create regular interface instead - if is_own_cell(mesh.tree, neighbor_cell_id) - continue - end - - # Create interface between elements - count += 1 - mpi_interfaces.local_neighbor_ids[count] = element - - if 
iseven(direction) # element is "left" of interface, remote cell is "right" of interface - mpi_interfaces.remote_sides[count] = 2 - else - mpi_interfaces.remote_sides[count] = 1 - end - - # Set orientation (x -> 1, y -> 2) - if direction in (1, 2) # x-direction - mpi_interfaces.orientations[count] = 1 - else # y-direction - mpi_interfaces.orientations[count] = 2 - end + # Iterate over all elements to find neighbors and to connect via mpi_interfaces + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + # Loop over directions + for direction in eachdirection(mesh.tree) + # If no neighbor exists, current cell is small and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if neighbor is on this MPI rank -> create regular interface instead + if is_own_cell(mesh.tree, neighbor_cell_id) + continue + end + + # Create interface between elements + count += 1 + mpi_interfaces.local_neighbor_ids[count] = element + + if iseven(direction) # element is "left" of interface, remote cell is "right" of interface + mpi_interfaces.remote_sides[count] = 2 + else + mpi_interfaces.remote_sides[count] = 1 + end + + # Set orientation (x -> 1, y -> 2) + if direction in (1, 2) # x-direction + mpi_interfaces.orientations[count] = 1 + else # y-direction + mpi_interfaces.orientations[count] = 2 + end + end end - end - @assert count == nmpiinterfaces(mpi_interfaces) ("Actual interface count ($count) does not match " - * "expectations $(nmpiinterfaces(mpi_interfaces))") + @assert count==nmpiinterfaces(mpi_interfaces) ("Actual interface count ($count) does not match " + *"expectations $(nmpiinterfaces(mpi_interfaces))") end - # Container data structure (structure-of-arrays style) for DG L2 mortars # Positions/directions for orientations = 1, large_sides = 2: # mortar is orthogonal to x-axis, large side is in positive coordinate direction wrt mortar @@ -939,17 +936,17 @@ end # | | # lower = 1 | | # | | -mutable struct MPIL2MortarContainer2D{uEltype<:Real} <: AbstractContainer - u_upper::Array{uEltype, 4} # [leftright, variables, i, mortars] - u_lower::Array{uEltype, 4} # [leftright, variables, i, mortars] - local_neighbor_ids::Vector{Vector{Int}} # [mortars] - local_neighbor_positions::Vector{Vector{Int}} # [mortars] - # Large sides: left -> 1, right -> 2 - large_sides::Vector{Int} # [mortars] - orientations::Vector{Int} # [mortars] - # internal `resize!`able storage - _u_upper::Vector{uEltype} - _u_lower::Vector{uEltype} +mutable struct MPIL2MortarContainer2D{uEltype <: Real} <: AbstractContainer + u_upper::Array{uEltype, 4} # [leftright, variables, i, mortars] + u_lower::Array{uEltype, 4} # [leftright, variables, i, mortars] + local_neighbor_ids::Vector{Vector{Int}} # [mortars] + local_neighbor_positions::Vector{Vector{Int}} # [mortars] + # Large sides: left -> 1, right -> 2 + large_sides::Vector{Int} # [mortars] + orientations::Vector{Int} # [mortars] + # internal `resize!`able storage + _u_upper::Vector{uEltype} + _u_lower::Vector{uEltype} end nvariables(mpi_mortars::MPIL2MortarContainer2D) = size(mpi_mortars.u_upper, 2) @@ -958,303 +955,303 @@ Base.eltype(mpi_mortars::MPIL2MortarContainer2D) = eltype(mpi_mortars.u_upper) # See explanation of Base.resize! 
for the element container function Base.resize!(mpi_mortars::MPIL2MortarContainer2D, capacity) - n_nodes = nnodes(mpi_mortars) - n_variables = nvariables(mpi_mortars) - @unpack _u_upper, _u_lower, local_neighbor_ids, local_neighbor_positions, - large_sides, orientations = mpi_mortars + n_nodes = nnodes(mpi_mortars) + n_variables = nvariables(mpi_mortars) + @unpack _u_upper, _u_lower, local_neighbor_ids, local_neighbor_positions, + large_sides, orientations = mpi_mortars - resize!(_u_upper, 2 * n_variables * n_nodes * capacity) - mpi_mortars.u_upper = unsafe_wrap(Array, pointer(_u_upper), - (2, n_variables, n_nodes, capacity)) + resize!(_u_upper, 2 * n_variables * n_nodes * capacity) + mpi_mortars.u_upper = unsafe_wrap(Array, pointer(_u_upper), + (2, n_variables, n_nodes, capacity)) - resize!(_u_lower, 2 * n_variables * n_nodes * capacity) - mpi_mortars.u_lower = unsafe_wrap(Array, pointer(_u_lower), - (2, n_variables, n_nodes, capacity)) + resize!(_u_lower, 2 * n_variables * n_nodes * capacity) + mpi_mortars.u_lower = unsafe_wrap(Array, pointer(_u_lower), + (2, n_variables, n_nodes, capacity)) - resize!(local_neighbor_ids, capacity) - resize!(local_neighbor_positions, capacity) + resize!(local_neighbor_ids, capacity) + resize!(local_neighbor_positions, capacity) - resize!(large_sides, capacity) + resize!(large_sides, capacity) - resize!(orientations, capacity) + resize!(orientations, capacity) - return nothing + return nothing end +function MPIL2MortarContainer2D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan = convert(uEltype, NaN) -function MPIL2MortarContainer2D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - nan = convert(uEltype, NaN) - - # Initialize fields with defaults - _u_upper = fill(nan, 2 * n_variables * n_nodes * capacity) - u_upper = unsafe_wrap(Array, pointer(_u_upper), - (2, n_variables, n_nodes, capacity)) + # Initialize fields with defaults + _u_upper = fill(nan, 2 * n_variables * n_nodes * capacity) + u_upper = unsafe_wrap(Array, pointer(_u_upper), + (2, n_variables, n_nodes, capacity)) - _u_lower = fill(nan, 2 * n_variables * n_nodes * capacity) - u_lower = unsafe_wrap(Array, pointer(_u_lower), - (2, n_variables, n_nodes, capacity)) + _u_lower = fill(nan, 2 * n_variables * n_nodes * capacity) + u_lower = unsafe_wrap(Array, pointer(_u_lower), + (2, n_variables, n_nodes, capacity)) - local_neighbor_ids = fill(Vector{Int}(), capacity) - local_neighbor_positions = fill(Vector{Int}(), capacity) + local_neighbor_ids = fill(Vector{Int}(), capacity) + local_neighbor_positions = fill(Vector{Int}(), capacity) - large_sides = fill(typemin(Int), capacity) + large_sides = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) - return MPIL2MortarContainer2D{uEltype}( - u_upper, u_lower, local_neighbor_ids, local_neighbor_positions, large_sides, orientations, - _u_upper, _u_lower) + return MPIL2MortarContainer2D{uEltype}(u_upper, u_lower, local_neighbor_ids, + local_neighbor_positions, large_sides, + orientations, + _u_upper, _u_lower) end - # Return number of L2 mortars -@inline nmpimortars(mpi_l2mortars::MPIL2MortarContainer2D) = length(mpi_l2mortars.orientations) - +@inline function nmpimortars(mpi_l2mortars::MPIL2MortarContainer2D) + length(mpi_l2mortars.orientations) +end # Create MPI mortar container and initialize MPI mortar data in `elements`. 
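One subtlety in the constructor above: `fill(Vector{Int}(), capacity)` does not allocate `capacity` independent vectors but stores `capacity` references to one and the same empty vector. That is only safe because `init_mpi_mortars!` below assigns a freshly allocated vector to each slot instead of mutating the shared placeholder. A short plain-Julia demonstration of the difference, not part of the patch:

    ids = fill(Vector{Int}(), 3)
    push!(ids[1], 42)
    @assert ids[2] == [42]    # all three slots alias the same vector

    ids = fill(Vector{Int}(), 3)
    ids[1] = [42]             # assignment replaces the reference in slot 1 only
    @assert isempty(ids[2]) && isempty(ids[3])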
function init_mpi_mortars(cell_ids, mesh::TreeMesh2D, elements::ElementContainer2D, ::LobattoLegendreMortarL2) - # Initialize containers - n_mpi_mortars = count_required_mpi_mortars(mesh, cell_ids) - mpi_mortars = MPIL2MortarContainer2D{eltype(elements)}( - n_mpi_mortars, nvariables(elements), nnodes(elements)) - - # Connect elements with mortars - init_mpi_mortars!(mpi_mortars, elements, mesh) - return mpi_mortars + # Initialize containers + n_mpi_mortars = count_required_mpi_mortars(mesh, cell_ids) + mpi_mortars = MPIL2MortarContainer2D{eltype(elements)}(n_mpi_mortars, + nvariables(elements), + nnodes(elements)) + + # Connect elements with mortars + init_mpi_mortars!(mpi_mortars, elements, mesh) + return mpi_mortars end # Count the number of MPI mortars that need to be created function count_required_mpi_mortars(mesh::TreeMesh2D, cell_ids) - # No MPI mortars needed if MPI is not used - if !mpi_isparallel() - return 0 - end - - count = 0 - - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # If no neighbor exists, cell is small with large neighbor or at boundary - if !has_neighbor(mesh.tree, cell_id, direction) - # If no large neighbor exists, cell is at boundary -> do nothing - if !has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if the large neighbor is on the same rank to prevent double counting - parent_id = mesh.tree.parent_ids[cell_id] - large_cell_id = mesh.tree.neighbor_ids[direction, parent_id] - if is_own_cell(mesh.tree, large_cell_id) - continue - end - - # Current cell is small with large neighbor on a different rank, find the other - # small cell - if direction == 1 # small cells right, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[1, parent_id] - upper_cell_id = mesh.tree.child_ids[3, parent_id] - elseif direction == 2 # small cells left, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[2, parent_id] - upper_cell_id = mesh.tree.child_ids[4, parent_id] - elseif direction == 3 # small cells right, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[1, parent_id] - upper_cell_id = mesh.tree.child_ids[2, parent_id] - else # direction == 4, small cells left, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[3, parent_id] - upper_cell_id = mesh.tree.child_ids[4, parent_id] - end - - if cell_id == lower_cell_id - sibling_id = upper_cell_id - elseif cell_id == upper_cell_id - sibling_id = lower_cell_id - else - error("should not happen") - end - - # Skip if the other small cell is on the same rank and its id is smaller than the current - # cell id to prevent double counting - if is_own_cell(mesh.tree, sibling_id) && sibling_id < cell_id - continue - end - else # Cell has a neighbor - # If neighbor has no children, this is a conforming interface -> do nothing - neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] - if !has_children(mesh.tree, neighbor_id) - continue - end + # No MPI mortars needed if MPI is not used + if !mpi_isparallel() + return 0 + end - # Skip if both small cells are on this rank -> create regular mortar instead - if direction == 1 # small cells left, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[2, neighbor_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_id] - elseif direction == 2 # small cells right, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_id] - upper_cell_id = mesh.tree.child_ids[3, neighbor_id] - elseif direction == 3 # small cells left, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[3, 
neighbor_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_id] - else # direction == 4, small cells right, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_id] - upper_cell_id = mesh.tree.child_ids[2, neighbor_id] - end - small_cell_ids = (lower_cell_id, upper_cell_id) - if all(cell -> is_own_cell(mesh.tree, cell), small_cell_ids) - continue + count = 0 + + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # If no neighbor exists, cell is small with large neighbor or at boundary + if !has_neighbor(mesh.tree, cell_id, direction) + # If no large neighbor exists, cell is at boundary -> do nothing + if !has_coarse_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if the large neighbor is on the same rank to prevent double counting + parent_id = mesh.tree.parent_ids[cell_id] + large_cell_id = mesh.tree.neighbor_ids[direction, parent_id] + if is_own_cell(mesh.tree, large_cell_id) + continue + end + + # Current cell is small with large neighbor on a different rank, find the other + # small cell + if direction == 1 # small cells right, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[1, parent_id] + upper_cell_id = mesh.tree.child_ids[3, parent_id] + elseif direction == 2 # small cells left, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[2, parent_id] + upper_cell_id = mesh.tree.child_ids[4, parent_id] + elseif direction == 3 # small cells right, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[1, parent_id] + upper_cell_id = mesh.tree.child_ids[2, parent_id] + else # direction == 4, small cells left, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[3, parent_id] + upper_cell_id = mesh.tree.child_ids[4, parent_id] + end + + if cell_id == lower_cell_id + sibling_id = upper_cell_id + elseif cell_id == upper_cell_id + sibling_id = lower_cell_id + else + error("should not happen") + end + + # Skip if the other small cell is on the same rank and its id is smaller than the current + # cell id to prevent double counting + if is_own_cell(mesh.tree, sibling_id) && sibling_id < cell_id + continue + end + else # Cell has a neighbor + # If neighbor has no children, this is a conforming interface -> do nothing + neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] + if !has_children(mesh.tree, neighbor_id) + continue + end + + # Skip if both small cells are on this rank -> create regular mortar instead + if direction == 1 # small cells left, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[2, neighbor_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_id] + elseif direction == 2 # small cells right, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_id] + upper_cell_id = mesh.tree.child_ids[3, neighbor_id] + elseif direction == 3 # small cells left, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[3, neighbor_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_id] + else # direction == 4, small cells right, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_id] + upper_cell_id = mesh.tree.child_ids[2, neighbor_id] + end + small_cell_ids = (lower_cell_id, upper_cell_id) + if all(cell -> is_own_cell(mesh.tree, cell), small_cell_ids) + continue + end + end + + count += 1 end - end - - count += 1 end - end - return count + return count end # Initialize connectivity between elements and mortars function init_mpi_mortars!(mpi_mortars, elements, mesh::TreeMesh2D) - # Exit early if there are no MPI mortars to initialize - if 
nmpimortars(mpi_mortars) == 0 - return nothing - end - - # Construct cell -> element mapping for easier algorithm implementation - tree = mesh.tree - c2e = zeros(Int, length(tree)) - for element in eachelement(elements) - c2e[elements.cell_ids[element]] = element - end - - # Reset mortar count - count = 0 - - # Iterate over all elements to find neighbors and to connect via mortars - for element in eachelement(elements) - cell_id = elements.cell_ids[element] - - for direction in eachdirection(mesh.tree) - # If no neighbor exists, cell is small with large neighbor or at boundary - if !has_neighbor(mesh.tree, cell_id, direction) - # If no large neighbor exists, cell is at boundary -> do nothing - if !has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if the large neighbor is on the same rank -> will be handled in another iteration - parent_cell_id = mesh.tree.parent_ids[cell_id] - large_cell_id = mesh.tree.neighbor_ids[direction, parent_cell_id] - if is_own_cell(mesh.tree, large_cell_id) - continue - end - - # Current cell is small with large neighbor on a different rank, find the other - # small cell - if direction == 1 # small cells right, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[1, parent_cell_id] - upper_cell_id = mesh.tree.child_ids[3, parent_cell_id] - elseif direction == 2 # small cells left, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[2, parent_cell_id] - upper_cell_id = mesh.tree.child_ids[4, parent_cell_id] - elseif direction == 3 # small cells right, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[1, parent_cell_id] - upper_cell_id = mesh.tree.child_ids[2, parent_cell_id] - else # direction == 4, small cells left, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[3, parent_cell_id] - upper_cell_id = mesh.tree.child_ids[4, parent_cell_id] - end - - if cell_id == lower_cell_id - sibling_id = upper_cell_id - elseif cell_id == upper_cell_id - sibling_id = lower_cell_id - else - error("should not happen") - end + # Exit early if there are no MPI mortars to initialize + if nmpimortars(mpi_mortars) == 0 + return nothing + end - # Skip if the other small cell is on the same rank and its id is smaller than the current - # cell id to prevent double counting - if is_own_cell(mesh.tree, sibling_id) && sibling_id < cell_id - continue - end - else # Cell has a neighbor - large_cell_id = cell_id # save explicitly for later processing + # Construct cell -> element mapping for easier algorithm implementation + tree = mesh.tree + c2e = zeros(Int, length(tree)) + for element in eachelement(elements) + c2e[elements.cell_ids[element]] = element + end - # If neighbor has no children, this is a conforming interface -> do nothing - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if !has_children(mesh.tree, neighbor_cell_id) - continue - end + # Reset mortar count + count = 0 - # Skip if both small cells are on this rank -> create regular mortar instead - if direction == 1 # small cells left, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] - elseif direction == 2 # small cells right, mortar in x-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] - elseif direction == 3 # small cells left, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] - else # 
direction == 4, small cells right, mortar in y-direction - lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] - upper_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] - end - small_cell_ids = (lower_cell_id, upper_cell_id) - if all(cell -> is_own_cell(mesh.tree, cell), small_cell_ids) - continue + # Iterate over all elements to find neighbors and to connect via mortars + for element in eachelement(elements) + cell_id = elements.cell_ids[element] + + for direction in eachdirection(mesh.tree) + # If no neighbor exists, cell is small with large neighbor or at boundary + if !has_neighbor(mesh.tree, cell_id, direction) + # If no large neighbor exists, cell is at boundary -> do nothing + if !has_coarse_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if the large neighbor is on the same rank -> will be handled in another iteration + parent_cell_id = mesh.tree.parent_ids[cell_id] + large_cell_id = mesh.tree.neighbor_ids[direction, parent_cell_id] + if is_own_cell(mesh.tree, large_cell_id) + continue + end + + # Current cell is small with large neighbor on a different rank, find the other + # small cell + if direction == 1 # small cells right, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[1, parent_cell_id] + upper_cell_id = mesh.tree.child_ids[3, parent_cell_id] + elseif direction == 2 # small cells left, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[2, parent_cell_id] + upper_cell_id = mesh.tree.child_ids[4, parent_cell_id] + elseif direction == 3 # small cells right, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[1, parent_cell_id] + upper_cell_id = mesh.tree.child_ids[2, parent_cell_id] + else # direction == 4, small cells left, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[3, parent_cell_id] + upper_cell_id = mesh.tree.child_ids[4, parent_cell_id] + end + + if cell_id == lower_cell_id + sibling_id = upper_cell_id + elseif cell_id == upper_cell_id + sibling_id = lower_cell_id + else + error("should not happen") + end + + # Skip if the other small cell is on the same rank and its id is smaller than the current + # cell id to prevent double counting + if is_own_cell(mesh.tree, sibling_id) && sibling_id < cell_id + continue + end + else # Cell has a neighbor + large_cell_id = cell_id # save explicitly for later processing + + # If neighbor has no children, this is a conforming interface -> do nothing + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if !has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Skip if both small cells are on this rank -> create regular mortar instead + if direction == 1 # small cells left, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] + elseif direction == 2 # small cells right, mortar in x-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] + elseif direction == 3 # small cells left, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[3, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[4, neighbor_cell_id] + else # direction == 4, small cells right, mortar in y-direction + lower_cell_id = mesh.tree.child_ids[1, neighbor_cell_id] + upper_cell_id = mesh.tree.child_ids[2, neighbor_cell_id] + end + small_cell_ids = (lower_cell_id, upper_cell_id) + if all(cell -> is_own_cell(mesh.tree, cell), small_cell_ids) + continue + end + end + + # Create mortar between elements: + # 1 -> small 
element in negative coordinate direction + # 2 -> small element in positive coordinate direction + # 3 -> large element + count += 1 + + local_neighbor_ids = Vector{Int}() + local_neighbor_positions = Vector{Int}() + if is_own_cell(mesh.tree, lower_cell_id) + push!(local_neighbor_ids, c2e[lower_cell_id]) + push!(local_neighbor_positions, 1) + end + if is_own_cell(mesh.tree, upper_cell_id) + push!(local_neighbor_ids, c2e[upper_cell_id]) + push!(local_neighbor_positions, 2) + end + if is_own_cell(mesh.tree, large_cell_id) + push!(local_neighbor_ids, c2e[large_cell_id]) + push!(local_neighbor_positions, 3) + end + + mpi_mortars.local_neighbor_ids[count] = local_neighbor_ids + mpi_mortars.local_neighbor_positions[count] = local_neighbor_positions + + # Set large side, which denotes the direction (1 -> negative, 2 -> positive) of the large side + # To prevent double counting, the mortars are always identified from the point of view of + # a large cell, if it is on this rank. In that case, direction points towards the small cells. + # If the large cell is not on this rank, the point of view of a small cell is taken instead, + # hence direction points towards the large cell in this case. + if iseven(direction) + mpi_mortars.large_sides[count] = is_own_cell(mesh.tree, large_cell_id) ? + 1 : 2 + else + mpi_mortars.large_sides[count] = is_own_cell(mesh.tree, large_cell_id) ? + 2 : 1 + end + + # Set orientation (1, 2 -> x; 3, 4 -> y) + if direction in (1, 2) + mpi_mortars.orientations[count] = 1 + else + mpi_mortars.orientations[count] = 2 + end end - end - - # Create mortar between elements: - # 1 -> small element in negative coordinate direction - # 2 -> small element in positive coordinate direction - # 3 -> large element - count += 1 - - local_neighbor_ids = Vector{Int}() - local_neighbor_positions = Vector{Int}() - if is_own_cell(mesh.tree, lower_cell_id) - push!(local_neighbor_ids, c2e[lower_cell_id]) - push!(local_neighbor_positions, 1) - end - if is_own_cell(mesh.tree, upper_cell_id) - push!(local_neighbor_ids, c2e[upper_cell_id]) - push!(local_neighbor_positions, 2) - end - if is_own_cell(mesh.tree, large_cell_id) - push!(local_neighbor_ids, c2e[large_cell_id]) - push!(local_neighbor_positions, 3) - end - - mpi_mortars.local_neighbor_ids[count] = local_neighbor_ids - mpi_mortars.local_neighbor_positions[count] = local_neighbor_positions - - # Set large side, which denotes the direction (1 -> negative, 2 -> positive) of the large side - # To prevent double counting, the mortars are always identified from the point of view of - # a large cell, if it is on this rank. In that case, direction points towards the small cells. - # If the large cell is not on this rank, the point of view of a small cell is taken instead, - # hence direction points towards the large cell in this case. - if iseven(direction) - mpi_mortars.large_sides[count] = is_own_cell(mesh.tree, large_cell_id) ? 1 : 2 - else - mpi_mortars.large_sides[count] = is_own_cell(mesh.tree, large_cell_id) ? 2 : 1 - end - - # Set orientation (1, 2 -> x; 3, 4 -> y) - if direction in (1, 2) - mpi_mortars.orientations[count] = 1 - else - mpi_mortars.orientations[count] = 2 - end end - end - return nothing + return nothing end - - - - end # @muladd diff --git a/src/solvers/dgsem_tree/containers_3d.jl b/src/solvers/dgsem_tree/containers_3d.jl index bc88e931b31..0318946e34d 100644 --- a/src/solvers/dgsem_tree/containers_3d.jl +++ b/src/solvers/dgsem_tree/containers_3d.jl @@ -3,17 +3,17 @@ # we need to opt-in explicitly. 
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Container data structure (structure-of-arrays style) for DG elements -mutable struct ElementContainer3D{RealT<:Real, uEltype<:Real} <: AbstractContainer - inverse_jacobian::Vector{RealT} # [elements] - node_coordinates::Array{RealT, 5} # [orientation, i, j, k, elements] - surface_flux_values::Array{uEltype, 5} # [variables, i, j, direction, elements] - cell_ids::Vector{Int} # [elements] - # internal `resize!`able storage - _node_coordinates::Vector{RealT} - _surface_flux_values::Vector{uEltype} +mutable struct ElementContainer3D{RealT <: Real, uEltype <: Real} <: AbstractContainer + inverse_jacobian::Vector{RealT} # [elements] + node_coordinates::Array{RealT, 5} # [orientation, i, j, k, elements] + surface_flux_values::Array{uEltype, 5} # [variables, i, j, direction, elements] + cell_ids::Vector{Int} # [elements] + # internal `resize!`able storage + _node_coordinates::Vector{RealT} + _surface_flux_values::Vector{uEltype} end nvariables(elements::ElementContainer3D) = size(elements.surface_flux_values, 1) @@ -26,51 +26,52 @@ Base.eltype(elements::ElementContainer3D) = eltype(elements.surface_flux_values) # `unsafe_wrap`ping multi-dimensional `Array`s around the # internal storage. function Base.resize!(elements::ElementContainer3D, capacity) - n_nodes = nnodes(elements) - n_variables = nvariables(elements) - @unpack _node_coordinates, _surface_flux_values, - inverse_jacobian, cell_ids = elements + n_nodes = nnodes(elements) + n_variables = nvariables(elements) + @unpack _node_coordinates, _surface_flux_values, + inverse_jacobian, cell_ids = elements - resize!(inverse_jacobian, capacity) + resize!(inverse_jacobian, capacity) - resize!(_node_coordinates, 3 * n_nodes * n_nodes * n_nodes * capacity) - elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (3, n_nodes, n_nodes, n_nodes, capacity)) + resize!(_node_coordinates, 3 * n_nodes * n_nodes * n_nodes * capacity) + elements.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (3, n_nodes, n_nodes, n_nodes, capacity)) - resize!(_surface_flux_values, n_variables * n_nodes * n_nodes * 2 * 3 * capacity) - elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (n_variables, n_nodes, n_nodes, 2 * 3, capacity)) + resize!(_surface_flux_values, n_variables * n_nodes * n_nodes * 2 * 3 * capacity) + elements.surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (n_variables, n_nodes, n_nodes, 2 * 3, + capacity)) - resize!(cell_ids, capacity) + resize!(cell_ids, capacity) - return nothing + return nothing end +function ElementContainer3D{RealT, uEltype}(capacity::Integer, n_variables, + n_nodes) where {RealT <: Real, + uEltype <: Real} + nan_RealT = convert(RealT, NaN) + nan_uEltype = convert(uEltype, NaN) -function ElementContainer3D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - # Initialize fields with defaults - inverse_jacobian = fill(nan_RealT, capacity) + # Initialize fields with defaults + inverse_jacobian = fill(nan_RealT, capacity) - _node_coordinates = fill(nan_RealT, 3 * n_nodes * n_nodes * n_nodes * capacity) - node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (3, n_nodes, n_nodes, n_nodes, capacity)) + _node_coordinates = fill(nan_RealT, 3 * n_nodes * n_nodes * n_nodes * capacity) + node_coordinates = 
unsafe_wrap(Array, pointer(_node_coordinates), + (3, n_nodes, n_nodes, n_nodes, capacity)) - _surface_flux_values = fill(nan_uEltype, n_variables * n_nodes * n_nodes * 2 * 3 * capacity) - surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), - (n_variables, n_nodes, n_nodes, 2 * 3, capacity)) + _surface_flux_values = fill(nan_uEltype, + n_variables * n_nodes * n_nodes * 2 * 3 * capacity) + surface_flux_values = unsafe_wrap(Array, pointer(_surface_flux_values), + (n_variables, n_nodes, n_nodes, 2 * 3, capacity)) - cell_ids = fill(typemin(Int), capacity) + cell_ids = fill(typemin(Int), capacity) - - return ElementContainer3D{RealT, uEltype}( - inverse_jacobian, node_coordinates, surface_flux_values, cell_ids, - _node_coordinates, _surface_flux_values) + return ElementContainer3D{RealT, uEltype}(inverse_jacobian, node_coordinates, + surface_flux_values, cell_ids, + _node_coordinates, _surface_flux_values) end - # Return number of elements nelements(elements::ElementContainer3D) = length(elements.cell_ids) # TODO: Taal performance, 1:nelements(elements) vs. Base.OneTo(nelements(elements)) @@ -84,72 +85,76 @@ In particular, not the elements themselves are returned. @inline eachelement(elements::ElementContainer3D) = Base.OneTo(nelements(elements)) @inline Base.real(elements::ElementContainer3D) = eltype(elements.node_coordinates) - # Create element container and initialize element data function init_elements(cell_ids, mesh::TreeMesh3D, equations::AbstractEquations{3}, - basis, ::Type{RealT}, ::Type{uEltype}) where {RealT<:Real, uEltype<:Real} - # Initialize container - n_elements = length(cell_ids) - elements = ElementContainer3D{RealT, uEltype}( - n_elements, nvariables(equations), nnodes(basis)) - - init_elements!(elements, cell_ids, mesh, basis) - return elements + basis, ::Type{RealT}, + ::Type{uEltype}) where {RealT <: Real, uEltype <: Real} + # Initialize container + n_elements = length(cell_ids) + elements = ElementContainer3D{RealT, uEltype}(n_elements, nvariables(equations), + nnodes(basis)) + + init_elements!(elements, cell_ids, mesh, basis) + return elements end function init_elements!(elements, cell_ids, mesh::TreeMesh3D, basis) - nodes = get_nodes(basis) - # Compute the length of the 1D reference interval by integrating - # the function with constant value unity on the corresponding - # element data type (using \circ) - reference_length = integrate(one ∘ eltype, nodes, basis) - # Compute the offset of the midpoint of the 1D reference interval - # (its difference from zero) - reference_offset = (first(nodes) + last(nodes)) / 2 - - # Store cell ids - elements.cell_ids .= cell_ids - - # Calculate inverse Jacobian and node coordinates - for element in eachelement(elements) - # Get cell id - cell_id = cell_ids[element] - - # Get cell length - dx = length_at_cell(mesh.tree, cell_id) - - # Calculate inverse Jacobian - jacobian = dx / reference_length - elements.inverse_jacobian[element] = inv(jacobian) - - # Calculate node coordinates - # Note that the `tree_coordinates` are the midpoints of the cells. - # Hence, we need to add an offset for `nodes` with a midpoint - # different from zero. 
- for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) - elements.node_coordinates[1, i, j, k, element] = ( - mesh.tree.coordinates[1, cell_id] + jacobian * (nodes[i] - reference_offset)) - elements.node_coordinates[2, i, j, k, element] = ( - mesh.tree.coordinates[2, cell_id] + jacobian * (nodes[j] - reference_offset)) - elements.node_coordinates[3, i, j, k, element] = ( - mesh.tree.coordinates[3, cell_id] + jacobian * (nodes[k] - reference_offset)) + nodes = get_nodes(basis) + # Compute the length of the 1D reference interval by integrating + # the function with constant value unity on the corresponding + # element data type (using \circ) + reference_length = integrate(one ∘ eltype, nodes, basis) + # Compute the offset of the midpoint of the 1D reference interval + # (its difference from zero) + reference_offset = (first(nodes) + last(nodes)) / 2 + + # Store cell ids + elements.cell_ids .= cell_ids + + # Calculate inverse Jacobian and node coordinates + for element in eachelement(elements) + # Get cell id + cell_id = cell_ids[element] + + # Get cell length + dx = length_at_cell(mesh.tree, cell_id) + + # Calculate inverse Jacobian + jacobian = dx / reference_length + elements.inverse_jacobian[element] = inv(jacobian) + + # Calculate node coordinates + # Note that the `tree_coordinates` are the midpoints of the cells. + # Hence, we need to add an offset for `nodes` with a midpoint + # different from zero. + for k in eachnode(basis), j in eachnode(basis), i in eachnode(basis) + elements.node_coordinates[1, i, j, k, element] = (mesh.tree.coordinates[1, + cell_id] + + jacobian * (nodes[i] - + reference_offset)) + elements.node_coordinates[2, i, j, k, element] = (mesh.tree.coordinates[2, + cell_id] + + jacobian * (nodes[j] - + reference_offset)) + elements.node_coordinates[3, i, j, k, element] = (mesh.tree.coordinates[3, + cell_id] + + jacobian * (nodes[k] - + reference_offset)) + end end - end - return elements + return elements end - - # Container data structure (structure-of-arrays style) for DG interfaces -mutable struct InterfaceContainer3D{uEltype<:Real} <: AbstractContainer - u::Array{uEltype, 5} # [leftright, variables, i, j, interfaces] - neighbor_ids::Matrix{Int} # [leftright, interfaces] - orientations::Vector{Int} # [interfaces] - # internal `resize!`able storage - _u::Vector{uEltype} - _neighbor_ids::Vector{Int} +mutable struct InterfaceContainer3D{uEltype <: Real} <: AbstractContainer + u::Array{uEltype, 5} # [leftright, variables, i, j, interfaces] + neighbor_ids::Matrix{Int} # [leftright, interfaces] + orientations::Vector{Int} # [interfaces] + # internal `resize!`able storage + _u::Vector{uEltype} + _neighbor_ids::Vector{Int} end nvariables(interfaces::InterfaceContainer3D) = size(interfaces.u, 2) @@ -158,160 +163,155 @@ Base.eltype(interfaces::InterfaceContainer3D) = eltype(interfaces.u) # See explanation of Base.resize! 
for the element container function Base.resize!(interfaces::InterfaceContainer3D, capacity) - n_nodes = nnodes(interfaces) - n_variables = nvariables(interfaces) - @unpack _u, _neighbor_ids, orientations = interfaces + n_nodes = nnodes(interfaces) + n_variables = nvariables(interfaces) + @unpack _u, _neighbor_ids, orientations = interfaces - resize!(_u, 2 * n_variables * n_nodes * n_nodes * capacity) - interfaces.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, n_nodes, capacity)) + resize!(_u, 2 * n_variables * n_nodes * n_nodes * capacity) + interfaces.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, n_nodes, capacity)) - resize!(_neighbor_ids, 2 * capacity) - interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (2, capacity)) + resize!(_neighbor_ids, 2 * capacity) + interfaces.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2, capacity)) - resize!(orientations, capacity) + resize!(orientations, capacity) - return nothing + return nothing end +function InterfaceContainer3D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan = convert(uEltype, NaN) -function InterfaceContainer3D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - nan = convert(uEltype, NaN) - - # Initialize fields with defaults - _u = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) - u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, n_nodes, capacity)) - - _neighbor_ids = fill(typemin(Int), 2 * capacity) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (2, capacity)) + # Initialize fields with defaults + _u = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) + u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, n_nodes, capacity)) - orientations = fill(typemin(Int), capacity) + _neighbor_ids = fill(typemin(Int), 2 * capacity) + neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (2, capacity)) + orientations = fill(typemin(Int), capacity) - return InterfaceContainer3D{uEltype}( - u, neighbor_ids, orientations, - _u, _neighbor_ids) + return InterfaceContainer3D{uEltype}(u, neighbor_ids, orientations, + _u, _neighbor_ids) end - # Return number of interfaces ninterfaces(interfaces::InterfaceContainer3D) = length(interfaces.orientations) - # Create interface container and initialize interface data in `elements`. 
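The `direction` index used in the loops above and below enumerates the cell faces as -x, +x, -y, +y, -z, +z: odd values point in the negative and even values in the positive coordinate direction, and the orientation is recovered as `cld(direction, 2)`. A small sketch of this encoding with hypothetical helper names, mirroring the `if direction in (1, 2)` branches:

    orientation(direction) = cld(direction, 2)    # x -> 1, y -> 2, z -> 3
    is_positive(direction) = iseven(direction)

    @assert orientation.(1:6) == [1, 1, 2, 2, 3, 3]
    @assert is_positive.(1:6) == [false, true, false, true, false, true]

This is also why `count_required_interfaces` skips odd directions: counting only positive directions visits each conforming interface exactly once.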
function init_interfaces(cell_ids, mesh::TreeMesh3D, elements::ElementContainer3D) - # Initialize container - n_interfaces = count_required_interfaces(mesh, cell_ids) - interfaces = InterfaceContainer3D{eltype(elements)}( - n_interfaces, nvariables(elements), nnodes(elements)) - - # Connect elements with interfaces - init_interfaces!(interfaces, elements, mesh) - return interfaces + # Initialize container + n_interfaces = count_required_interfaces(mesh, cell_ids) + interfaces = InterfaceContainer3D{eltype(elements)}(n_interfaces, + nvariables(elements), + nnodes(elements)) + + # Connect elements with interfaces + init_interfaces!(interfaces, elements, mesh) + return interfaces end # Count the number of interfaces that need to be created function count_required_interfaces(mesh::TreeMesh3D, cell_ids) - count = 0 - - # Iterate over all cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # Only count interfaces in positive direction to avoid double counting - if direction % 2 == 1 - continue - end - - # If no neighbor exists, current cell is small or at boundary and thus we need a mortar - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if neighbor has children - neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_id) - continue - end - - count += 1 + count = 0 + + # Iterate over all cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # Only count interfaces in positive direction to avoid double counting + if direction % 2 == 1 + continue + end + + # If no neighbor exists, current cell is small or at boundary and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_id) + continue + end + + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and interfaces function init_interfaces!(interfaces, elements, mesh::TreeMesh3D) - # Construct cell -> element mapping for easier algorithm implementation - tree = mesh.tree - c2e = zeros(Int, length(tree)) - for element in eachelement(elements) - c2e[elements.cell_ids[element]] = element - end - - # Reset interface count - count = 0 + # Construct cell -> element mapping for easier algorithm implementation + tree = mesh.tree + c2e = zeros(Int, length(tree)) + for element in eachelement(elements) + c2e[elements.cell_ids[element]] = element + end - # Iterate over all elements to find neighbors and to connect via interfaces - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] + # Reset interface count + count = 0 - # Loop over directions - for direction in eachdirection(mesh.tree) - # Only create interfaces in positive direction - if direction % 2 == 1 - continue - end - - # If no neighbor exists, current cell is small and thus we need a mortar - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Skip if neighbor has children - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Create interface between elements (1 -> "left" of interface, 2 -> "right" of interface) - count += 1 - interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] - interfaces.neighbor_ids[1, count] = element - - # Set orientation (x -> 1, y -> 2, z -> 3) - if direction in (1, 2) - interfaces.orientations[count] 
= 1 - elseif direction in (3, 4) - interfaces.orientations[count] = 2 - else - interfaces.orientations[count] = 3 - end + # Iterate over all elements to find neighbors and to connect via interfaces + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + # Loop over directions + for direction in eachdirection(mesh.tree) + # Only create interfaces in positive direction + if direction % 2 == 1 + continue + end + + # If no neighbor exists, current cell is small and thus we need a mortar + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Skip if neighbor has children + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Create interface between elements (1 -> "left" of interface, 2 -> "right" of interface) + count += 1 + interfaces.neighbor_ids[2, count] = c2e[neighbor_cell_id] + interfaces.neighbor_ids[1, count] = element + + # Set orientation (x -> 1, y -> 2, z -> 3) + if direction in (1, 2) + interfaces.orientations[count] = 1 + elseif direction in (3, 4) + interfaces.orientations[count] = 2 + else + interfaces.orientations[count] = 3 + end + end end - end - @assert count == ninterfaces(interfaces) ("Actual interface count ($count) does not match " * + @assert count==ninterfaces(interfaces) ("Actual interface count ($count) does not match "* "expectations $(ninterfaces(interfaces))") end - - # Container data structure (structure-of-arrays style) for DG boundaries -mutable struct BoundaryContainer3D{RealT<:Real, uEltype<:Real} <: AbstractContainer - u::Array{uEltype, 5} # [leftright, variables, i, j, boundaries] - neighbor_ids::Vector{Int} # [boundaries] - orientations::Vector{Int} # [boundaries] - neighbor_sides::Vector{Int} # [boundaries] - node_coordinates::Array{RealT, 4} # [orientation, i, j, elements] - n_boundaries_per_direction::SVector{6, Int} # [direction] - # internal `resize!`able storage - _u::Vector{uEltype} - _node_coordinates::Vector{RealT} +mutable struct BoundaryContainer3D{RealT <: Real, uEltype <: Real} <: AbstractContainer + u::Array{uEltype, 5} # [leftright, variables, i, j, boundaries] + neighbor_ids::Vector{Int} # [boundaries] + orientations::Vector{Int} # [boundaries] + neighbor_sides::Vector{Int} # [boundaries] + node_coordinates::Array{RealT, 4} # [orientation, i, j, elements] + n_boundaries_per_direction::SVector{6, Int} # [direction] + # internal `resize!`able storage + _u::Vector{uEltype} + _node_coordinates::Vector{RealT} end nvariables(boundaries::BoundaryContainer3D) = size(boundaries.u, 2) @@ -320,181 +320,183 @@ Base.eltype(boundaries::BoundaryContainer3D) = eltype(boundaries.u) # See explanation of Base.resize! 
for the element container function Base.resize!(boundaries::BoundaryContainer3D, capacity) - n_nodes = nnodes(boundaries) - n_variables = nvariables(boundaries) - @unpack _u, _node_coordinates, - neighbor_ids, orientations, neighbor_sides = boundaries + n_nodes = nnodes(boundaries) + n_variables = nvariables(boundaries) + @unpack _u, _node_coordinates, + neighbor_ids, orientations, neighbor_sides = boundaries - resize!(_u, 2 * n_variables * n_nodes * n_nodes * capacity) - boundaries.u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, n_nodes, capacity)) + resize!(_u, 2 * n_variables * n_nodes * n_nodes * capacity) + boundaries.u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, n_nodes, capacity)) - resize!(_node_coordinates, 3 * n_nodes * n_nodes * capacity) - boundaries.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (3, n_nodes, n_nodes, capacity)) + resize!(_node_coordinates, 3 * n_nodes * n_nodes * capacity) + boundaries.node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (3, n_nodes, n_nodes, capacity)) - resize!(neighbor_ids, capacity) + resize!(neighbor_ids, capacity) - resize!(orientations, capacity) + resize!(orientations, capacity) - resize!(neighbor_sides, capacity) + resize!(neighbor_sides, capacity) - return nothing + return nothing end +function BoundaryContainer3D{RealT, uEltype}(capacity::Integer, n_variables, + n_nodes) where {RealT <: Real, + uEltype <: Real} + nan_RealT = convert(RealT, NaN) + nan_uEltype = convert(uEltype, NaN) -function BoundaryContainer3D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - # Initialize fields with defaults - _u = fill(nan_uEltype, 2 * n_variables * n_nodes * n_nodes * capacity) - u = unsafe_wrap(Array, pointer(_u), - (2, n_variables, n_nodes, n_nodes, capacity)) + # Initialize fields with defaults + _u = fill(nan_uEltype, 2 * n_variables * n_nodes * n_nodes * capacity) + u = unsafe_wrap(Array, pointer(_u), + (2, n_variables, n_nodes, n_nodes, capacity)) - neighbor_ids = fill(typemin(Int), capacity) + neighbor_ids = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) - neighbor_sides = fill(typemin(Int), capacity) + neighbor_sides = fill(typemin(Int), capacity) - _node_coordinates = fill(nan_RealT, 3 * n_nodes * n_nodes * capacity) - node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), - (3, n_nodes, n_nodes, capacity)) + _node_coordinates = fill(nan_RealT, 3 * n_nodes * n_nodes * capacity) + node_coordinates = unsafe_wrap(Array, pointer(_node_coordinates), + (3, n_nodes, n_nodes, capacity)) - n_boundaries_per_direction = SVector(0, 0, 0, 0, 0, 0) + n_boundaries_per_direction = SVector(0, 0, 0, 0, 0, 0) - return BoundaryContainer3D{RealT, uEltype}( - u, neighbor_ids, orientations, neighbor_sides, - node_coordinates, n_boundaries_per_direction, - _u, _node_coordinates) + return BoundaryContainer3D{RealT, uEltype}(u, neighbor_ids, orientations, + neighbor_sides, + node_coordinates, + n_boundaries_per_direction, + _u, _node_coordinates) end - # Return number of boundaries nboundaries(boundaries::BoundaryContainer3D) = length(boundaries.orientations) - # Create boundaries container and initialize boundary data in `elements`. 
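Because `init_boundaries!` below iterates over directions first, the boundaries end up sorted as -x, +x, -y, +y, -z, +z, so `n_boundaries_per_direction` translates into contiguous index ranges, one per direction. A plain-Julia sketch of this bookkeeping with made-up counts, not part of the patch:

    counts_per_direction = [2, 2, 1, 1, 3, 3]   # hypothetical example counts
    offsets = cumsum([0; counts_per_direction[1:(end - 1)]])
    ranges = [(offsets[d] + 1):(offsets[d] + counts_per_direction[d]) for d in 1:6]
    @assert ranges[5] == 7:9    # all -z boundaries form one contiguous block

This is what obviates storing, for each boundary, which boundary condition applies: the direction, and hence the condition, is implicit in the index range.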
function init_boundaries(cell_ids, mesh::TreeMesh3D, elements::ElementContainer3D) - # Initialize container - n_boundaries = count_required_boundaries(mesh, cell_ids) - boundaries = BoundaryContainer3D{real(elements), eltype(elements)}( - n_boundaries, nvariables(elements), nnodes(elements)) - - # Connect elements with boundaries - init_boundaries!(boundaries, elements, mesh) - return boundaries + # Initialize container + n_boundaries = count_required_boundaries(mesh, cell_ids) + boundaries = BoundaryContainer3D{real(elements), eltype(elements)}(n_boundaries, + nvariables(elements), + nnodes(elements)) + + # Connect elements with boundaries + init_boundaries!(boundaries, elements, mesh) + return boundaries end # Count the number of boundaries that need to be created function count_required_boundaries(mesh::TreeMesh3D, cell_ids) - count = 0 - - # Iterate over all cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # If neighbor exists, current cell is not at a boundary - if has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If coarse neighbor exists, current cell is not at a boundary - if has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # No neighbor exists in this direction -> must be a boundary - count += 1 + count = 0 + + # Iterate over all cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # If neighbor exists, current cell is not at a boundary + if has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If coarse neighbor exists, current cell is not at a boundary + if has_coarse_neighbor(mesh.tree, cell_id, direction) + continue + end + + # No neighbor exists in this direction -> must be a boundary + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and boundaries function init_boundaries!(boundaries, elements, mesh::TreeMesh3D) - # Reset boundaries count - count = 0 - - # Initialize boundary counts - counts_per_direction = MVector(0, 0, 0, 0, 0, 0) - - # OBS! Iterate over directions first, then over elements, and count boundaries in each direction - # Rationale: This way the boundaries are internally sorted by the directions -x, +x, -y etc., - # obviating the need to store the boundary condition to be applied explicitly. 
- # Loop over directions - for direction in eachdirection(mesh.tree) - # Iterate over all elements to find missing neighbors and to connect to boundaries - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] - - # If neighbor exists, current cell is not at a boundary - if has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If coarse neighbor exists, current cell is not at a boundary - if has_coarse_neighbor(mesh.tree, cell_id, direction) - continue - end - - # Create boundary - count += 1 - counts_per_direction[direction] += 1 - - # Set neighbor element id - boundaries.neighbor_ids[count] = element - - # Set neighbor side, which denotes the direction (1 -> negative, 2 -> positive) of the element - if iseven(direction) - boundaries.neighbor_sides[count] = 1 - else - boundaries.neighbor_sides[count] = 2 - end - - # Set orientation (x -> 1, y -> 2) - if direction in (1, 2) - boundaries.orientations[count] = 1 - elseif direction in (3, 4) - boundaries.orientations[count] = 2 - else - boundaries.orientations[count] = 3 - end - - # Store node coordinates - enc = elements.node_coordinates - if direction == 1 # -x direction - boundaries.node_coordinates[:, :, :, count] .= enc[:, 1, :, :, element] - elseif direction == 2 # +x direction - boundaries.node_coordinates[:, :, :, count] .= enc[:, end, :, :, element] - elseif direction == 3 # -y direction - boundaries.node_coordinates[:, :, :, count] .= enc[:, :, 1, :, element] - elseif direction == 4 # +y direction - boundaries.node_coordinates[:, :, :, count] .= enc[:, :, end, :, element] - elseif direction == 5 # -z direction - boundaries.node_coordinates[:, :, :, count] .= enc[:, :, :, 1, element] - elseif direction == 6 # +z direction - boundaries.node_coordinates[:, :, :, count] .= enc[:, :, :, end, element] - else - error("should not happen") - end + # Reset boundaries count + count = 0 + + # Initialize boundary counts + counts_per_direction = MVector(0, 0, 0, 0, 0, 0) + + # OBS! Iterate over directions first, then over elements, and count boundaries in each direction + # Rationale: This way the boundaries are internally sorted by the directions -x, +x, -y etc., + # obviating the need to store the boundary condition to be applied explicitly. 
+ # Loop over directions + for direction in eachdirection(mesh.tree) + # Iterate over all elements to find missing neighbors and to connect to boundaries + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + # If neighbor exists, current cell is not at a boundary + if has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If coarse neighbor exists, current cell is not at a boundary + if has_coarse_neighbor(mesh.tree, cell_id, direction) + continue + end + + # Create boundary + count += 1 + counts_per_direction[direction] += 1 + + # Set neighbor element id + boundaries.neighbor_ids[count] = element + + # Set neighbor side, which denotes the direction (1 -> negative, 2 -> positive) of the element + if iseven(direction) + boundaries.neighbor_sides[count] = 1 + else + boundaries.neighbor_sides[count] = 2 + end + + # Set orientation (x -> 1, y -> 2) + if direction in (1, 2) + boundaries.orientations[count] = 1 + elseif direction in (3, 4) + boundaries.orientations[count] = 2 + else + boundaries.orientations[count] = 3 + end + + # Store node coordinates + enc = elements.node_coordinates + if direction == 1 # -x direction + boundaries.node_coordinates[:, :, :, count] .= enc[:, 1, :, :, element] + elseif direction == 2 # +x direction + boundaries.node_coordinates[:, :, :, count] .= enc[:, end, :, :, + element] + elseif direction == 3 # -y direction + boundaries.node_coordinates[:, :, :, count] .= enc[:, :, 1, :, element] + elseif direction == 4 # +y direction + boundaries.node_coordinates[:, :, :, count] .= enc[:, :, end, :, + element] + elseif direction == 5 # -z direction + boundaries.node_coordinates[:, :, :, count] .= enc[:, :, :, 1, element] + elseif direction == 6 # +z direction + boundaries.node_coordinates[:, :, :, count] .= enc[:, :, :, end, + element] + else + error("should not happen") + end + end end - end - @assert count == nboundaries(boundaries) ("Actual boundaries count ($count) does not match " * + @assert count==nboundaries(boundaries) ("Actual boundaries count ($count) does not match "* "expectations $(nboundaries(boundaries))") - @assert sum(counts_per_direction) == count + @assert sum(counts_per_direction) == count - boundaries.n_boundaries_per_direction = SVector(counts_per_direction) + boundaries.n_boundaries_per_direction = SVector(counts_per_direction) - return SVector(counts_per_direction) + return SVector(counts_per_direction) end - - # Container data structure (structure-of-arrays style) for DG L2 mortars # Positions/directions for orientations = 1, large_sides = 2: # mortar is orthogonal to x-axis, large side is in positive coordinate direction wrt mortar @@ -517,21 +519,21 @@ end # # Left and right are used *both* for the numbering of the mortar faces *and* for the position of the # elements with respect to the axis orthogonal to the mortar. 
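The four face arrays in the mortar container below are again column-major views into flat vectors, so a hand-computed linear index and the five-dimensional index address the same memory. A standalone sketch verifying the index arithmetic; all sizes are made-up and not part of the patch:

    n_variables, n_nodes, capacity = 2, 3, 4
    _u = collect(1.0:(2 * n_variables * n_nodes * n_nodes * capacity))
    u = unsafe_wrap(Array, pointer(_u),
                    (2, n_variables, n_nodes, n_nodes, capacity))
    lr, v, i, j, m = 2, 1, 3, 2, 4
    linear = lr + 2 * ((v - 1) +
             n_variables * ((i - 1) + n_nodes * ((j - 1) + n_nodes * (m - 1))))
    @assert u[lr, v, i, j, m] == _u[linear]

The `(5, capacity)` wrapping of `_neighbor_ids` relies on the same identity, with the five neighbor positions varying fastest.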
-mutable struct L2MortarContainer3D{uEltype<:Real} <: AbstractContainer - u_upper_left ::Array{uEltype, 5} # [leftright, variables, i, j, mortars] - u_upper_right::Array{uEltype, 5} # [leftright, variables, i, j, mortars] - u_lower_left ::Array{uEltype, 5} # [leftright, variables, i, j, mortars] - u_lower_right::Array{uEltype, 5} # [leftright, variables, i, j, mortars] - neighbor_ids ::Array{Int, 2} # [position, mortars] - # Large sides: left -> 1, right -> 2 - large_sides ::Vector{Int} # [mortars] - orientations::Vector{Int} # [mortars] - # internal `resize!`able storage - _u_upper_left ::Vector{uEltype} - _u_upper_right::Vector{uEltype} - _u_lower_left ::Vector{uEltype} - _u_lower_right::Vector{uEltype} - _neighbor_ids ::Vector{Int} +mutable struct L2MortarContainer3D{uEltype <: Real} <: AbstractContainer + u_upper_left::Array{uEltype, 5} # [leftright, variables, i, j, mortars] + u_upper_right::Array{uEltype, 5} # [leftright, variables, i, j, mortars] + u_lower_left::Array{uEltype, 5} # [leftright, variables, i, j, mortars] + u_lower_right::Array{uEltype, 5} # [leftright, variables, i, j, mortars] + neighbor_ids::Array{Int, 2} # [position, mortars] + # Large sides: left -> 1, right -> 2 + large_sides::Vector{Int} # [mortars] + orientations::Vector{Int} # [mortars] + # internal `resize!`able storage + _u_upper_left::Vector{uEltype} + _u_upper_right::Vector{uEltype} + _u_lower_left::Vector{uEltype} + _u_lower_right::Vector{uEltype} + _neighbor_ids::Vector{Int} end nvariables(mortars::L2MortarContainer3D) = size(mortars.u_upper_left, 2) @@ -540,256 +542,274 @@ Base.eltype(mortars::L2MortarContainer3D) = eltype(mortars.u_upper_left) # See explanation of Base.resize! for the element container function Base.resize!(mortars::L2MortarContainer3D, capacity) - n_nodes = nnodes(mortars) - n_variables = nvariables(mortars) - @unpack _u_upper_left, _u_upper_right, _u_lower_left, _u_lower_right, - _neighbor_ids, large_sides, orientations = mortars + n_nodes = nnodes(mortars) + n_variables = nvariables(mortars) + @unpack _u_upper_left, _u_upper_right, _u_lower_left, _u_lower_right, + _neighbor_ids, large_sides, orientations = mortars - resize!(_u_upper_left, 2 * n_variables * n_nodes * n_nodes * capacity) - mortars.u_upper_left = unsafe_wrap(Array, pointer(_u_upper_left), - (2, n_variables, n_nodes, n_nodes, capacity)) + resize!(_u_upper_left, 2 * n_variables * n_nodes * n_nodes * capacity) + mortars.u_upper_left = unsafe_wrap(Array, pointer(_u_upper_left), + (2, n_variables, n_nodes, n_nodes, capacity)) - resize!(_u_upper_right, 2 * n_variables * n_nodes * n_nodes * capacity) - mortars.u_upper_right = unsafe_wrap(Array, pointer(_u_upper_right), - (2, n_variables, n_nodes, n_nodes, capacity)) + resize!(_u_upper_right, 2 * n_variables * n_nodes * n_nodes * capacity) + mortars.u_upper_right = unsafe_wrap(Array, pointer(_u_upper_right), + (2, n_variables, n_nodes, n_nodes, capacity)) - resize!(_u_lower_left, 2 * n_variables * n_nodes * n_nodes * capacity) - mortars.u_lower_left = unsafe_wrap(Array, pointer(_u_lower_left), - (2, n_variables, n_nodes, n_nodes, capacity)) + resize!(_u_lower_left, 2 * n_variables * n_nodes * n_nodes * capacity) + mortars.u_lower_left = unsafe_wrap(Array, pointer(_u_lower_left), + (2, n_variables, n_nodes, n_nodes, capacity)) - resize!(_u_lower_right, 2 * n_variables * n_nodes * n_nodes * capacity) - mortars.u_lower_right = unsafe_wrap(Array, pointer(_u_lower_right), - (2, n_variables, n_nodes, n_nodes, capacity)) + resize!(_u_lower_right, 2 * n_variables * n_nodes * n_nodes * 
capacity) + mortars.u_lower_right = unsafe_wrap(Array, pointer(_u_lower_right), + (2, n_variables, n_nodes, n_nodes, capacity)) - resize!(_neighbor_ids, 5 * capacity) - mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (5, capacity)) + resize!(_neighbor_ids, 5 * capacity) + mortars.neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (5, capacity)) - resize!(large_sides, capacity) + resize!(large_sides, capacity) - resize!(orientations, capacity) + resize!(orientations, capacity) - return nothing + return nothing end +function L2MortarContainer3D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan = convert(uEltype, NaN) -function L2MortarContainer3D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - nan = convert(uEltype, NaN) - - # Initialize fields with defaults - _u_upper_left = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) - u_upper_left = unsafe_wrap(Array, pointer(_u_upper_left), - (2, n_variables, n_nodes, n_nodes, capacity)) + # Initialize fields with defaults + _u_upper_left = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) + u_upper_left = unsafe_wrap(Array, pointer(_u_upper_left), + (2, n_variables, n_nodes, n_nodes, capacity)) - _u_upper_right = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) - u_upper_right = unsafe_wrap(Array, pointer(_u_upper_right), - (2, n_variables, n_nodes, n_nodes, capacity)) + _u_upper_right = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) + u_upper_right = unsafe_wrap(Array, pointer(_u_upper_right), + (2, n_variables, n_nodes, n_nodes, capacity)) - _u_lower_left = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) - u_lower_left = unsafe_wrap(Array, pointer(_u_lower_left), - (2, n_variables, n_nodes, n_nodes, capacity)) + _u_lower_left = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) + u_lower_left = unsafe_wrap(Array, pointer(_u_lower_left), + (2, n_variables, n_nodes, n_nodes, capacity)) - _u_lower_right = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) - u_lower_right = unsafe_wrap(Array, pointer(_u_lower_right), - (2, n_variables, n_nodes, n_nodes, capacity)) + _u_lower_right = fill(nan, 2 * n_variables * n_nodes * n_nodes * capacity) + u_lower_right = unsafe_wrap(Array, pointer(_u_lower_right), + (2, n_variables, n_nodes, n_nodes, capacity)) - _neighbor_ids = fill(typemin(Int), 5 * capacity) - neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), - (5, capacity)) + _neighbor_ids = fill(typemin(Int), 5 * capacity) + neighbor_ids = unsafe_wrap(Array, pointer(_neighbor_ids), + (5, capacity)) - large_sides = fill(typemin(Int), capacity) + large_sides = fill(typemin(Int), capacity) - orientations = fill(typemin(Int), capacity) + orientations = fill(typemin(Int), capacity) - return L2MortarContainer3D{uEltype}( - u_upper_left, u_upper_right, - u_lower_left, u_lower_right, - neighbor_ids, large_sides, orientations, - _u_upper_left, _u_upper_right, - _u_lower_left, _u_lower_right, - _neighbor_ids) + return L2MortarContainer3D{uEltype}(u_upper_left, u_upper_right, + u_lower_left, u_lower_right, + neighbor_ids, large_sides, orientations, + _u_upper_left, _u_upper_right, + _u_lower_left, _u_lower_right, + _neighbor_ids) end - # Return number of L2 mortars nmortars(l2mortars::L2MortarContainer3D) = length(l2mortars.orientations) - # Allow printing container contents function Base.show(io::IO, ::MIME"text/plain", c::L2MortarContainer3D) - @nospecialize c # reduce precompilation time - - println(io, '*'^20) - 
for idx in CartesianIndices(c.u_upper_left) - println(io, "c.u_upper_left[$idx] = $(c.u_upper_left[idx])") - end - for idx in CartesianIndices(c.u_upper_right) - println(io, "c.u_upper_right[$idx] = $(c.u_upper_right[idx])") - end - for idx in CartesianIndices(c.u_lower_left) - println(io, "c.u_lower_left[$idx] = $(c.u_lower_left[idx])") - end - for idx in CartesianIndices(c.u_lower_right) - println(io, "c.u_lower_right[$idx] = $(c.u_lower_right[idx])") - end - println(io, "transpose(c.neighbor_ids) = $(transpose(c.neighbor_ids))") - println(io, "c.large_sides = $(c.large_sides)") - println(io, "c.orientations = $(c.orientations)") - print(io, '*'^20) -end + @nospecialize c # reduce precompilation time + println(io, '*'^20) + for idx in CartesianIndices(c.u_upper_left) + println(io, "c.u_upper_left[$idx] = $(c.u_upper_left[idx])") + end + for idx in CartesianIndices(c.u_upper_right) + println(io, "c.u_upper_right[$idx] = $(c.u_upper_right[idx])") + end + for idx in CartesianIndices(c.u_lower_left) + println(io, "c.u_lower_left[$idx] = $(c.u_lower_left[idx])") + end + for idx in CartesianIndices(c.u_lower_right) + println(io, "c.u_lower_right[$idx] = $(c.u_lower_right[idx])") + end + println(io, "transpose(c.neighbor_ids) = $(transpose(c.neighbor_ids))") + println(io, "c.large_sides = $(c.large_sides)") + println(io, "c.orientations = $(c.orientations)") + print(io, '*'^20) +end # Create mortar container and initialize mortar data in `elements`. function init_mortars(cell_ids, mesh::TreeMesh3D, elements::ElementContainer3D, mortar::LobattoLegendreMortarL2) - # Initialize containers - n_mortars = count_required_mortars(mesh, cell_ids) - mortars = L2MortarContainer3D{eltype(elements)}( - n_mortars, nvariables(elements), nnodes(elements)) - - # Connect elements with mortars - init_mortars!(mortars, elements, mesh) - return mortars + # Initialize containers + n_mortars = count_required_mortars(mesh, cell_ids) + mortars = L2MortarContainer3D{eltype(elements)}(n_mortars, nvariables(elements), + nnodes(elements)) + + # Connect elements with mortars + init_mortars!(mortars, elements, mesh) + return mortars end # Count the number of mortars that need to be created function count_required_mortars(mesh::TreeMesh3D, cell_ids) - count = 0 - - # Iterate over all cells and count mortars from perspective of coarse cells - for cell_id in cell_ids - for direction in eachdirection(mesh.tree) - # If no neighbor exists, cell is small with large neighbor or at boundary -> do nothing - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If neighbor has no children, this is a conforming interface -> do nothing - neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] - if !has_children(mesh.tree, neighbor_id) - continue - end - - count +=1 + count = 0 + + # Iterate over all cells and count mortars from perspective of coarse cells + for cell_id in cell_ids + for direction in eachdirection(mesh.tree) + # If no neighbor exists, cell is small with large neighbor or at boundary -> do nothing + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If neighbor has no children, this is a conforming interface -> do nothing + neighbor_id = mesh.tree.neighbor_ids[direction, cell_id] + if !has_children(mesh.tree, neighbor_id) + continue + end + + count += 1 + end end - end - return count + return count end # Initialize connectivity between elements and mortars function init_mortars!(mortars, elements, mesh::TreeMesh3D) - # Construct cell -> element mapping for easier algorithm 
implementation - tree = mesh.tree - c2e = zeros(Int, length(tree)) - for element in eachelement(elements) - c2e[elements.cell_ids[element]] = element - end - - # Reset interface count - count = 0 + # Construct cell -> element mapping for easier algorithm implementation + tree = mesh.tree + c2e = zeros(Int, length(tree)) + for element in eachelement(elements) + c2e[elements.cell_ids[element]] = element + end - # Iterate over all elements to find neighbors and to connect via interfaces - for element in eachelement(elements) - # Get cell id - cell_id = elements.cell_ids[element] + # Reset interface count + count = 0 - for direction in eachdirection(mesh.tree) - # If no neighbor exists, cell is small with large neighbor -> do nothing - if !has_neighbor(mesh.tree, cell_id, direction) - continue - end - - # If neighbor has no children, this is a conforming interface -> do nothing - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if !has_children(mesh.tree, neighbor_cell_id) - continue - end - - # Create mortar between elements (3 possible orientations): - # - # mortar in x-direction: - # 1 -> small element in lower, left position (-y, -z) - # 2 -> small element in lower, right position (+y, -z) - # 3 -> small element in upper, left position (-y, +z) - # 4 -> small element in upper, right position (+y, +z) - # - # mortar in y-direction: - # 1 -> small element in lower, left position (-x, -z) - # 2 -> small element in lower, right position (+x, -z) - # 3 -> small element in upper, left position (-x, +z) - # 4 -> small element in upper, right position (+x, +z) - # - # mortar in z-direction: - # 1 -> small element in lower, left position (-x, -y) - # 2 -> small element in lower, right position (+x, -y) - # 3 -> small element in upper, left position (-x, +y) - # 4 -> small element in upper, right position (+x, +y) - # - # Always the case: - # 5 -> large element - # - count += 1 - mortars.neighbor_ids[5, count] = element - - # Directions are from the perspective of the large element - # ("Where are the small elements? Ah, in the ... 
direction!") - if direction == 1 # -x - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[2, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, neighbor_cell_id]] - mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[6, neighbor_cell_id]] - mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[8, neighbor_cell_id]] - elseif direction == 2 # +x - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[3, neighbor_cell_id]] - mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[5, neighbor_cell_id]] - mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[7, neighbor_cell_id]] - elseif direction == 3 # -y - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[3, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, neighbor_cell_id]] - mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[7, neighbor_cell_id]] - mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[8, neighbor_cell_id]] - elseif direction == 4 # +y - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[2, neighbor_cell_id]] - mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[5, neighbor_cell_id]] - mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[6, neighbor_cell_id]] - elseif direction == 5 # -z - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[5, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[6, neighbor_cell_id]] - mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[7, neighbor_cell_id]] - mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[8, neighbor_cell_id]] - elseif direction == 6 # +z - mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, neighbor_cell_id]] - mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[2, neighbor_cell_id]] - mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[3, neighbor_cell_id]] - mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[4, neighbor_cell_id]] - else - error("should not happen") - end - - # Set large side, which denotes the direction (1 -> negative, 2 -> positive) of the large side - if iseven(direction) - mortars.large_sides[count] = 1 - else - mortars.large_sides[count] = 2 - end - - # Set orientation (x -> 1, y -> 2, z -> 3) - if direction in (1, 2) - mortars.orientations[count] = 1 - elseif direction in (3, 4) - mortars.orientations[count] = 2 - else - mortars.orientations[count] = 3 - end + # Iterate over all elements to find neighbors and to connect via interfaces + for element in eachelement(elements) + # Get cell id + cell_id = elements.cell_ids[element] + + for direction in eachdirection(mesh.tree) + # If no neighbor exists, cell is small with large neighbor -> do nothing + if !has_neighbor(mesh.tree, cell_id, direction) + continue + end + + # If neighbor has no children, this is a conforming interface -> do nothing + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if !has_children(mesh.tree, neighbor_cell_id) + continue + end + + # Create mortar between elements (3 possible orientations): + # + # mortar in x-direction: + # 1 -> small element in lower, left position (-y, -z) + # 2 -> small element in lower, right position (+y, -z) + # 3 -> small element in upper, left position (-y, +z) + # 4 -> small element in upper, right position (+y, +z) + # + # mortar in y-direction: + # 1 -> small element in lower, left position 
(-x, -z) + # 2 -> small element in lower, right position (+x, -z) + # 3 -> small element in upper, left position (-x, +z) + # 4 -> small element in upper, right position (+x, +z) + # + # mortar in z-direction: + # 1 -> small element in lower, left position (-x, -y) + # 2 -> small element in lower, right position (+x, -y) + # 3 -> small element in upper, left position (-x, +y) + # 4 -> small element in upper, right position (+x, +y) + # + # Always the case: + # 5 -> large element + # + count += 1 + mortars.neighbor_ids[5, count] = element + + # Directions are from the perspective of the large element + # ("Where are the small elements? Ah, in the ... direction!") + if direction == 1 # -x + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[2, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, + neighbor_cell_id]] + mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[6, + neighbor_cell_id]] + mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[8, + neighbor_cell_id]] + elseif direction == 2 # +x + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[3, + neighbor_cell_id]] + mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[5, + neighbor_cell_id]] + mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[7, + neighbor_cell_id]] + elseif direction == 3 # -y + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[3, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[4, + neighbor_cell_id]] + mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[7, + neighbor_cell_id]] + mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[8, + neighbor_cell_id]] + elseif direction == 4 # +y + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[2, + neighbor_cell_id]] + mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[5, + neighbor_cell_id]] + mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[6, + neighbor_cell_id]] + elseif direction == 5 # -z + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[5, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[6, + neighbor_cell_id]] + mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[7, + neighbor_cell_id]] + mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[8, + neighbor_cell_id]] + elseif direction == 6 # +z + mortars.neighbor_ids[1, count] = c2e[mesh.tree.child_ids[1, + neighbor_cell_id]] + mortars.neighbor_ids[2, count] = c2e[mesh.tree.child_ids[2, + neighbor_cell_id]] + mortars.neighbor_ids[3, count] = c2e[mesh.tree.child_ids[3, + neighbor_cell_id]] + mortars.neighbor_ids[4, count] = c2e[mesh.tree.child_ids[4, + neighbor_cell_id]] + else + error("should not happen") + end + + # Set large side, which denotes the direction (1 -> negative, 2 -> positive) of the large side + if iseven(direction) + mortars.large_sides[count] = 1 + else + mortars.large_sides[count] = 2 + end + + # Set orientation (x -> 1, y -> 2, z -> 3) + if direction in (1, 2) + mortars.orientations[count] = 1 + elseif direction in (3, 4) + mortars.orientations[count] = 2 + else + mortars.orientations[count] = 3 + end + end end - end - @assert count == nmortars(mortars) ("Actual mortar count ($count) does not match " * + @assert count==nmortars(mortars) ("Actual mortar count ($count) does not match "* "expectations $(nmortars(mortars))") end - - end # @muladd diff --git 
a/src/solvers/dgsem_tree/dg.jl b/src/solvers/dgsem_tree/dg.jl index 074745f66ca..cb28dad968c 100644 --- a/src/solvers/dgsem_tree/dg.jl +++ b/src/solvers/dgsem_tree/dg.jl @@ -3,48 +3,45 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # du .= zero(eltype(du)) doesn't scale when using multiple threads. # See https://github.com/trixi-framework/Trixi.jl/pull/924 for a performance comparison. function reset_du!(du, dg, cache) - @threaded for element in eachelement(dg, cache) - du[.., element] .= zero(eltype(du)) - end + @threaded for element in eachelement(dg, cache) + du[.., element] .= zero(eltype(du)) + end - return du + return du end - # pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) # # Given blending factors `alpha` and the solver `dg`, fill # `element_ids_dg` with the IDs of elements using a pure DG scheme and # `element_ids_dgfv` with the IDs of elements using a blended DG-FV scheme. -function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg::DG, cache) - empty!(element_ids_dg) - empty!(element_ids_dgfv) - - for element in eachelement(dg, cache) - # Clip blending factor for values close to zero (-> pure DG) - dg_only = isapprox(alpha[element], 0, atol=1e-12) - if dg_only - push!(element_ids_dg, element) - else - push!(element_ids_dgfv, element) +function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg::DG, + cache) + empty!(element_ids_dg) + empty!(element_ids_dgfv) + + for element in eachelement(dg, cache) + # Clip blending factor for values close to zero (-> pure DG) + dg_only = isapprox(alpha[element], 0, atol = 1e-12) + if dg_only + push!(element_ids_dg, element) + else + push!(element_ids_dgfv, element) + end end - end - return nothing + return nothing end - function volume_jacobian(element, mesh::TreeMesh, cache) - return inv(cache.elements.inverse_jacobian[element])^ndims(mesh) + return inv(cache.elements.inverse_jacobian[element])^ndims(mesh) end - - # Indicators used for shock-capturing and AMR include("indicators.jl") include("indicators_1d.jl") @@ -74,6 +71,4 @@ include("dg_3d_parabolic.jl") # as well as specialized implementations used to improve performance include("dg_2d_compressible_euler.jl") include("dg_3d_compressible_euler.jl") - - end # @muladd diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index a3346a4f15c..c66f427cce3 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # everything related to a DG semidiscretization in 1D, # currently limited to Lobatto-Legendre nodes @@ -13,216 +13,235 @@ # the RHS etc. function create_cache(mesh::TreeMesh{1}, equations, dg::DG, RealT, uEltype) - # Get cells for which an element needs to be created (i.e. all leaf cells) - leaf_cell_ids = local_leaf_cells(mesh.tree) + # Get cells for which an element needs to be created (i.e. 
all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) - elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) + elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) - interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) - boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) - cache = (; elements, interfaces, boundaries) + cache = (; elements, interfaces, boundaries) - # Add specialized parts of the cache required to compute the volume integral etc. - cache = (;cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) - return cache + return cache end - # The methods below are specialized on the volume integral type # and called from the basic `create_cache` method at the top. -function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}, P4estMesh{1}}, equations, +function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}, P4estMesh{1}}, + equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DG, uEltype) - NamedTuple() + NamedTuple() end - -function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}, P4estMesh{1}}, equations, +function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}, P4estMesh{1}}, + equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DG, uEltype) - element_ids_dg = Int[] - element_ids_dgfv = Int[] + element_ids_dg = Int[] + element_ids_dgfv = Int[] - cache = create_cache(mesh, equations, - VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), - dg, uEltype) + cache = create_cache(mesh, equations, + VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), + dg, uEltype) - A2dp1_x = Array{uEltype, 2} - fstar1_L_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg)+1) for _ in 1:Threads.nthreads()] - fstar1_R_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg)+1) for _ in 1:Threads.nthreads()] + A2dp1_x = Array{uEltype, 2} + fstar1_L_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] + fstar1_R_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] - return (; cache..., element_ids_dg, element_ids_dgfv, fstar1_L_threaded, fstar1_R_threaded) + return (; cache..., element_ids_dg, element_ids_dgfv, fstar1_L_threaded, + fstar1_R_threaded) end - -function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}, P4estMesh{1}}, equations, - volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DG, uEltype) - - A2dp1_x = Array{uEltype, 2} - fstar1_L_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg)+1) for _ in 1:Threads.nthreads()] - fstar1_R_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg)+1) for _ in 1:Threads.nthreads()] - - return (; fstar1_L_threaded, fstar1_R_threaded) +function create_cache(mesh::Union{TreeMesh{1}, StructuredMesh{1}, P4estMesh{1}}, + equations, + volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DG, + uEltype) + A2dp1_x = Array{uEltype, 2} + fstar1_L_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] + fstar1_R_threaded = A2dp1_x[A2dp1_x(undef, nvariables(equations), 
nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] + + return (; fstar1_L_threaded, fstar1_R_threaded) end - # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? function rhs!(du, u, t, mesh::TreeMesh{1}, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, - equations, dg.surface_integral, dg) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - return nothing -end + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) + end + + return nothing +end function calc_volume_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache) + @threaded for element in eachelement(dg, cache) + weak_form_kernel!(du, u, element, mesh, + 
nonconservative_terms, equations, + dg, cache) + end - @threaded for element in eachelement(dg, cache) - weak_form_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - dg, cache) - end - - return nothing + return nothing end @inline function weak_form_kernel!(du, u, element, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, nonconservative_terms::False, equations, - dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_dhat = dg.basis + dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_dhat = dg.basis - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) - flux1 = flux(u_node, 1, equations) - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1, equations, dg, ii, element) + flux1 = flux(u_node, 1, equations) + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1, + equations, dg, ii, element) + end end - end - return nothing + return nothing end - function calc_volume_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, nonconservative_terms, equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, cache) - @threaded for element in eachelement(dg, cache) - flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, equations, - volume_integral.volume_flux, dg, cache) - end + @threaded for element in eachelement(dg, cache) + flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, + equations, + volume_integral.volume_flux, dg, cache) + end end @inline function flux_differencing_kernel!(du, u, - element, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + element, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, nonconservative_terms::False, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_split = dg.basis - - # Calculate volume integral in one element - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) - - # All diagonal entries of `derivative_split` are zero. Thus, we can skip - # the computation of the diagonal terms. In addition, we use the symmetry - # of the `volume_flux` to save half of the possible two-point flux - # computations. - - # x direction - for ii in (i+1):nnodes(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, element) - flux1 = volume_flux(u_node, u_node_ii, 1, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], flux1, equations, dg, i, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], flux1, equations, dg, ii, element) + volume_flux, dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_split = dg.basis + + # Calculate volume integral in one element + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + + # All diagonal entries of `derivative_split` are zero. 
Thus, we can skip + # the computation of the diagonal terms. In addition, we use the symmetry + # of the `volume_flux` to save half of the possible two-point flux + # computations. + + # x direction + for ii in (i + 1):nnodes(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, element) + flux1 = volume_flux(u_node, u_node_ii, 1, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], flux1, + equations, dg, i, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], flux1, + equations, dg, ii, element) + end end - end end @inline function flux_differencing_kernel!(du, u, - element, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, + element, + mesh::Union{TreeMesh{1}, StructuredMesh{1}}, nonconservative_terms::True, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_split = dg.basis - symmetric_flux, nonconservative_flux = volume_flux - - # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, dg, cache, alpha) - - # Calculate the remaining volume terms using the nonsymmetric generalized flux - for i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, element) - - # The diagonal terms are zero since the diagonal of `derivative_split` - # is zero. We ignore this for now. - - # x direction - integral_contribution = zero(u_node) - for ii in eachnode(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, element) - noncons_flux1 = nonconservative_flux(u_node, u_node_ii, 1, equations) - integral_contribution = integral_contribution + derivative_split[i, ii] * noncons_flux1 - end + volume_flux, dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_split = dg.basis + symmetric_flux, nonconservative_flux = volume_flux - # The factor 0.5 cancels the factor 2 in the flux differencing form - multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, dg, i, element) - end -end + # Apply the symmetric flux as usual + flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + dg, cache, alpha) + # Calculate the remaining volume terms using the nonsymmetric generalized flux + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + + # The diagonal terms are zero since the diagonal of `derivative_split` + # is zero. We ignore this for now. 
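# --------------------------------------------------------------------------
# Editor's note: a minimal, self-contained sketch (not part of this patch;
# all names hypothetical) of the symmetry trick used in
# `flux_differencing_kernel!` above. Because the two-point volume flux is
# symmetric in its arguments and the diagonal of the split-form derivative
# matrix is zero, each node pair (i, ii) with ii > i is visited only once,
# and the single flux evaluation is reused for the updates of both nodes.
function toy_flux_differencing!(du, u, derivative_split, volume_flux)
    n = length(u)
    for i in 1:n
        for ii in (i + 1):n  # upper triangle only; diagonal entries are zero
            f = volume_flux(u[i], u[ii])  # symmetric in its two arguments
            du[i] += derivative_split[i, ii] * f
            du[ii] += derivative_split[ii, i] * f  # reuse the same flux value
        end
    end
    return du
end
# Usage with the entropy-conservative Burgers flux (a^2 + a * b + b^2) / 6,
# where `D_split` is a hypothetical 3x3 split-form derivative matrix with
# zero diagonal:
#   toy_flux_differencing!(zeros(3), [1.0, 2.0, 3.0], D_split,
#                          (a, b) -> (a^2 + a * b + b^2) / 6)
# --------------------------------------------------------------------------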
+ + # x direction + integral_contribution = zero(u_node) + for ii in eachnode(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, element) + noncons_flux1 = nonconservative_flux(u_node, u_node_ii, 1, equations) + integral_contribution = integral_contribution + + derivative_split[i, ii] * noncons_flux1 + end + + # The factor 0.5 cancels the factor 2 in the flux differencing form + multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, + dg, i, element) + end +end # TODO: Taal dimension agnostic function calc_volume_integral!(du, u, @@ -230,37 +249,40 @@ function calc_volume_integral!(du, u, nonconservative_terms, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache) - @unpack element_ids_dg, element_ids_dgfv = cache - @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral - - # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α - alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, cache) - - # Determine element ids for DG-only and blended DG-FV volume integral - pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) - - # Loop over pure DG elements - @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) - element = element_ids_dg[idx_element] - flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, equations, - volume_flux_dg, dg, cache) - end + @unpack element_ids_dg, element_ids_dgfv = cache + @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral + + # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α + alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, + cache) + + # Determine element ids for DG-only and blended DG-FV volume integral + pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) + + # Loop over pure DG elements + @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) + element = element_ids_dg[idx_element] + flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, + equations, + volume_flux_dg, dg, cache) + end - # Loop over blended DG-FV elements - @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) - element = element_ids_dgfv[idx_element] - alpha_element = alpha[element] + # Loop over blended DG-FV elements + @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) + element = element_ids_dgfv[idx_element] + alpha_element = alpha[element] - # Calculate DG volume integral contribution - flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, equations, - volume_flux_dg, dg, cache, 1 - alpha_element) + # Calculate DG volume integral contribution + flux_differencing_kernel!(du, u, element, mesh, nonconservative_terms, + equations, + volume_flux_dg, dg, cache, 1 - alpha_element) - # Calculate FV volume integral contribution - fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, alpha_element) - end + # Calculate FV volume integral contribution + fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, + dg, cache, element, alpha_element) + end - return nothing + return nothing end # TODO: Taal dimension agnostic @@ -269,369 +291,367 @@ function calc_volume_integral!(du, u, nonconservative_terms, equations, volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DGSEM, cache) - @unpack volume_flux_fv = volume_integral + 
@unpack volume_flux_fv = volume_integral - # Calculate LGL FV volume integral - @threaded for element in eachelement(dg, cache) - fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, true) - end + # Calculate LGL FV volume integral + @threaded for element in eachelement(dg, cache) + fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, + dg, cache, element, true) + end - return nothing + return nothing end - @inline function fv_kernel!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, nonconservative_terms, equations, - volume_flux_fv, dg::DGSEM, cache, element, alpha=true) - @unpack fstar1_L_threaded, fstar1_R_threaded = cache - @unpack inverse_weights = dg.basis - - # Calculate FV two-point fluxes - fstar1_L = fstar1_L_threaded[Threads.threadid()] - fstar1_R = fstar1_R_threaded[Threads.threadid()] - calcflux_fv!(fstar1_L, fstar1_R, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, element, cache) - - # Calculate FV volume integral contribution - for i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, element] += ( alpha * - (inverse_weights[i] * (fstar1_L[v, i+1] - fstar1_R[v, i])) ) + volume_flux_fv, dg::DGSEM, cache, element, alpha = true) + @unpack fstar1_L_threaded, fstar1_R_threaded = cache + @unpack inverse_weights = dg.basis + # Calculate FV two-point fluxes + fstar1_L = fstar1_L_threaded[Threads.threadid()] + fstar1_R = fstar1_R_threaded[Threads.threadid()] + calcflux_fv!(fstar1_L, fstar1_R, u, mesh, nonconservative_terms, equations, + volume_flux_fv, + dg, element, cache) + + # Calculate FV volume integral contribution + for i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, element] += (alpha * + (inverse_weights[i] * + (fstar1_L[v, i + 1] - fstar1_R[v, i]))) + end end - end - return nothing + return nothing end - -@inline function calcflux_fv!(fstar1_L, fstar1_R, u::AbstractArray{<:Any,3}, +@inline function calcflux_fv!(fstar1_L, fstar1_R, u::AbstractArray{<:Any, 3}, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) + fstar1_L[:, 1] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1] .= zero(eltype(fstar1_R)) + + for i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, element) + u_rr = get_node_vars(u, equations, dg, i, element) + flux = volume_flux_fv(u_ll, u_rr, 1, equations) # orientation 1: x direction + set_node_vars!(fstar1_L, flux, equations, dg, i) + set_node_vars!(fstar1_R, flux, equations, dg, i) + end - fstar1_L[:, 1 ] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1 ] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1] .= zero(eltype(fstar1_R)) - - for i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, element) - u_rr = get_node_vars(u, equations, dg, i , element) - flux = volume_flux_fv(u_ll, u_rr, 1, equations) # orientation 1: x direction - set_node_vars!(fstar1_L, flux, equations, dg, i) - set_node_vars!(fstar1_R, flux, equations, dg, i) - end - - return nothing + return nothing end - -@inline function calcflux_fv!(fstar1_L, fstar1_R, u::AbstractArray{<:Any,3}, +@inline function calcflux_fv!(fstar1_L, fstar1_R, u::AbstractArray{<:Any, 3}, mesh::TreeMesh{1}, nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) - volume_flux, nonconservative_flux = volume_flux_fv - - 
fstar1_L[:, 1 ] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1 ] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1] .= zero(eltype(fstar1_R)) - - for i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, element) - u_rr = get_node_vars(u, equations, dg, i , element) - - # Compute conservative part - f1 = volume_flux(u_ll, u_rr, 1, equations) # orientation 1: x direction - - # Compute nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - f1_L = f1 + 0.5 * nonconservative_flux(u_ll, u_rr, 1, equations) - f1_R = f1 + 0.5 * nonconservative_flux(u_rr, u_ll, 1, equations) - - # Copy to temporary storage - set_node_vars!(fstar1_L, f1_L, equations, dg, i) - set_node_vars!(fstar1_R, f1_R, equations, dg, i) - end + volume_flux, nonconservative_flux = volume_flux_fv + + fstar1_L[:, 1] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1] .= zero(eltype(fstar1_R)) + + for i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, element) + u_rr = get_node_vars(u, equations, dg, i, element) + + # Compute conservative part + f1 = volume_flux(u_ll, u_rr, 1, equations) # orientation 1: x direction + + # Compute nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + f1_L = f1 + 0.5 * nonconservative_flux(u_ll, u_rr, 1, equations) + f1_R = f1 + 0.5 * nonconservative_flux(u_rr, u_ll, 1, equations) + + # Copy to temporary storage + set_node_vars!(fstar1_L, f1_L, equations, dg, i) + set_node_vars!(fstar1_R, f1_R, equations, dg, i) + end - return nothing + return nothing end - # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache, u, mesh::TreeMesh{1}, equations, surface_integral, dg::DG) - @unpack interfaces = cache + @unpack interfaces = cache - @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] + @threaded for interface in eachinterface(dg, cache) + left_element = interfaces.neighbor_ids[1, interface] + right_element = interfaces.neighbor_ids[2, interface] - # interface in x-direction - for v in eachvariable(equations) - interfaces.u[1, v, interface] = u[v, nnodes(dg), left_element] - interfaces.u[2, v, interface] = u[v, 1, right_element] + # interface in x-direction + for v in eachvariable(equations) + interfaces.u[1, v, interface] = u[v, nnodes(dg), left_element] + interfaces.u[2, v, interface] = u[v, 1, right_element] + end end - end - return nothing + return nothing end function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{1}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - # Call pointwise Riemann solver 
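# --------------------------------------------------------------------------
# Editor's note: a hedged sketch (not Trixi's implementation) of what a
# pointwise Riemann solver like `surface_flux` computes from the two
# interface states: here a local Lax-Friedrichs (Rusanov) flux for scalar
# linear advection u_t + a * u_x = 0, with hypothetical names.
function rusanov_advection(u_ll, u_rr, a)
    # central average of the physical fluxes plus dissipation scaled by the
    # maximal wave speed |a|
    return 0.5 * a * (u_ll + u_rr) - 0.5 * abs(a) * (u_rr - u_ll)
end
# For a > 0 this reduces to pure upwinding, e.g.
#   rusanov_advection(1.0, 0.0, 2.0) == 2.0  # == a * u_ll
# --------------------------------------------------------------------------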
- u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface) - flux = surface_flux(u_ll, u_rr, orientations[interface], equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, left_direction, left_id] = flux[v] - surface_flux_values[v, right_direction, right_id] = flux[v] + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, orientations = cache.interfaces + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface) + flux = surface_flux(u_ll, u_rr, orientations[interface], equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, left_direction, left_id] = flux[v] + surface_flux_values[v, right_direction, right_id] = flux[v] + end end - end end function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{1}, nonconservative_terms::True, equations, surface_integral, dg::DG, cache) - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - # Call pointwise Riemann solver - orientation = orientations[interface] - u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface) - flux = surface_flux(u_ll, u_rr, orientation, equations) - - # Compute both nonconservative fluxes - noncons_left = nonconservative_flux(u_ll, u_rr, orientation, equations) - noncons_right = nonconservative_flux(u_rr, u_ll, orientation, equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, left_direction, left_id] = flux[v] + 0.5 * noncons_left[v] - surface_flux_values[v, right_direction, right_id] = flux[v] + 0.5 * noncons_right[v] + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u, neighbor_ids, orientations = cache.interfaces + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + # Call pointwise Riemann solver + orientation = orientations[interface] + u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface) + flux = surface_flux(u_ll, u_rr, orientation, equations) + + # Compute both nonconservative fluxes + noncons_left = nonconservative_flux(u_ll, u_rr, 
orientation, equations) + noncons_right = nonconservative_flux(u_rr, u_ll, orientation, equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, left_direction, left_id] = flux[v] + + 0.5 * noncons_left[v] + surface_flux_values[v, right_direction, right_id] = flux[v] + + 0.5 * noncons_right[v] + end end - end - return nothing + return nothing end - function prolong2boundaries!(cache, u, mesh::TreeMesh{1}, equations, surface_integral, dg::DG) - @unpack boundaries = cache - @unpack neighbor_sides = boundaries - - @threaded for boundary in eachboundary(dg, cache) - element = boundaries.neighbor_ids[boundary] - - # boundary in x-direction - if neighbor_sides[boundary] == 1 - # element in -x direction of boundary - for v in eachvariable(equations) - boundaries.u[1, v, boundary] = u[v, nnodes(dg), element] - end - else # Element in +x direction of boundary - for v in eachvariable(equations) - boundaries.u[2, v, boundary] = u[v, 1, element] - end + @unpack boundaries = cache + @unpack neighbor_sides = boundaries + + @threaded for boundary in eachboundary(dg, cache) + element = boundaries.neighbor_ids[boundary] + + # boundary in x-direction + if neighbor_sides[boundary] == 1 + # element in -x direction of boundary + for v in eachvariable(equations) + boundaries.u[1, v, boundary] = u[v, nnodes(dg), element] + end + else # Element in +x direction of boundary + for v in eachvariable(equations) + boundaries.u[2, v, boundary] = u[v, 1, element] + end + end end - end - return nothing + return nothing end # TODO: Taal dimension agnostic function calc_boundary_flux!(cache, t, boundary_condition::BoundaryConditionPeriodic, mesh::TreeMesh{1}, equations, surface_integral, dg::DG) - @assert isempty(eachboundary(dg, cache)) + @assert isempty(eachboundary(dg, cache)) end - function calc_boundary_flux!(cache, t, boundary_conditions::NamedTuple, mesh::TreeMesh{1}, equations, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[1], - have_nonconservative_terms(equations), equations, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[2], - have_nonconservative_terms(equations), equations, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[1], + have_nonconservative_terms(equations), equations, + surface_integral, dg, cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[2], + have_nonconservative_terms(equations), equations, + surface_integral, dg, cache, + 2, firsts[2], lasts[2]) end - -function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any,3}, 
t, - boundary_condition, nonconservative_terms::False, equations, +function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any, 3}, + t, + boundary_condition, + nonconservative_terms::False, equations, surface_integral, dg::DG, cache, direction, first_boundary, last_boundary) - - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - # Get boundary flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end - x = get_node_coords(node_coordinates, equations, dg, boundary) - flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, surface_flux, - equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, direction, neighbor] = flux[v] + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations, dg, boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end + x = get_node_coords(node_coordinates, equations, dg, boundary) + flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, + surface_flux, + equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, direction, neighbor] = flux[v] + end end - end - return nothing + return nothing end -function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any,3}, t, - boundary_condition, nonconservative_terms::True, equations, +function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any, 3}, + t, + boundary_condition, + nonconservative_terms::True, equations, surface_integral, dg::DG, cache, direction, first_boundary, last_boundary) - - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - # Get boundary flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end - x = get_node_coords(node_coordinates, equations, dg, boundary) - flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, surface_flux, - equations) - noncons_flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, nonconservative_flux, - equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, direction, neighbor] = flux[v] + 0.5 * noncons_flux[v] + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u, neighbor_ids, neighbor_sides, 
node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations, dg, boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end + x = get_node_coords(node_coordinates, equations, dg, boundary) + flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, + surface_flux, + equations) + noncons_flux = boundary_condition(u_inner, orientations[boundary], direction, x, + t, nonconservative_flux, + equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, direction, neighbor] = flux[v] + + 0.5 * noncons_flux[v] + end end - end - return nothing + return nothing end function calc_surface_integral!(du, u, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, equations, surface_integral, dg::DGSEM, cache) - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache.elements - - # Note that all fluxes have been computed with outward-pointing normal vectors. - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). - factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for v in eachvariable(equations) - # surface at -x - du[v, 1, element] = ( - du[v, 1, element] - surface_flux_values[v, 1, element] * factor_1) - - # surface at +x - du[v, nnodes(dg), element] = ( - du[v, nnodes(dg), element] + surface_flux_values[v, 2, element] * factor_2) + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). 
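# --------------------------------------------------------------------------
# Editor's note: a minimal sketch (not part of the patch) of the 1D surface
# integral performed just below, for a single element and a single variable.
# The two face fluxes enter with outward-normal signs and are scaled by the
# boundary interpolation factors; for Lobatto-Legendre nodes these factors
# reduce to the inverse boundary quadrature weights (an assumption of this
# toy example).
function toy_surface_integral!(du, flux_left, flux_right, factor_1, factor_2)
    du[1] -= flux_left * factor_1      # face at -x: outward normal is -1
    du[end] += flux_right * factor_2   # face at +x: outward normal is +1
    return du
end
# --------------------------------------------------------------------------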
+ factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for v in eachvariable(equations) + # surface at -x + du[v, 1, element] = (du[v, 1, element] - + surface_flux_values[v, 1, element] * factor_1) + + # surface at +x + du[v, nnodes(dg), element] = (du[v, nnodes(dg), element] + + surface_flux_values[v, 2, element] * factor_2) + end end - end - return nothing + return nothing end - function apply_jacobian!(du, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, equations, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - factor = -cache.elements.inverse_jacobian[element] - - for i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, element] *= factor - end + @threaded for element in eachelement(dg, cache) + factor = -cache.elements.inverse_jacobian[element] + + for i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, element] *= factor + end + end end - end - return nothing + return nothing end - # TODO: Taal dimension agnostic function calc_sources!(du, u, t, source_terms::Nothing, equations::AbstractEquations{1}, dg::DG, cache) - return nothing + return nothing end function calc_sources!(du, u, t, source_terms, equations::AbstractEquations{1}, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, element) - x_local = get_node_coords(cache.elements.node_coordinates, equations, dg, i, element) - du_local = source_terms(u_local, x_local, t, equations) - add_to_node_vars!(du, du_local, equations, dg, i, element) + @threaded for element in eachelement(dg, cache) + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, element) + x_local = get_node_coords(cache.elements.node_coordinates, equations, dg, i, + element) + du_local = source_terms(u_local, x_local, t, equations) + add_to_node_vars!(du, du_local, equations, dg, i, element) + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_tree/dg_1d_parabolic.jl b/src/solvers/dgsem_tree/dg_1d_parabolic.jl index 1bec34568d8..c2aa75388c8 100644 --- a/src/solvers/dgsem_tree/dg_1d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_1d_parabolic.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # This file collects all methods that have been updated to work with parabolic systems of equations # @@ -12,71 +13,90 @@ # 2. compute f(u, grad(u)) # 3. compute div(f(u, grad(u))) (i.e., the "regular" rhs! call) # boundary conditions will be applied to both grad(u) and div(f(u, grad(u))). 
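# --------------------------------------------------------------------------
# Editor's note: the three-step splitting described above, written out as a
# tiny periodic finite-difference sketch for the heat equation
# u_t = (kappa * u_x)_x. This is an illustration only, not DG and not part
# of the patch; all names are hypothetical.
function toy_parabolic_rhs(u, kappa, dx)
    n = length(u)
    # 1. compute grad(u) with periodic central differences
    gradients = [(u[mod1(i + 1, n)] - u[mod1(i - 1, n)]) / (2 * dx)
                 for i in 1:n]
    # 2. compute the viscous flux f(u, grad(u)) = kappa * grad(u)
    flux_viscous = kappa .* gradients
    # 3. compute div(f(u, grad(u))) -- the "regular" rhs! applied to the fluxes
    return [(flux_viscous[mod1(i + 1, n)] - flux_viscous[mod1(i - 1, n)]) / (2 * dx)
            for i in 1:n]
end
# --------------------------------------------------------------------------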
-function rhs_parabolic!(du, u, t, mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, +function rhs_parabolic!(du, u, t, mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, initial_condition, boundary_conditions_parabolic, source_terms, dg::DG, parabolic_scheme, cache, cache_parabolic) - @unpack u_transformed, gradients, flux_viscous = cache_parabolic - - # Convert conservative variables to a form more suitable for viscous flux calculations - @trixi_timeit timer() "transform variables" transform_variables!( - u_transformed, u, mesh, equations_parabolic, dg, parabolic_scheme, cache, cache_parabolic) - - # Compute the gradients of the transformed variables - @trixi_timeit timer() "calculate gradient" calc_gradient!( - gradients, u_transformed, t, mesh, equations_parabolic, boundary_conditions_parabolic, dg, - cache, cache_parabolic) - - # Compute and store the viscous fluxes - @trixi_timeit timer() "calculate viscous fluxes" calc_viscous_fluxes!( - flux_viscous, gradients, u_transformed, mesh, equations_parabolic, dg, cache, cache_parabolic) - - # The remainder of this function is essentially a regular rhs! for parabolic equations (i.e., it - # computes the divergence of the viscous fluxes) - # - # OBS! In `calc_viscous_fluxes!`, the viscous flux values at the volume nodes of each element have - # been computed and stored in `fluxes_viscous`. In the following, we *reuse* (abuse) the - # `interfaces` and `boundaries` containers in `cache_parabolic` to interpolate and store the - # *fluxes* at the element surfaces, as opposed to interpolating and storing the *solution* (as it - # is done in the hyperbolic operator). That is, `interfaces.u`/`boundaries.u` store *viscous flux values* - # and *not the solution*. The advantage is that a) we do not need to allocate more storage, b) we - # do not need to recreate the existing data structure only with a different name, and c) we do not - # need to interpolate solutions *and* gradients to the surfaces. 
-
-  # TODO: parabolic; reconsider current data structure reuse strategy
-
-  # Reset du
-  @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache)
-
-  # Calculate volume integral
-  @trixi_timeit timer() "volume integral" calc_volume_integral!(
-    du, flux_viscous, mesh, equations_parabolic, dg, cache)
-
-  # Prolong solution to interfaces
-  @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!(
-    cache_parabolic, flux_viscous, mesh, equations_parabolic, dg.surface_integral, dg, cache)
-
-  # Calculate interface fluxes
-  @trixi_timeit timer() "interface flux" calc_interface_flux!(
-    cache_parabolic.elements.surface_flux_values, mesh, equations_parabolic, dg, cache_parabolic)
-
-  # Prolong solution to boundaries
-  @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!(
-    cache_parabolic, flux_viscous, mesh, equations_parabolic, dg.surface_integral, dg, cache)
-
-  # Calculate boundary fluxes
-  @trixi_timeit timer() "boundary flux" calc_boundary_flux_divergence!(
-    cache_parabolic, t, boundary_conditions_parabolic, mesh, equations_parabolic,
-    dg.surface_integral, dg)
-
-  # Calculate surface integrals
-  @trixi_timeit timer() "surface integral" calc_surface_integral!(
-    du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache_parabolic)
-
-  # Apply Jacobian from mapping to reference element
-  @trixi_timeit timer() "Jacobian" apply_jacobian_parabolic!(
-    du, mesh, equations_parabolic, dg, cache_parabolic)
-
-  return nothing
+    @unpack u_transformed, gradients, flux_viscous = cache_parabolic
+
+    # Convert conservative variables to a form more suitable for viscous flux calculations
+    @trixi_timeit timer() "transform variables" begin
+        transform_variables!(u_transformed, u, mesh, equations_parabolic,
+                             dg, parabolic_scheme, cache, cache_parabolic)
+    end
+
+    # Compute the gradients of the transformed variables
+    @trixi_timeit timer() "calculate gradient" begin
+        calc_gradient!(gradients, u_transformed, t, mesh, equations_parabolic,
+                       boundary_conditions_parabolic, dg, cache, cache_parabolic)
+    end
+
+    # Compute and store the viscous fluxes
+    @trixi_timeit timer() "calculate viscous fluxes" begin
+        calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, mesh,
+                             equations_parabolic, dg, cache, cache_parabolic)
+    end
+
+    # The remainder of this function is essentially a regular rhs! for
+    # parabolic equations (i.e., it computes the divergence of the viscous fluxes)
+    #
+    # OBS! In `calc_viscous_fluxes!`, the viscous flux values at the volume nodes of each element have
+    # been computed and stored in `flux_viscous`. In the following, we *reuse* (abuse) the
+    # `interfaces` and `boundaries` containers in `cache_parabolic` to interpolate and store the
+    # *fluxes* at the element surfaces, as opposed to interpolating and storing the *solution* (as it
+    # is done in the hyperbolic operator). That is, `interfaces.u`/`boundaries.u` store *viscous flux values*
+    # and *not the solution*. The advantage is that a) we do not need to allocate more storage, b) we
+    # do not need to recreate the existing data structure only with a different name, and c) we do not
+    # need to interpolate solutions *and* gradients to the surfaces.
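A toy illustration (not part of this patch) of the container reuse described in the comment above; the sizes and the NamedTuple layout are made up for the example, only the indexing pattern mirrors the surrounding code: the first index of `interfaces.u` distinguishes the left and right trace, and flux values are stored where the hyperbolic operator would store solution values.

nvars, nnodes_, nelements = 3, 5, 4
flux_viscous = rand(nvars, nnodes_, nelements)
interfaces = (; u = zeros(2, nvars, nelements - 1))
for interface in 1:(nelements - 1), v in 1:nvars
    # store the *flux* trace where the hyperbolic operator would store u
    interfaces.u[1, v, interface] = flux_viscous[v, nnodes_, interface] # from left element
    interfaces.u[2, v, interface] = flux_viscous[v, 1, interface + 1]   # from right element
end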
+ + # TODO: parabolic; reconsider current data structure reuse strategy + + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, flux_viscous, mesh, equations_parabolic, dg, cache) + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache_parabolic.elements.surface_flux_values, mesh, + equations_parabolic, dg, cache_parabolic) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_divergence!(cache_parabolic, t, + boundary_conditions_parabolic, mesh, + equations_parabolic, + dg.surface_integral, dg) + end + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations_parabolic, + dg.surface_integral, dg, cache_parabolic) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(du, mesh, equations_parabolic, dg, cache_parabolic) + end + + return nothing end # Transform solution variables prior to taking the gradient @@ -85,418 +105,463 @@ end function transform_variables!(u_transformed, u, mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, cache, cache_parabolic) - @threaded for element in eachelement(dg, cache) - # Calculate volume terms in one element - for i in eachnode(dg) - u_node = get_node_vars(u, equations_parabolic, dg, i, element) - u_transformed_node = gradient_variable_transformation(equations_parabolic)(u_node, equations_parabolic) - set_node_vars!(u_transformed, u_transformed_node, equations_parabolic, dg, i, element) + @threaded for element in eachelement(dg, cache) + # Calculate volume terms in one element + for i in eachnode(dg) + u_node = get_node_vars(u, equations_parabolic, dg, i, element) + u_transformed_node = gradient_variable_transformation(equations_parabolic)(u_node, + equations_parabolic) + set_node_vars!(u_transformed, u_transformed_node, equations_parabolic, dg, + i, element) + end end - end end # This is the version used when calculating the divergence of the viscous fluxes function calc_volume_integral!(du, flux_viscous, - mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, dg::DGSEM, cache) - @unpack derivative_dhat = dg.basis - - @threaded for element in eachelement(dg, cache) - # Calculate volume terms in one element - for i in eachnode(dg) - flux_1_node = get_node_vars(flux_viscous, equations_parabolic, dg, i, element) + @unpack derivative_dhat = dg.basis - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, derivative_dhat[ii, i], flux_1_node, equations_parabolic, dg, ii, element) - end + @threaded for element in eachelement(dg, cache) + # Calculate volume terms in one element + for i in eachnode(dg) + flux_1_node = get_node_vars(flux_viscous, equations_parabolic, dg, i, + element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, 
derivative_dhat[ii, i], flux_1_node, + equations_parabolic, dg, ii, element) + end + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache_parabolic, flux_viscous, - mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) - @unpack interfaces = cache_parabolic + @unpack interfaces = cache_parabolic - @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] + @threaded for interface in eachinterface(dg, cache) + left_element = interfaces.neighbor_ids[1, interface] + right_element = interfaces.neighbor_ids[2, interface] - # interface in x-direction - for v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - interfaces.u[1, v, interface] = flux_viscous[v, nnodes(dg), left_element] - interfaces.u[2, v, interface] = flux_viscous[v, 1, right_element] + # interface in x-direction + for v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! + interfaces.u[1, v, interface] = flux_viscous[v, nnodes(dg), left_element] + interfaces.u[2, v, interface] = flux_viscous[v, 1, right_element] + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{1}, equations_parabolic, dg::DG, cache_parabolic) - @unpack neighbor_ids, orientations = cache_parabolic.interfaces - - @threaded for interface in eachinterface(dg, cache_parabolic) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - # Get precomputed fluxes at interfaces - flux_ll, flux_rr = get_surface_node_vars(cache_parabolic.interfaces.u, equations_parabolic, - dg, interface) - - # Compute interface flux as mean of left and right viscous fluxes - # TODO: parabolic; only BR1 at the moment - flux = 0.5 * (flux_ll + flux_rr) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, left_direction, left_id] = flux[v] - surface_flux_values[v, right_direction, right_id] = flux[v] - end - end + @unpack neighbor_ids, orientations = cache_parabolic.interfaces - return nothing -end + @threaded for interface in eachinterface(dg, cache_parabolic) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 -# This is the version used when calculating the divergence of the viscous fluxes -function prolong2boundaries!(cache_parabolic, flux_viscous, - mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, - surface_integral, dg::DG, cache) - @unpack boundaries = cache_parabolic - @unpack neighbor_sides = boundaries + # Get 
precomputed fluxes at interfaces + flux_ll, flux_rr = get_surface_node_vars(cache_parabolic.interfaces.u, + equations_parabolic, + dg, interface) - @threaded for boundary in eachboundary(dg, cache_parabolic) - element = boundaries.neighbor_ids[boundary] + # Compute interface flux as mean of left and right viscous fluxes + # TODO: parabolic; only BR1 at the moment + flux = 0.5 * (flux_ll + flux_rr) - if neighbor_sides[boundary] == 1 - # element in -x direction of boundary + # Copy flux to left and right element storage for v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[1, v, boundary] = flux_viscous[v, nnodes(dg), element] + surface_flux_values[v, left_direction, left_id] = flux[v] + surface_flux_values[v, right_direction, right_id] = flux[v] end - else # Element in +x direction of boundary - for v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[2, v, boundary] = flux_viscous[v, 1, element] - end - end end - return nothing + return nothing end +# This is the version used when calculating the divergence of the viscous fluxes +function prolong2boundaries!(cache_parabolic, flux_viscous, + mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG, cache) + @unpack boundaries = cache_parabolic + @unpack neighbor_sides = boundaries + + @threaded for boundary in eachboundary(dg, cache_parabolic) + element = boundaries.neighbor_ids[boundary] + + if neighbor_sides[boundary] == 1 + # element in -x direction of boundary + for v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[1, v, boundary] = flux_viscous[v, nnodes(dg), element] + end + else # Element in +x direction of boundary + for v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! 
+ boundaries.u[2, v, boundary] = flux_viscous[v, 1, element] + end + end + end + + return nothing +end function calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache, cache_parabolic) - - @threaded for element in eachelement(dg, cache) - for i in eachnode(dg) - # Get solution and gradients - u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, element) - gradients_1_node = get_node_vars(gradients, equations_parabolic, dg, i, element) - - # Calculate viscous flux and store each component for later use - flux_viscous_node = flux(u_node, gradients_1_node, 1, equations_parabolic) - set_node_vars!(flux_viscous, flux_viscous_node, equations_parabolic, dg, i, element) + @threaded for element in eachelement(dg, cache) + for i in eachnode(dg) + # Get solution and gradients + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, element) + gradients_1_node = get_node_vars(gradients, equations_parabolic, dg, i, + element) + + # Calculate viscous flux and store each component for later use + flux_viscous_node = flux(u_node, gradients_1_node, 1, equations_parabolic) + set_node_vars!(flux_viscous, flux_viscous_node, equations_parabolic, dg, i, + element) + end end - end end - -function calc_boundary_flux_gradients!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, - surface_integral, dg::DG) - return nothing +function calc_boundary_flux_gradients!(cache, t, + boundary_conditions_parabolic::BoundaryConditionPeriodic, + mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG) + return nothing end -function calc_boundary_flux_divergence!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, +function calc_boundary_flux_divergence!(cache, t, + boundary_conditions_parabolic::BoundaryConditionPeriodic, + mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) - return nothing + return nothing end -function calc_boundary_flux_gradients!(cache, t, boundary_conditions_parabolic::NamedTuple, - mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, - surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[1], - equations_parabolic, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[2], - equations_parabolic, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) +function calc_boundary_flux_gradients!(cache, t, + boundary_conditions_parabolic::NamedTuple, + mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + 
boundary_conditions_parabolic[1], + equations_parabolic, surface_integral, dg, + cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[2], + equations_parabolic, surface_integral, dg, + cache, + 2, firsts[2], lasts[2]) end -function calc_boundary_flux_by_direction_gradient!(surface_flux_values::AbstractArray{<:Any,3}, t, +function calc_boundary_flux_by_direction_gradient!(surface_flux_values::AbstractArray{ + <:Any, + 3 + }, + t, boundary_condition, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache, - direction, first_boundary, last_boundary) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - - # Get boundary flux - u_ll, u_rr = get_surface_node_vars(u, equations_parabolic, dg, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end + direction, first_boundary, + last_boundary) + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations_parabolic, dg, boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end - # TODO: revisit if we want more general boundary treatments. - # This assumes the gradient numerical flux at the boundary is the gradient variable, - # which is consistent with BR1, LDG. - flux_inner = u_inner + # TODO: revisit if we want more general boundary treatments. + # This assumes the gradient numerical flux at the boundary is the gradient variable, + # which is consistent with BR1, LDG. 
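As a hedged aside: with BR1/LDG, the state entering the gradient computation at the boundary is just the gradient variable itself (`flux_inner = u_inner` right below), while a Dirichlet-type gradient boundary condition would substitute the prescribed boundary value instead. A hypothetical sketch, not Trixi.jl's boundary condition API:

gradient_flux_br1(u_inner) = u_inner                       # as in the code below
gradient_flux_dirichlet(u_inner, u_boundary) = u_boundary  # weakly imposed datum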
+ flux_inner = u_inner - x = get_node_coords(node_coordinates, equations_parabolic, dg, boundary) - flux = boundary_condition(flux_inner, u_inner, orientations[boundary], direction, - x, t, Gradient(), equations_parabolic) + x = get_node_coords(node_coordinates, equations_parabolic, dg, boundary) + flux = boundary_condition(flux_inner, u_inner, orientations[boundary], + direction, + x, t, Gradient(), equations_parabolic) - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, direction, neighbor] = flux[v] + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, direction, neighbor] = flux[v] + end end - end - return nothing + return nothing end -function calc_boundary_flux_divergence!(cache, t, boundary_conditions_parabolic::NamedTuple, - mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, +function calc_boundary_flux_divergence!(cache, t, + boundary_conditions_parabolic::NamedTuple, + mesh::TreeMesh{1}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[1], - equations_parabolic, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[2], - equations_parabolic, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[1], + equations_parabolic, surface_integral, + dg, cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[2], + equations_parabolic, surface_integral, + dg, cache, + 2, firsts[2], lasts[2]) end -function calc_boundary_flux_by_direction_divergence!(surface_flux_values::AbstractArray{<:Any,3}, t, +function calc_boundary_flux_by_direction_divergence!(surface_flux_values::AbstractArray{ + <:Any, + 3 + }, + t, boundary_condition, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache, - direction, first_boundary, last_boundary) - @unpack surface_flux = surface_integral - - # Note: cache.boundaries.u contains the unsigned normal component (using "orientation", not "direction") - # of the viscous flux, as computed in `prolong2boundaries!` - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - # Get viscous boundary fluxes - flux_ll, flux_rr = get_surface_node_vars(u, equations_parabolic, dg, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - flux_inner = flux_ll - else # Element is on the right, boundary on the left - flux_inner = flux_rr - end + direction, first_boundary, + 
last_boundary)
+    @unpack surface_flux = surface_integral
+
+    # Note: cache.boundaries.u contains the unsigned normal component (using "orientation", not "direction")
+    # of the viscous flux, as computed in `prolong2boundaries!`
+    @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries
+
+    @threaded for boundary in first_boundary:last_boundary
+        # Get neighboring element
+        neighbor = neighbor_ids[boundary]
+
+        # Get viscous boundary fluxes
+        flux_ll, flux_rr = get_surface_node_vars(u, equations_parabolic, dg, boundary)
+        if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right
+            flux_inner = flux_ll
+        else # Element is on the right, boundary on the left
+            flux_inner = flux_rr
+        end

-    x = get_node_coords(node_coordinates, equations_parabolic, dg, boundary)
+        x = get_node_coords(node_coordinates, equations_parabolic, dg, boundary)

-    # TODO: add a field in `cache.boundaries` for gradient information.
-    # Here, we pass in `u_inner = nothing` since we overwrite cache.boundaries.u with gradient information.
-    # This currently works with Dirichlet/Neuman boundary conditions for LaplaceDiffusion2D and
-    # NoSlipWall/Adiabatic boundary conditions for CompressibleNavierStokesDiffusion2D as of 2022-6-27.
-    # It will not work with implementations which utilize `u_inner` to impose boundary conditions.
-    flux = boundary_condition(flux_inner, nothing, orientations[boundary], direction,
-                              x, t, Divergence(), equations_parabolic)
+        # TODO: add a field in `cache.boundaries` for gradient information.
+        # Here, we pass in `u_inner = nothing` since we overwrite cache.boundaries.u with gradient information.
+        # This currently works with Dirichlet/Neumann boundary conditions for LaplaceDiffusion2D and
+        # NoSlipWall/Adiabatic boundary conditions for CompressibleNavierStokesDiffusion2D as of 2022-6-27.
+        # It will not work with implementations which utilize `u_inner` to impose boundary conditions.
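Since `u_inner = nothing` is passed in the divergence step below, only boundary conditions that act purely on the flux trace are compatible here. A hypothetical Neumann-type condition illustrates the contract; the names are illustrative only, not Trixi.jl's API:

function neumann_divergence_flux(flux_inner, u_inner, prescribed_normal_flux)
    @assert u_inner === nothing  # the solution is deliberately unavailable here
    return prescribed_normal_flux
end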
+ flux = boundary_condition(flux_inner, nothing, orientations[boundary], + direction, + x, t, Divergence(), equations_parabolic) - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, direction, neighbor] = flux[v] + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, direction, neighbor] = flux[v] + end end - end - return nothing + return nothing end - # Calculate the gradient of the transformed variables function calc_gradient!(gradients, u_transformed, t, mesh::TreeMesh{1}, equations_parabolic, boundary_conditions_parabolic, dg::DG, cache, cache_parabolic) - # Reset du - @trixi_timeit timer() "reset gradients" begin - reset_du!(gradients, dg, cache) - end - - # Calculate volume integral - @trixi_timeit timer() "volume integral" begin - @unpack derivative_dhat = dg.basis - @threaded for element in eachelement(dg, cache) - - # Calculate volume terms in one element - for i in eachnode(dg) - u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, element) + # Reset du + @trixi_timeit timer() "reset gradients" begin + reset_du!(gradients, dg, cache) + end - for ii in eachnode(dg) - multiply_add_to_node_vars!(gradients, derivative_dhat[ii, i], u_node, equations_parabolic, dg, ii, element) + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + @unpack derivative_dhat = dg.basis + @threaded for element in eachelement(dg, cache) + + # Calculate volume terms in one element + for i in eachnode(dg) + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, + element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(gradients, derivative_dhat[ii, i], + u_node, equations_parabolic, dg, ii, + element) + end + end end - end end - end - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" begin - @unpack surface_flux_values = cache_parabolic.elements - @unpack neighbor_ids, orientations = cache_parabolic.interfaces - - @threaded for interface in eachinterface(dg, cache_parabolic) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - # Call pointwise Riemann solver - u_ll, u_rr = get_surface_node_vars(cache_parabolic.interfaces.u, - equations_parabolic, dg, interface) - flux = 0.5 * (u_ll + u_rr) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, left_direction, left_id] = flux[v] - surface_flux_values[v, right_direction, right_id] = flux[v] - end + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!(cache_parabolic, + u_transformed, mesh, + equations_parabolic, + dg.surface_integral, + dg) + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + @unpack surface_flux_values = cache_parabolic.elements + @unpack neighbor_ids, orientations = cache_parabolic.interfaces + + @threaded for interface in eachinterface(dg, cache_parabolic) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, 
interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(cache_parabolic.interfaces.u, + equations_parabolic, dg, interface) + flux = 0.5 * (u_ll + u_rr) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, left_direction, left_id] = flux[v] + surface_flux_values[v, right_direction, right_id] = flux[v] + end + end end - end - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux_gradients!( - cache_parabolic, t, boundary_conditions_parabolic, mesh, equations_parabolic, - dg.surface_integral, dg) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" begin - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache_parabolic.elements - - # Note that all fluxes have been computed with outward-pointing normal vectors. - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). - factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for v in eachvariable(equations_parabolic) - # surface at -x - gradients[v, 1, element] = ( - gradients[v, 1, element] - surface_flux_values[v, 1, element] * factor_1) - - # surface at +x - gradients[v, nnodes(dg), element] = ( - gradients[v, nnodes(dg), element] + surface_flux_values[v, 2, element] * factor_2) - end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!(cache_parabolic, + u_transformed, mesh, + equations_parabolic, + dg.surface_integral, + dg) + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" calc_boundary_flux_gradients!(cache_parabolic, + t, + boundary_conditions_parabolic, + mesh, + equations_parabolic, + dg.surface_integral, + dg) + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache_parabolic.elements + + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). 
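A minimal sketch of why the explicit assignments matter, assuming MuladdMacro.jl (which provides the `@muladd` used at the top of this file): spelling out the whole right-hand side lets the macro rewrite `a + b * c` into `muladd(b, c, a)`, i.e. a single fused multiply-add, whereas `+=` is left untouched.

using MuladdMacro

@muladd function surface_update(g, s, factor)
    g = g + s * factor  # rewritten to muladd(s, factor, g): one FMA instruction
    return g
end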
+ factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for v in eachvariable(equations_parabolic) + # surface at -x + gradients[v, 1, element] = (gradients[v, 1, element] - + surface_flux_values[v, 1, element] * + factor_1) + + # surface at +x + gradients[v, nnodes(dg), element] = (gradients[v, nnodes(dg), element] + + surface_flux_values[v, 2, + element] * + factor_2) + end + end end - end - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" begin - apply_jacobian_parabolic!(gradients, mesh, equations_parabolic, dg, cache_parabolic) - end + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(gradients, mesh, equations_parabolic, dg, + cache_parabolic) + end - return nothing + return nothing end - # This method is called when a SemidiscretizationHyperbolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. -function create_cache_parabolic(mesh::TreeMesh{1}, equations_hyperbolic::AbstractEquations, +function create_cache_parabolic(mesh::TreeMesh{1}, + equations_hyperbolic::AbstractEquations, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, RealT, uEltype) - # Get cells for which an element needs to be created (i.e. all leaf cells) - leaf_cell_ids = local_leaf_cells(mesh.tree) + # Get cells for which an element needs to be created (i.e. all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) - elements = init_elements(leaf_cell_ids, mesh, equations_hyperbolic, dg.basis, RealT, uEltype) + elements = init_elements(leaf_cell_ids, mesh, equations_hyperbolic, dg.basis, RealT, + uEltype) - n_vars = nvariables(equations_hyperbolic) - n_nodes = nnodes(elements) - n_elements = nelements(elements) - u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_elements) - gradients = similar(u_transformed) - flux_viscous = similar(u_transformed) + n_vars = nvariables(equations_hyperbolic) + n_nodes = nnodes(elements) + n_elements = nelements(elements) + u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_elements) + gradients = similar(u_transformed) + flux_viscous = similar(u_transformed) - interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) - boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) - cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) + cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) - return cache + return cache end - # Needed to *not* flip the sign of the inverse Jacobian. # This is because the parabolic fluxes are assumed to be of the form # `du/dt + df/dx = dg/dx + source(x,t)`, # where f(u) is the inviscid flux and g(u) is the viscous flux. 
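The sign convention noted above, side by side (values made up for one element): the hyperbolic update in `apply_jacobian!` negates the inverse Jacobian, while the parabolic contribution below keeps it positive because `dg/dx` sits on the right-hand side of `du/dt + df/dx = dg/dx + source(x,t)`.

inverse_jacobian = 2.0                 # hypothetical value for one element
factor_hyperbolic = -inverse_jacobian  # as in `apply_jacobian!` further up
factor_parabolic = inverse_jacobian    # as in `apply_jacobian_parabolic!` below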
function apply_jacobian_parabolic!(du, mesh::TreeMesh{1}, equations::AbstractEquationsParabolic, dg::DG, cache) + @threaded for element in eachelement(dg, cache) + factor = cache.elements.inverse_jacobian[element] - @threaded for element in eachelement(dg, cache) - factor = cache.elements.inverse_jacobian[element] - - for i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, element] *= factor - end + for i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, element] *= factor + end + end end - end - return nothing + return nothing end - end # @muladd diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 3ce9f611a8b..d3227710686 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # everything related to a DG semidiscretization in 2D, # currently limited to Lobatto-Legendre nodes @@ -13,318 +13,353 @@ # the RHS etc. function create_cache(mesh::TreeMesh{2}, equations, dg::DG, RealT, uEltype) - # Get cells for which an element needs to be created (i.e. all leaf cells) - leaf_cell_ids = local_leaf_cells(mesh.tree) + # Get cells for which an element needs to be created (i.e. all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) - elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) + elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) - interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) - boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) - mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) + mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) - cache = (; elements, interfaces, boundaries, mortars) + cache = (; elements, interfaces, boundaries, mortars) - # Add specialized parts of the cache required to compute the volume integral etc. - cache = (;cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) - cache = (;cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + cache = (; cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) - return cache + return cache end - # The methods below are specialized on the volume integral type # and called from the basic `create_cache` method at the top. 
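The `cache` handling above relies on NamedTuple splatting to merge the specialized parts into the basic cache; a self-contained sketch of the merge pattern (field values are placeholders):

basic = (; elements = :E, interfaces = :I, boundaries = :B, mortars = :M)
extra = (; fstar_upper_threaded = :F1, fstar_lower_threaded = :F2)
cache = (; basic..., extra...)  # rightmost duplicate keys would win
@assert cache.elements === :E && cache.fstar_lower_threaded === :F2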
-function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, - equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DG, uEltype) - NamedTuple() +function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, + equations, volume_integral::VolumeIntegralFluxDifferencing, + dg::DG, uEltype) + NamedTuple() end - -function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, equations, +function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DG, uEltype) - element_ids_dg = Int[] - element_ids_dgfv = Int[] - - cache = create_cache(mesh, equations, - VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), - dg, uEltype) - - A3dp1_x = Array{uEltype, 3} - A3dp1_y = Array{uEltype, 3} - - fstar1_L_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg)) for _ in 1:Threads.nthreads()] - fstar1_R_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg)) for _ in 1:Threads.nthreads()] - fstar2_L_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1) for _ in 1:Threads.nthreads()] - fstar2_R_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1) for _ in 1:Threads.nthreads()] - - return (; cache..., element_ids_dg, element_ids_dgfv, - fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded) + element_ids_dg = Int[] + element_ids_dgfv = Int[] + + cache = create_cache(mesh, equations, + VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), + dg, uEltype) + + A3dp1_x = Array{uEltype, 3} + A3dp1_y = Array{uEltype, 3} + + fstar1_L_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg)) for _ in 1:Threads.nthreads()] + fstar1_R_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg)) for _ in 1:Threads.nthreads()] + fstar2_L_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1) for _ in 1:Threads.nthreads()] + fstar2_R_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1) for _ in 1:Threads.nthreads()] + + return (; cache..., element_ids_dg, element_ids_dgfv, + fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded) end - -function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, equations, - volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DG, uEltype) - - A3dp1_x = Array{uEltype, 3} - A3dp1_y = Array{uEltype, 3} - - fstar1_L_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg)) for _ in 1:Threads.nthreads()] - fstar1_R_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg)) for _ in 1:Threads.nthreads()] - fstar2_L_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1) for _ in 1:Threads.nthreads()] - fstar2_R_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1) for _ in 1:Threads.nthreads()] - - return (; fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded) +function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, equations, + volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DG, + uEltype) + A3dp1_x = Array{uEltype, 3} + A3dp1_y = Array{uEltype, 3} 
+ + fstar1_L_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg)) for _ in 1:Threads.nthreads()] + fstar1_R_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg)) for _ in 1:Threads.nthreads()] + fstar2_L_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1) for _ in 1:Threads.nthreads()] + fstar2_R_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1) for _ in 1:Threads.nthreads()] + + return (; fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, + fstar2_R_threaded) end # The methods below are specialized on the mortar type # and called from the basic `create_cache` method at the top. -function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, +function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}}, equations, mortar_l2::LobattoLegendreMortarL2, uEltype) - # TODO: Taal performance using different types - MA2d = MArray{Tuple{nvariables(equations), nnodes(mortar_l2)}, uEltype, 2, nvariables(equations) * nnodes(mortar_l2)} - fstar_upper_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] - fstar_lower_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] + # TODO: Taal performance using different types + MA2d = MArray{Tuple{nvariables(equations), nnodes(mortar_l2)}, uEltype, 2, + nvariables(equations) * nnodes(mortar_l2)} + fstar_upper_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] + fstar_lower_threaded = MA2d[MA2d(undef) for _ in 1:Threads.nthreads()] - # A2d = Array{uEltype, 2} - # fstar_upper_threaded = [A2d(undef, nvariables(equations), nnodes(mortar_l2)) for _ in 1:Threads.nthreads()] - # fstar_lower_threaded = [A2d(undef, nvariables(equations), nnodes(mortar_l2)) for _ in 1:Threads.nthreads()] + # A2d = Array{uEltype, 2} + # fstar_upper_threaded = [A2d(undef, nvariables(equations), nnodes(mortar_l2)) for _ in 1:Threads.nthreads()] + # fstar_lower_threaded = [A2d(undef, nvariables(equations), nnodes(mortar_l2)) for _ in 1:Threads.nthreads()] - (; fstar_upper_threaded, fstar_lower_threaded) + (; fstar_upper_threaded, fstar_lower_threaded) end - # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? 
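The `*_threaded` vectors above implement per-thread scratch buffers that the hot loops index with `Threads.threadid()`; the pattern assumes a task does not migrate between threads while a loop body runs. A toy version with standard Julia threading and static scheduling:

using Base.Threads: @threads, nthreads, threadid

buffers = [zeros(4) for _ in 1:nthreads()]
@threads :static for element in 1:16
    buf = buffers[threadid()]  # reuse this thread's buffer, no per-iteration allocation
    fill!(buf, element)        # stand-in for the actual two-point flux computation
end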
function rhs!(du, u, t, mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) - - # Prolong solution to mortars - @trixi_timeit timer() "prolong2mortars" prolong2mortars!( - cache, u, mesh, equations, dg.mortar, dg.surface_integral, dg) - - # Calculate mortar fluxes - @trixi_timeit timer() "mortar flux" calc_mortar_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.mortar, dg.surface_integral, dg, cache) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - return nothing -end + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end + + # Prolong solution to mortars + @trixi_timeit timer() "prolong2mortars" begin + prolong2mortars!(cache, u, mesh, equations, + dg.mortar, dg.surface_integral, dg) + end + + # Calculate mortar fluxes + @trixi_timeit timer() "mortar flux" begin + calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.mortar, dg.surface_integral, dg, cache) + end + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, 
u, mesh, equations, + dg.surface_integral, dg, cache) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) + end + + return nothing +end function calc_volume_integral!(du, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{TreeMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}, nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache) + @threaded for element in eachelement(dg, cache) + weak_form_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + dg, cache) + end - @threaded for element in eachelement(dg, cache) - weak_form_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - dg, cache) - end - - return nothing + return nothing end @inline function weak_form_kernel!(du, u, element, mesh::TreeMesh{2}, nonconservative_terms::False, equations, - dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_dhat = dg.basis - - # Calculate volume terms in one element - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - - flux1 = flux(u_node, 1, equations) - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1, equations, dg, ii, j, element) - end + dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_dhat = dg.basis - flux2 = flux(u_node, 2, equations) - for jj in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], flux2, equations, dg, i, jj, element) + # Calculate volume terms in one element + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + + flux1 = flux(u_node, 1, equations) + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1, + equations, dg, ii, j, element) + end + + flux2 = flux(u_node, 2, equations) + for jj in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], flux2, + equations, dg, i, jj, element) + end end - end - return nothing + return nothing end - # flux differencing volume integral. 
For curved meshes averaging of the # mapping terms, stored in `cache.elements.contravariant_vectors`, is peeled apart # from the evaluation of the physical fluxes in each Cartesian direction function calc_volume_integral!(du, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{TreeMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}, nonconservative_terms, equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, cache) - @threaded for element in eachelement(dg, cache) - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_integral.volume_flux, dg, cache) - end + @threaded for element in eachelement(dg, cache) + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_integral.volume_flux, dg, cache) + end end @inline function flux_differencing_kernel!(du, u, element, mesh::TreeMesh{2}, nonconservative_terms::False, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_split = dg.basis - - # Calculate volume integral in one element - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - - # All diagonal entries of `derivative_split` are zero. Thus, we can skip - # the computation of the diagonal terms. In addition, we use the symmetry - # of the `volume_flux` to save half of the possible two-point flux - # computations. - - # x direction - for ii in (i+1):nnodes(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, element) - flux1 = volume_flux(u_node, u_node_ii, 1, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], flux1, equations, dg, i, j, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], flux1, equations, dg, ii, j, element) - end + volume_flux, dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_split = dg.basis + + # Calculate volume integral in one element + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + + # All diagonal entries of `derivative_split` are zero. Thus, we can skip + # the computation of the diagonal terms. In addition, we use the symmetry + # of the `volume_flux` to save half of the possible two-point flux + # computations. 
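The symmetry argument above in a runnable toy: for a symmetric two-point flux, each pair (i, ii) needs only one flux evaluation, which is then scattered to both nodes. The operator `D` and the central average flux here are stand-ins, not Trixi.jl internals.

function flux_differencing_toy!(du, u, D)
    N = length(u)
    for i in 1:N, ii in (i + 1):N  # skip the (zero) diagonal, visit each pair once
        f = 0.5 * (u[i] + u[ii])   # symmetric volume flux (central average)
        du[i] += D[i, ii] * f
        du[ii] += D[ii, i] * f     # second contribution reuses the same f
    end
    return nothing
end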
+ + # x direction + for ii in (i + 1):nnodes(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, element) + flux1 = volume_flux(u_node, u_node_ii, 1, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], flux1, + equations, dg, i, j, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], flux1, + equations, dg, ii, j, element) + end - # y direction - for jj in (j+1):nnodes(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, element) - flux2 = volume_flux(u_node, u_node_jj, 2, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], flux2, equations, dg, i, j, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], flux2, equations, dg, i, jj, element) + # y direction + for jj in (j + 1):nnodes(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, element) + flux2 = volume_flux(u_node, u_node_jj, 2, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], flux2, + equations, dg, i, j, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], flux2, + equations, dg, i, jj, element) + end end - end end @inline function flux_differencing_kernel!(du, u, element, mesh::TreeMesh{2}, nonconservative_terms::True, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_split = dg.basis - symmetric_flux, nonconservative_flux = volume_flux - - # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, dg, cache, alpha) - - # Calculate the remaining volume terms using the nonsymmetric generalized flux - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - - # The diagonal terms are zero since the diagonal of `derivative_split` - # is zero. We ignore this for now. - - # x direction - integral_contribution = zero(u_node) - for ii in eachnode(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, element) - noncons_flux1 = nonconservative_flux(u_node, u_node_ii, 1, equations) - integral_contribution = integral_contribution + derivative_split[i, ii] * noncons_flux1 - end + volume_flux, dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_split = dg.basis + symmetric_flux, nonconservative_flux = volume_flux - # y direction - for jj in eachnode(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, element) - noncons_flux2 = nonconservative_flux(u_node, u_node_jj, 2, equations) - integral_contribution = integral_contribution + derivative_split[j, jj] * noncons_flux2 - end + # Apply the symmetric flux as usual + flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + dg, cache, alpha) - # The factor 0.5 cancels the factor 2 in the flux differencing form - multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, dg, i, j, element) - end -end + # Calculate the remaining volume terms using the nonsymmetric generalized flux + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + + # The diagonal terms are zero since the diagonal of `derivative_split` + # is zero. We ignore this for now. 
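For the nonsymmetric part that follows, no pairwise reuse is possible: each node accumulates over the full row of `derivative_split`, and the final factor 0.5 cancels the factor 2 inherent in the flux differencing form. A schematic stand-in with hypothetical arguments:

function noncons_contribution(u, i, D, f_noncons)
    acc = zero(u[i])
    for ii in eachindex(u)  # full row; both orders (i, ii) and (ii, i) are visited
        acc += D[i, ii] * f_noncons(u[i], u[ii])
    end
    return 0.5 * acc        # 0.5 cancels the flux-differencing factor 2
end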
+ + # x direction + integral_contribution = zero(u_node) + for ii in eachnode(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, element) + noncons_flux1 = nonconservative_flux(u_node, u_node_ii, 1, equations) + integral_contribution = integral_contribution + + derivative_split[i, ii] * noncons_flux1 + end + # y direction + for jj in eachnode(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, element) + noncons_flux2 = nonconservative_flux(u_node, u_node_jj, 2, equations) + integral_contribution = integral_contribution + + derivative_split[j, jj] * noncons_flux2 + end + + # The factor 0.5 cancels the factor 2 in the flux differencing form + multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, + dg, i, j, element) + end +end # TODO: Taal dimension agnostic function calc_volume_integral!(du, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{TreeMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}, nonconservative_terms, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache) - @unpack element_ids_dg, element_ids_dgfv = cache - @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral - - # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α - alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, cache) - - # Determine element ids for DG-only and blended DG-FV volume integral - pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) - - # Loop over pure DG elements - @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) - element = element_ids_dg[idx_element] - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_flux_dg, dg, cache) - end + @unpack element_ids_dg, element_ids_dgfv = cache + @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral + + # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α + alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, + cache) + + # Determine element ids for DG-only and blended DG-FV volume integral + pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) + + # Loop over pure DG elements + @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) + element = element_ids_dg[idx_element] + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_flux_dg, dg, cache) + end - # Loop over blended DG-FV elements - @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) - element = element_ids_dgfv[idx_element] - alpha_element = alpha[element] + # Loop over blended DG-FV elements + @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) + element = element_ids_dgfv[idx_element] + alpha_element = alpha[element] - # Calculate DG volume integral contribution - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_flux_dg, dg, cache, 1 - alpha_element) + # Calculate DG volume integral contribution + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_flux_dg, dg, cache, 1 - alpha_element) - # Calculate FV volume integral contribution - fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, alpha_element) - end + # Calculate FV volume integral contribution + fv_kernel!(du, 
u, mesh, nonconservative_terms, equations, volume_flux_fv, + dg, cache, element, alpha_element) + end - return nothing + return nothing end # TODO: Taal dimension agnostic @@ -333,47 +368,47 @@ function calc_volume_integral!(du, u, nonconservative_terms, equations, volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DGSEM, cache) - @unpack volume_flux_fv = volume_integral + @unpack volume_flux_fv = volume_integral - # Calculate LGL FV volume integral - @threaded for element in eachelement(dg, cache) - fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, true) - end + # Calculate LGL FV volume integral + @threaded for element in eachelement(dg, cache) + fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, + dg, cache, element, true) + end - return nothing + return nothing end - @inline function fv_kernel!(du, u, - mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{TreeMesh{2}, StructuredMesh{2}, + UnstructuredMesh2D, P4estMesh{2}}, nonconservative_terms, equations, - volume_flux_fv, dg::DGSEM, cache, element, alpha=true) - @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded = cache - @unpack inverse_weights = dg.basis - - # Calculate FV two-point fluxes - fstar1_L = fstar1_L_threaded[Threads.threadid()] - fstar2_L = fstar2_L_threaded[Threads.threadid()] - fstar1_R = fstar1_R_threaded[Threads.threadid()] - fstar2_R = fstar2_R_threaded[Threads.threadid()] - calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, mesh, - nonconservative_terms, equations, volume_flux_fv, dg, element, cache) - - # Calculate FV volume integral contribution - for j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, j, element] += ( alpha * - (inverse_weights[i] * (fstar1_L[v, i+1, j] - fstar1_R[v, i, j]) + - inverse_weights[j] * (fstar2_L[v, i, j+1] - fstar2_R[v, i, j])) ) + volume_flux_fv, dg::DGSEM, cache, element, alpha = true) + @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded = cache + @unpack inverse_weights = dg.basis + + # Calculate FV two-point fluxes + fstar1_L = fstar1_L_threaded[Threads.threadid()] + fstar2_L = fstar2_L_threaded[Threads.threadid()] + fstar1_R = fstar1_R_threaded[Threads.threadid()] + fstar2_R = fstar2_R_threaded[Threads.threadid()] + calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, mesh, + nonconservative_terms, equations, volume_flux_fv, dg, element, cache) + + # Calculate FV volume integral contribution + for j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, j, element] += (alpha * + (inverse_weights[i] * + (fstar1_L[v, i + 1, j] - fstar1_R[v, i, j]) + + inverse_weights[j] * + (fstar2_L[v, i, j + 1] - fstar2_R[v, i, j]))) + end end - end - return nothing + return nothing end - - # calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u_leftright, # nonconservative_terms::False, equations, # volume_flux_fv, dg, element) @@ -385,37 +420,38 @@ end # - `fstar1_R::AbstractArray{<:Real, 3}` # - `fstar2_L::AbstractArray{<:Real, 3}` # - `fstar2_R::AbstractArray{<:Real, 3}` -@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u::AbstractArray{<:Any,4}, - mesh::TreeMesh{2}, nonconservative_terms::False, equations, +@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, + u::AbstractArray{<:Any, 4}, + mesh::TreeMesh{2}, nonconservative_terms::False, + equations, volume_flux_fv, dg::DGSEM, element, cache) + fstar1_L[:, 1, 
:] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_R)) + + for j in eachnode(dg), i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) + flux = volume_flux_fv(u_ll, u_rr, 1, equations) # orientation 1: x direction + set_node_vars!(fstar1_L, flux, equations, dg, i, j) + set_node_vars!(fstar1_R, flux, equations, dg, i, j) + end + + fstar2_L[:, :, 1] .= zero(eltype(fstar2_L)) + fstar2_L[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_L)) + fstar2_R[:, :, 1] .= zero(eltype(fstar2_R)) + fstar2_R[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_R)) + + for j in 2:nnodes(dg), i in eachnode(dg) + u_ll = get_node_vars(u, equations, dg, i, j - 1, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) + flux = volume_flux_fv(u_ll, u_rr, 2, equations) # orientation 2: y direction + set_node_vars!(fstar2_L, flux, equations, dg, i, j) + set_node_vars!(fstar2_R, flux, equations, dg, i, j) + end - fstar1_L[:, 1, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_R)) - - for j in eachnode(dg), i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) - flux = volume_flux_fv(u_ll, u_rr, 1, equations) # orientation 1: x direction - set_node_vars!(fstar1_L, flux, equations, dg, i, j) - set_node_vars!(fstar1_R, flux, equations, dg, i, j) - end - - fstar2_L[:, :, 1 ] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 ] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_R)) - - for j in 2:nnodes(dg), i in eachnode(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) - flux = volume_flux_fv(u_ll, u_rr, 2, equations) # orientation 2: y direction - set_node_vars!(fstar2_L, flux, equations, dg, i, j) - set_node_vars!(fstar2_R, flux, equations, dg, i, j) - end - - return nothing + return nothing end # calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u_leftright, @@ -430,423 +466,449 @@ end # - `fstar2_L::AbstractArray{<:Real, 3}`: # - `fstar2_R::AbstractArray{<:Real, 3}`: # - `u_leftright::AbstractArray{<:Real, 4}` -@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u::AbstractArray{<:Any,4}, +@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, + u::AbstractArray{<:Any, 4}, mesh::TreeMesh{2}, nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) - volume_flux, nonconservative_flux = volume_flux_fv - - # Fluxes in x - fstar1_L[:, 1, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :] .= zero(eltype(fstar1_R)) - - for j in eachnode(dg), i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) - - # Compute conservative part - f1 = volume_flux(u_ll, u_rr, 1, equations) # orientation 1: x direction - - # Compute nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - f1_L 
= f1 + 0.5 * nonconservative_flux(u_ll, u_rr, 1, equations) - f1_R = f1 + 0.5 * nonconservative_flux(u_rr, u_ll, 1, equations) - - # Copy to temporary storage - set_node_vars!(fstar1_L, f1_L, equations, dg, i, j) - set_node_vars!(fstar1_R, f1_R, equations, dg, i, j) - end - - # Fluxes in y - fstar2_L[:, :, 1 ] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 ] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1] .= zero(eltype(fstar2_R)) - - # Compute inner fluxes - for j in 2:nnodes(dg), i in eachnode(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, element) - u_rr = get_node_vars(u, equations, dg, i, j, element) - - # Compute conservative part - f2 = volume_flux(u_ll, u_rr, 2, equations) # orientation 2: y direction - - # Compute nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - f2_L = f2 + 0.5 * nonconservative_flux(u_ll, u_rr, 2, equations) - f2_R = f2 + 0.5 * nonconservative_flux(u_rr, u_ll, 2, equations) - - # Copy to temporary storage - set_node_vars!(fstar2_L, f2_L, equations, dg, i, j) - set_node_vars!(fstar2_R, f2_R, equations, dg, i, j) - end - - return nothing -end + volume_flux, nonconservative_flux = volume_flux_fv + + # Fluxes in x + fstar1_L[:, 1, :] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1, :] .= zero(eltype(fstar1_R)) + + for j in eachnode(dg), i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) + + # Compute conservative part + f1 = volume_flux(u_ll, u_rr, 1, equations) # orientation 1: x direction + + # Compute nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + f1_L = f1 + 0.5 * nonconservative_flux(u_ll, u_rr, 1, equations) + f1_R = f1 + 0.5 * nonconservative_flux(u_rr, u_ll, 1, equations) + + # Copy to temporary storage + set_node_vars!(fstar1_L, f1_L, equations, dg, i, j) + set_node_vars!(fstar1_R, f1_R, equations, dg, i, j) + end + + # Fluxes in y + fstar2_L[:, :, 1] .= zero(eltype(fstar2_L)) + fstar2_L[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_L)) + fstar2_R[:, :, 1] .= zero(eltype(fstar2_R)) + fstar2_R[:, :, nnodes(dg) + 1] .= zero(eltype(fstar2_R)) + + # Compute inner fluxes + for j in 2:nnodes(dg), i in eachnode(dg) + u_ll = get_node_vars(u, equations, dg, i, j - 1, element) + u_rr = get_node_vars(u, equations, dg, i, j, element) + + # Compute conservative part + f2 = volume_flux(u_ll, u_rr, 2, equations) # orientation 2: y direction + # Compute nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + f2_L = f2 + 0.5 * nonconservative_flux(u_ll, u_rr, 2, equations) + f2_R = f2 + 0.5 * nonconservative_flux(u_rr, u_ll, 2, equations) + + # Copy to temporary storage + set_node_vars!(fstar2_L, f2_L, equations, dg, i, j) + set_node_vars!(fstar2_R, f2_R, equations, dg, i, j) + end + + return nothing +end function prolong2interfaces!(cache, u, mesh::TreeMesh{2}, equations, surface_integral, dg::DG) - @unpack interfaces = cache - @unpack orientations = interfaces - - @threaded for interface 
in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] - - if orientations[interface] == 1 - # interface in x-direction - for j in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, j, interface] = u[v, nnodes(dg), j, left_element] - interfaces.u[2, v, j, interface] = u[v, 1, j, right_element] - end - else # if orientations[interface] == 2 - # interface in y-direction - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, i, nnodes(dg), left_element] - interfaces.u[2, v, i, interface] = u[v, i, 1, right_element] - end + @unpack interfaces = cache + @unpack orientations = interfaces + + @threaded for interface in eachinterface(dg, cache) + left_element = interfaces.neighbor_ids[1, interface] + right_element = interfaces.neighbor_ids[2, interface] + + if orientations[interface] == 1 + # interface in x-direction + for j in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, j, interface] = u[v, nnodes(dg), j, left_element] + interfaces.u[2, v, j, interface] = u[v, 1, j, right_element] + end + else # if orientations[interface] == 2 + # interface in y-direction + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, i, interface] = u[v, i, nnodes(dg), left_element] + interfaces.u[2, v, i, interface] = u[v, i, 1, right_element] + end + end end - end - return nothing + return nothing end function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{2}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for i in eachnode(dg) - # Call pointwise Riemann solver - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) - flux = surface_flux(u_ll, u_rr, orientations[interface], equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, i, left_direction, left_id] = flux[v] - surface_flux_values[v, i, right_direction, right_id] = flux[v] - end + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, orientations = cache.interfaces + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for i in eachnode(dg) + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) + flux = surface_flux(u_ll, u_rr, orientations[interface], equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, i, left_direction, left_id] = flux[v] + surface_flux_values[v, i, right_direction, right_id] = flux[v] + end + end end - end - return nothing + return nothing end function 
calc_interface_flux!(surface_flux_values, mesh::TreeMesh{2}, nonconservative_terms::True, equations, surface_integral, dg::DG, cache) - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for i in eachnode(dg) - # Call pointwise Riemann solver - orientation = orientations[interface] - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) - flux = surface_flux(u_ll, u_rr, orientation, equations) - - # Compute both nonconservative fluxes - noncons_left = nonconservative_flux(u_ll, u_rr, orientation, equations) - noncons_right = nonconservative_flux(u_rr, u_ll, orientation, equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, i, left_direction, left_id] = flux[v] + 0.5 * noncons_left[v] - surface_flux_values[v, i, right_direction, right_id] = flux[v] + 0.5 * noncons_right[v] - end + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u, neighbor_ids, orientations = cache.interfaces + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for i in eachnode(dg) + # Call pointwise Riemann solver + orientation = orientations[interface] + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) + flux = surface_flux(u_ll, u_rr, orientation, equations) + + # Compute both nonconservative fluxes + noncons_left = nonconservative_flux(u_ll, u_rr, orientation, equations) + noncons_right = nonconservative_flux(u_rr, u_ll, orientation, equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, i, left_direction, left_id] = flux[v] + + 0.5 * + noncons_left[v] + surface_flux_values[v, i, right_direction, right_id] = flux[v] + + 0.5 * + noncons_right[v] + end + end end - end - return nothing + return nothing end - function prolong2boundaries!(cache, u, mesh::TreeMesh{2}, equations, surface_integral, dg::DG) - @unpack boundaries = cache - @unpack orientations, neighbor_sides = boundaries - - @threaded for boundary in eachboundary(dg, cache) - element = boundaries.neighbor_ids[boundary] - - if orientations[boundary] == 1 - # boundary in x-direction - if neighbor_sides[boundary] == 1 - # element in -x direction of boundary - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[1, v, l, boundary] = u[v, nnodes(dg), l, element] - end - else # Element in 
+x direction of boundary - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[2, v, l, boundary] = u[v, 1, l, element] - end - end - else # if orientations[boundary] == 2 - # boundary in y-direction - if neighbor_sides[boundary] == 1 - # element in -y direction of boundary - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[1, v, l, boundary] = u[v, l, nnodes(dg), element] + @unpack boundaries = cache + @unpack orientations, neighbor_sides = boundaries + + @threaded for boundary in eachboundary(dg, cache) + element = boundaries.neighbor_ids[boundary] + + if orientations[boundary] == 1 + # boundary in x-direction + if neighbor_sides[boundary] == 1 + # element in -x direction of boundary + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[1, v, l, boundary] = u[v, nnodes(dg), l, element] + end + else # Element in +x direction of boundary + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[2, v, l, boundary] = u[v, 1, l, element] + end + end + else # if orientations[boundary] == 2 + # boundary in y-direction + if neighbor_sides[boundary] == 1 + # element in -y direction of boundary + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[1, v, l, boundary] = u[v, l, nnodes(dg), element] + end + else + # element in +y direction of boundary + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[2, v, l, boundary] = u[v, l, 1, element] + end + end end - else - # element in +y direction of boundary - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[2, v, l, boundary] = u[v, l, 1, element] - end - end end - end - return nothing + return nothing end # TODO: Taal dimension agnostic function calc_boundary_flux!(cache, t, boundary_condition::BoundaryConditionPeriodic, mesh::TreeMesh{2}, equations, surface_integral, dg::DG) - @assert isempty(eachboundary(dg, cache)) + @assert isempty(eachboundary(dg, cache)) end function calc_boundary_flux!(cache, t, boundary_conditions::NamedTuple, mesh::TreeMesh{2}, equations, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[1], - have_nonconservative_terms(equations), - equations, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[2], - have_nonconservative_terms(equations), - equations, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[3], - have_nonconservative_terms(equations), - equations, surface_integral, dg, cache, - 3, firsts[3], lasts[3]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[4], - have_nonconservative_terms(equations), - equations, surface_integral, dg, cache, - 4, firsts[4], lasts[4]) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[1], + have_nonconservative_terms(equations), + equations, surface_integral, dg, cache, 
+ 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[2], + have_nonconservative_terms(equations), + equations, surface_integral, dg, cache, + 2, firsts[2], lasts[2]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[3], + have_nonconservative_terms(equations), + equations, surface_integral, dg, cache, + 3, firsts[3], lasts[3]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[4], + have_nonconservative_terms(equations), + equations, surface_integral, dg, cache, + 4, firsts[4], lasts[4]) end -function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any,4}, t, - boundary_condition, nonconservative_terms::False, equations, - surface_integral ,dg::DG, cache, +function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any, 4}, + t, + boundary_condition, + nonconservative_terms::False, equations, + surface_integral, dg::DG, cache, direction, first_boundary, last_boundary) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - for i in eachnode(dg) - # Get boundary flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end - x = get_node_coords(node_coordinates, equations, dg, i, boundary) - flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, surface_flux, - equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, i, direction, neighbor] = flux[v] - end + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + for i in eachnode(dg) + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end + x = get_node_coords(node_coordinates, equations, dg, i, boundary) + flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, + surface_flux, + equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, i, direction, neighbor] = flux[v] + end + end end - end - return nothing + return nothing end -function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any,4}, t, - boundary_condition, nonconservative_terms::True, equations, +function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any, 4}, + t, + boundary_condition, + nonconservative_terms::True, equations, surface_integral, dg::DG, cache, direction, first_boundary, last_boundary) - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - for i in eachnode(dg) - # Get boundary 
flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end - x = get_node_coords(node_coordinates, equations, dg, i, boundary) - flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, surface_flux, - equations) - noncons_flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, nonconservative_flux, - equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, i, direction, neighbor] = flux[v] + 0.5 * noncons_flux[v] - end + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + for i in eachnode(dg) + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end + x = get_node_coords(node_coordinates, equations, dg, i, boundary) + flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, + surface_flux, + equations) + noncons_flux = boundary_condition(u_inner, orientations[boundary], + direction, x, t, nonconservative_flux, + equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, i, direction, neighbor] = flux[v] + + 0.5 * noncons_flux[v] + end + end end - end - return nothing + return nothing end - function prolong2mortars!(cache, u, mesh::TreeMesh{2}, equations, - mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DGSEM) - - @threaded for mortar in eachmortar(dg, cache) - - large_element = cache.mortars.neighbor_ids[3, mortar] - upper_element = cache.mortars.neighbor_ids[2, mortar] - lower_element = cache.mortars.neighbor_ids[1, mortar] - - # Copy solution small to small - if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - for l in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper[2, v, l, mortar] = u[v, 1, l, upper_element] - cache.mortars.u_lower[2, v, l, mortar] = u[v, 1, l, lower_element] - end - end - else - # L2 mortars in y-direction - for l in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper[2, v, l, mortar] = u[v, l, 1, upper_element] - cache.mortars.u_lower[2, v, l, mortar] = u[v, l, 1, lower_element] - end - end - end - else # large_sides[mortar] == 2 -> small elements on left side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - for l in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper[1, v, l, mortar] = u[v, nnodes(dg), l, upper_element] - cache.mortars.u_lower[1, v, l, mortar] = u[v, nnodes(dg), l, lower_element] - end + mortar_l2::LobattoLegendreMortarL2, surface_integral, + dg::DGSEM) + @threaded for mortar in eachmortar(dg, cache) + large_element = cache.mortars.neighbor_ids[3, mortar] + upper_element = cache.mortars.neighbor_ids[2, mortar] + lower_element = cache.mortars.neighbor_ids[1, mortar] + + # Copy solution small to small + if cache.mortars.large_sides[mortar] == 1 
# -> small elements on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + for l in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper[2, v, l, mortar] = u[v, 1, l, + upper_element] + cache.mortars.u_lower[2, v, l, mortar] = u[v, 1, l, + lower_element] + end + end + else + # L2 mortars in y-direction + for l in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper[2, v, l, mortar] = u[v, l, 1, + upper_element] + cache.mortars.u_lower[2, v, l, mortar] = u[v, l, 1, + lower_element] + end + end + end + else # large_sides[mortar] == 2 -> small elements on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + for l in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper[1, v, l, mortar] = u[v, nnodes(dg), l, + upper_element] + cache.mortars.u_lower[1, v, l, mortar] = u[v, nnodes(dg), l, + lower_element] + end + end + else + # L2 mortars in y-direction + for l in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper[1, v, l, mortar] = u[v, l, nnodes(dg), + upper_element] + cache.mortars.u_lower[1, v, l, mortar] = u[v, l, nnodes(dg), + lower_element] + end + end + end end - else - # L2 mortars in y-direction - for l in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper[1, v, l, mortar] = u[v, l, nnodes(dg), upper_element] - cache.mortars.u_lower[1, v, l, mortar] = u[v, l, nnodes(dg), lower_element] - end - end - end - end - # Interpolate large element face data to small interface locations - if cache.mortars.large_sides[mortar] == 1 # -> large element on left side - leftright = 1 - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - u_large = view(u, :, nnodes(dg), :, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large) - else - # L2 mortars in y-direction - u_large = view(u, :, :, nnodes(dg), large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large) - end - else # large_sides[mortar] == 2 -> large element on right side - leftright = 2 - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - u_large = view(u, :, 1, :, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large) - else - # L2 mortars in y-direction - u_large = view(u, :, :, 1, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large) - end + # Interpolate large element face data to small interface locations + if cache.mortars.large_sides[mortar] == 1 # -> large element on left side + leftright = 1 + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, nnodes(dg), :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + else + # L2 mortars in y-direction + u_large = view(u, :, :, nnodes(dg), large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + end + else # large_sides[mortar] == 2 -> large element on right side + leftright = 2 + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, 1, :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + else + # L2 mortars in y-direction + u_large = view(u, :, :, 1, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + end + end end - 
end - return nothing + return nothing end -@inline function element_solutions_to_mortars!(mortars, mortar_l2::LobattoLegendreMortarL2, leftright, mortar, - u_large::AbstractArray{<:Any,2}) - multiply_dimensionwise!(view(mortars.u_upper, leftright, :, :, mortar), mortar_l2.forward_upper, u_large) - multiply_dimensionwise!(view(mortars.u_lower, leftright, :, :, mortar), mortar_l2.forward_lower, u_large) - return nothing +@inline function element_solutions_to_mortars!(mortars, + mortar_l2::LobattoLegendreMortarL2, + leftright, mortar, + u_large::AbstractArray{<:Any, 2}) + multiply_dimensionwise!(view(mortars.u_upper, leftright, :, :, mortar), + mortar_l2.forward_upper, u_large) + multiply_dimensionwise!(view(mortars.u_lower, leftright, :, :, mortar), + mortar_l2.forward_lower, u_large) + return nothing end - function calc_mortar_flux!(surface_flux_values, mesh::TreeMesh{2}, nonconservative_terms::False, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u_lower, u_upper, orientations = cache.mortars - @unpack fstar_upper_threaded, fstar_lower_threaded = cache - - @threaded for mortar in eachmortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar_upper = fstar_upper_threaded[Threads.threadid()] - fstar_lower = fstar_lower_threaded[Threads.threadid()] - - # Calculate fluxes - orientation = orientations[mortar] - calc_fstar!(fstar_upper, equations, surface_flux, dg, u_upper, mortar, orientation) - calc_fstar!(fstar_lower, equations, surface_flux, dg, u_lower, mortar, orientation) - - mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, - mortar, fstar_upper, fstar_lower) - end - - return nothing + @unpack surface_flux = surface_integral + @unpack u_lower, u_upper, orientations = cache.mortars + @unpack fstar_upper_threaded, fstar_lower_threaded = cache + + @threaded for mortar in eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar_upper = fstar_upper_threaded[Threads.threadid()] + fstar_lower = fstar_lower_threaded[Threads.threadid()] + + # Calculate fluxes + orientation = orientations[mortar] + calc_fstar!(fstar_upper, equations, surface_flux, dg, u_upper, mortar, + orientation) + calc_fstar!(fstar_lower, equations, surface_flux, dg, u_lower, mortar, + orientation) + + mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar_upper, fstar_lower) + end + + return nothing end function calc_mortar_flux!(surface_flux_values, @@ -854,76 +916,88 @@ function calc_mortar_flux!(surface_flux_values, nonconservative_terms::True, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u_lower, u_upper, orientations, large_sides = cache.mortars - @unpack fstar_upper_threaded, fstar_lower_threaded = cache - - @threaded for mortar in eachmortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar_upper = fstar_upper_threaded[Threads.threadid()] - fstar_lower = fstar_lower_threaded[Threads.threadid()] - - # Calculate fluxes - orientation = orientations[mortar] - calc_fstar!(fstar_upper, equations, surface_flux, dg, u_upper, mortar, orientation) - calc_fstar!(fstar_lower, equations, surface_flux, dg, u_lower, mortar, orientation) - - # Add nonconservative fluxes. 
- # These need to be adapted on the geometry (left/right) since the order of - # the arguments matters, based on the global SBP operator interpretation. - # The same interpretation (global SBP operators coupled discontinuously via - # central fluxes/SATs) explains why we need the factor 0.5. - # Alternatively, you can also follow the argumentation of Bohm et al. 2018 - # ("nonconservative diamond flux") - if large_sides[mortar] == 1 # -> small elements on right side - for i in eachnode(dg) - # Pull the left and right solutions - u_upper_ll, u_upper_rr = get_surface_node_vars(u_upper, equations, dg, i, mortar) - u_lower_ll, u_lower_rr = get_surface_node_vars(u_lower, equations, dg, i, mortar) - # Call pointwise nonconservative term - noncons_upper = nonconservative_flux(u_upper_ll, u_upper_rr, orientation, equations) - noncons_lower = nonconservative_flux(u_lower_ll, u_lower_rr, orientation, equations) - # Add to primary and secondary temporary storage - multiply_add_to_node_vars!(fstar_upper, 0.5, noncons_upper, equations, dg, i) - multiply_add_to_node_vars!(fstar_lower, 0.5, noncons_lower, equations, dg, i) - end - else # large_sides[mortar] == 2 -> small elements on the left - for i in eachnode(dg) - # Pull the left and right solutions - u_upper_ll, u_upper_rr = get_surface_node_vars(u_upper, equations, dg, i, mortar) - u_lower_ll, u_lower_rr = get_surface_node_vars(u_lower, equations, dg, i, mortar) - # Call pointwise nonconservative term - noncons_upper = nonconservative_flux(u_upper_rr, u_upper_ll, orientation, equations) - noncons_lower = nonconservative_flux(u_lower_rr, u_lower_ll, orientation, equations) - # Add to primary and secondary temporary storage - multiply_add_to_node_vars!(fstar_upper, 0.5, noncons_upper, equations, dg, i) - multiply_add_to_node_vars!(fstar_lower, 0.5, noncons_lower, equations, dg, i) - end - end + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u_lower, u_upper, orientations, large_sides = cache.mortars + @unpack fstar_upper_threaded, fstar_lower_threaded = cache + + @threaded for mortar in eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar_upper = fstar_upper_threaded[Threads.threadid()] + fstar_lower = fstar_lower_threaded[Threads.threadid()] + + # Calculate fluxes + orientation = orientations[mortar] + calc_fstar!(fstar_upper, equations, surface_flux, dg, u_upper, mortar, + orientation) + calc_fstar!(fstar_lower, equations, surface_flux, dg, u_lower, mortar, + orientation) + + # Add nonconservative fluxes. + # These need to be adapted on the geometry (left/right) since the order of + # the arguments matters, based on the global SBP operator interpretation. + # The same interpretation (global SBP operators coupled discontinuously via + # central fluxes/SATs) explains why we need the factor 0.5. + # Alternatively, you can also follow the argumentation of Bohm et al. 
2018 + # ("nonconservative diamond flux") + if large_sides[mortar] == 1 # -> small elements on right side + for i in eachnode(dg) + # Pull the left and right solutions + u_upper_ll, u_upper_rr = get_surface_node_vars(u_upper, equations, dg, + i, mortar) + u_lower_ll, u_lower_rr = get_surface_node_vars(u_lower, equations, dg, + i, mortar) + # Call pointwise nonconservative term + noncons_upper = nonconservative_flux(u_upper_ll, u_upper_rr, + orientation, equations) + noncons_lower = nonconservative_flux(u_lower_ll, u_lower_rr, + orientation, equations) + # Add to primary and secondary temporary storage + multiply_add_to_node_vars!(fstar_upper, 0.5, noncons_upper, equations, + dg, i) + multiply_add_to_node_vars!(fstar_lower, 0.5, noncons_lower, equations, + dg, i) + end + else # large_sides[mortar] == 2 -> small elements on the left + for i in eachnode(dg) + # Pull the left and right solutions + u_upper_ll, u_upper_rr = get_surface_node_vars(u_upper, equations, dg, + i, mortar) + u_lower_ll, u_lower_rr = get_surface_node_vars(u_lower, equations, dg, + i, mortar) + # Call pointwise nonconservative term + noncons_upper = nonconservative_flux(u_upper_rr, u_upper_ll, + orientation, equations) + noncons_lower = nonconservative_flux(u_lower_rr, u_lower_ll, + orientation, equations) + # Add to primary and secondary temporary storage + multiply_add_to_node_vars!(fstar_upper, 0.5, noncons_upper, equations, + dg, i) + multiply_add_to_node_vars!(fstar_lower, 0.5, noncons_lower, equations, + dg, i) + end + end - mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, - mortar, fstar_upper, fstar_lower) - end + mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar_upper, fstar_lower) + end - return nothing + return nothing end - -@inline function calc_fstar!(destination::AbstractArray{<:Any,2}, equations, +@inline function calc_fstar!(destination::AbstractArray{<:Any, 2}, equations, surface_flux, dg::DGSEM, u_interfaces, interface, orientation) + for i in eachnode(dg) + # Call pointwise two-point numerical flux function + u_ll, u_rr = get_surface_node_vars(u_interfaces, equations, dg, i, interface) + flux = surface_flux(u_ll, u_rr, orientation, equations) - for i in eachnode(dg) - # Call pointwise two-point numerical flux function - u_ll, u_rr = get_surface_node_vars(u_interfaces, equations, dg, i, interface) - flux = surface_flux(u_ll, u_rr, orientation, equations) - - # Copy flux to left and right element storage - set_node_vars!(destination, flux, equations, dg, i) - end + # Copy flux to left and right element storage + set_node_vars!(destination, flux, equations, dg, i) + end - return nothing + return nothing end @inline function mortar_fluxes_to_elements!(surface_flux_values, @@ -931,150 +1005,148 @@ end mortar_l2::LobattoLegendreMortarL2, dg::DGSEM, cache, mortar, fstar_upper, fstar_lower) - large_element = cache.mortars.neighbor_ids[3, mortar] - upper_element = cache.mortars.neighbor_ids[2, mortar] - lower_element = cache.mortars.neighbor_ids[1, mortar] - - # Copy flux small to small - if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 1 - else - # L2 mortars in y-direction - direction = 3 - end - else # large_sides[mortar] == 2 -> small elements on left side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 2 - else - # L2 mortars in y-direction - direction = 4 - 
end - end - surface_flux_values[:, :, direction, upper_element] .= fstar_upper - surface_flux_values[:, :, direction, lower_element] .= fstar_lower - - # Project small fluxes to large element - if cache.mortars.large_sides[mortar] == 1 # -> large element on left side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 2 - else - # L2 mortars in y-direction - direction = 4 + large_element = cache.mortars.neighbor_ids[3, mortar] + upper_element = cache.mortars.neighbor_ids[2, mortar] + lower_element = cache.mortars.neighbor_ids[1, mortar] + + # Copy flux small to small + if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + else + # L2 mortars in y-direction + direction = 3 + end + else # large_sides[mortar] == 2 -> small elements on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + else + # L2 mortars in y-direction + direction = 4 + end end - else # large_sides[mortar] == 2 -> large element on right side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 1 - else - # L2 mortars in y-direction - direction = 3 + surface_flux_values[:, :, direction, upper_element] .= fstar_upper + surface_flux_values[:, :, direction, lower_element] .= fstar_lower + + # Project small fluxes to large element + if cache.mortars.large_sides[mortar] == 1 # -> large element on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + else + # L2 mortars in y-direction + direction = 4 + end + else # large_sides[mortar] == 2 -> large element on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + else + # L2 mortars in y-direction + direction = 3 + end end - end - - # TODO: Taal performance - # for v in eachvariable(equations) - # # The code below is semantically equivalent to - # # surface_flux_values[v, :, direction, large_element] .= - # # (mortar_l2.reverse_upper * fstar_upper[v, :] + mortar_l2.reverse_lower * fstar_lower[v, :]) - # # but faster and does not allocate. - # # Note that `true * some_float == some_float` in Julia, i.e. `true` acts as - # # a universal `one`. Hence, the second `mul!` means "add the matrix-vector - # # product to the current value of the destination". - # @views mul!(surface_flux_values[v, :, direction, large_element], - # mortar_l2.reverse_upper, fstar_upper[v, :]) - # @views mul!(surface_flux_values[v, :, direction, large_element], - # mortar_l2.reverse_lower, fstar_lower[v, :], true, true) - # end - # The code above could be replaced by the following code. However, the relative efficiency - # depends on the types of fstar_upper/fstar_lower and dg.l2mortar_reverse_upper. - # Using StaticArrays for both makes the code above faster for common test cases. - multiply_dimensionwise!( - view(surface_flux_values, :, :, direction, large_element), mortar_l2.reverse_upper, fstar_upper, - mortar_l2.reverse_lower, fstar_lower) - - return nothing -end + # TODO: Taal performance + # for v in eachvariable(equations) + # # The code below is semantically equivalent to + # # surface_flux_values[v, :, direction, large_element] .= + # # (mortar_l2.reverse_upper * fstar_upper[v, :] + mortar_l2.reverse_lower * fstar_lower[v, :]) + # # but faster and does not allocate. + # # Note that `true * some_float == some_float` in Julia, i.e. `true` acts as + # # a universal `one`. 
Hence, the second `mul!` means "add the matrix-vector + # # product to the current value of the destination". + # @views mul!(surface_flux_values[v, :, direction, large_element], + # mortar_l2.reverse_upper, fstar_upper[v, :]) + # @views mul!(surface_flux_values[v, :, direction, large_element], + # mortar_l2.reverse_lower, fstar_lower[v, :], true, true) + # end + # The code above could be replaced by the following code. However, the relative efficiency + # depends on the types of fstar_upper/fstar_lower and dg.l2mortar_reverse_upper. + # Using StaticArrays for both makes the code above faster for common test cases. + multiply_dimensionwise!(view(surface_flux_values, :, :, direction, large_element), + mortar_l2.reverse_upper, fstar_upper, + mortar_l2.reverse_lower, fstar_lower) + + return nothing +end function calc_surface_integral!(du, u, mesh::Union{TreeMesh{2}, StructuredMesh{2}}, equations, surface_integral::SurfaceIntegralWeakForm, dg::DG, cache) - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache.elements - - # Note that all fluxes have been computed with outward-pointing normal vectors. - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). - factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, element] = ( - du[v, 1, l, element] - surface_flux_values[v, l, 1, element] * factor_1) - - # surface at +x - du[v, nnodes(dg), l, element] = ( - du[v, nnodes(dg), l, element] + surface_flux_values[v, l, 2, element] * factor_2) - - # surface at -y - du[v, l, 1, element] = ( - du[v, l, 1, element] - surface_flux_values[v, l, 3, element] * factor_1) - - # surface at +y - du[v, l, nnodes(dg), element] = ( - du[v, l, nnodes(dg), element] + surface_flux_values[v, l, 4, element] * factor_2) - end + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). 
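    # As a schematic illustration of that trick (a sketch, not part of the
    # upstream change set): under `@muladd`, an explicit assignment such as
    #     du[v, 1, l, element] = du[v, 1, l, element] -
    #                            surface_flux_values[v, l, 1, element] * factor_1
    # can be turned into a single fused multiply-add (`muladd`), while the
    # shorthand `du[v, 1, l, element] -= ...` would be left as a separate
    # multiply and add.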
+ factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for l in eachnode(dg) + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, element] = (du[v, 1, l, element] - + surface_flux_values[v, l, 1, element] * + factor_1) + + # surface at +x + du[v, nnodes(dg), l, element] = (du[v, nnodes(dg), l, element] + + surface_flux_values[v, l, 2, element] * + factor_2) + + # surface at -y + du[v, l, 1, element] = (du[v, l, 1, element] - + surface_flux_values[v, l, 3, element] * + factor_1) + + # surface at +y + du[v, l, nnodes(dg), element] = (du[v, l, nnodes(dg), element] + + surface_flux_values[v, l, 4, element] * + factor_2) + end + end end - end - return nothing + return nothing end - function apply_jacobian!(du, mesh::TreeMesh{2}, equations, dg::DG, cache) + @threaded for element in eachelement(dg, cache) + factor = -cache.elements.inverse_jacobian[element] - @threaded for element in eachelement(dg, cache) - factor = -cache.elements.inverse_jacobian[element] - - for j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, j, element] *= factor - end + for j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, j, element] *= factor + end + end end - end - return nothing + return nothing end - # TODO: Taal dimension agnostic function calc_sources!(du, u, t, source_terms::Nothing, equations::AbstractEquations{2}, dg::DG, cache) - return nothing + return nothing end function calc_sources!(du, u, t, source_terms, equations::AbstractEquations{2}, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - x_local = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, element) - du_local = source_terms(u_local, x_local, t, equations) - add_to_node_vars!(du, du_local, equations, dg, i, j, element) + @threaded for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + x_local = get_node_coords(cache.elements.node_coordinates, equations, dg, i, + j, element) + du_local = source_terms(u_local, x_local, t, equations) + add_to_node_vars!(du, du_local, equations, dg, i, j, element) + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl b/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl index 9d6e3c452b0..50b1e8cb5b4 100644 --- a/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl +++ b/src/solvers/dgsem_tree/dg_2d_compressible_euler.jl @@ -3,51 +3,50 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Calculate the vorticity on a single node using the derivative matrix from the polynomial basis of # a DGSEM solver. `u` is the solution on the whole domain. # This function is used for calculating acoustic source terms for coupled Euler-acoustics # simulations. 
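# In 2D, the vorticity reduces to the scalar ω = ∂v2/∂x - ∂v1/∂y. Each partial
# derivative is approximated nodewise with the derivative matrix D of the
# polynomial basis, schematically
#     (∂v2/∂x)_{i,j} ≈ sum_k D[i, k] * v2_{k,j},
# followed by scaling with the element's inverse Jacobian to map from the
# reference element to physical coordinates, as implemented below.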
-function calc_vorticity_node(u, mesh::TreeMesh{2}, equations::CompressibleEulerEquations2D, +function calc_vorticity_node(u, mesh::TreeMesh{2}, + equations::CompressibleEulerEquations2D, dg::DGSEM, cache, i, j, element) - @unpack derivative_matrix = dg.basis - - v2_x = zero(eltype(u)) # derivative of v2 in x direction - for ii in eachnode(dg) - rho, _, rho_v2 = get_node_vars(u, equations, dg, ii, j, element) - v2 = rho_v2 / rho - v2_x = v2_x + derivative_matrix[i, ii] * v2 - end - - v1_y = zero(eltype(u)) # derivative of v1 in y direction - for jj in eachnode(dg) - rho, rho_v1 = get_node_vars(u, equations, dg, i, jj, element) - v1 = rho_v1 / rho - v1_y = v1_y + derivative_matrix[j, jj] * v1 - end - - return (v2_x - v1_y) * cache.elements.inverse_jacobian[element] + @unpack derivative_matrix = dg.basis + + v2_x = zero(eltype(u)) # derivative of v2 in x direction + for ii in eachnode(dg) + rho, _, rho_v2 = get_node_vars(u, equations, dg, ii, j, element) + v2 = rho_v2 / rho + v2_x = v2_x + derivative_matrix[i, ii] * v2 + end + + v1_y = zero(eltype(u)) # derivative of v1 in y direction + for jj in eachnode(dg) + rho, rho_v1 = get_node_vars(u, equations, dg, i, jj, element) + v1 = rho_v1 / rho + v1_y = v1_y + derivative_matrix[j, jj] * v1 + end + + return (v2_x - v1_y) * cache.elements.inverse_jacobian[element] end # Convenience function for calculating the vorticity on the whole domain and storing it in a # preallocated array -function calc_vorticity!(vorticity, u, mesh::TreeMesh{2}, equations::CompressibleEulerEquations2D, +function calc_vorticity!(vorticity, u, mesh::TreeMesh{2}, + equations::CompressibleEulerEquations2D, dg::DGSEM, cache) - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - vorticity[i, j, element] = calc_vorticity_node(u, mesh, equations, dg, cache, i, j, element) + @threaded for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + vorticity[i, j, element] = calc_vorticity_node(u, mesh, equations, dg, + cache, i, j, element) + end end - end - return nothing + return nothing end - - end # muladd - - # From here on, this file contains specializations of DG methods on the # TreeMesh2D to the compressible Euler equations. # @@ -62,7 +61,6 @@ end # muladd # We do not wrap this code in `@muladd begin ... end` block. Optimizations like # this are handled automatically by LoopVectorization.jl. - # We specialize on `PtrArray` since these will be returned by `Trixi.wrap_array` # if LoopVectorization.jl can handle the array types. This ensures that `@turbo` # works efficiently here. @@ -72,161 +70,161 @@ end # muladd equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. 
- u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - @turbo for j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, element] - rho_v1 = u_cons[2, i, j, element] - rho_v2 = u_cons[3, i, j, element] - rho_e = u_cons[4, i, j, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - - u_prim[i, j, 1] = rho - u_prim[i, j, 2] = v1 - u_prim[i, j, 3] = v2 - u_prim[i, j, 4] = p - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). - du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - u_prim_permuted[j, i, v] = u_prim[i, j, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. - for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for j in eachnode(dg) - rho_ll = u_prim_permuted[j, i, 1] - v1_ll = u_prim_permuted[j, i, 2] - v2_ll = u_prim_permuted[j, i, 3] - p_ll = u_prim_permuted[j, i, 4] - - rho_rr = u_prim_permuted[j, ii, 1] - v1_rr = u_prim_permuted[j, ii, 2] - v2_rr = u_prim_permuted[j, ii, 3] - p_rr = u_prim_permuted[j, ii, 4] - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - p_avg = 0.5 * ( p_ll + p_rr) - kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) - pv1_avg = 0.5 * (p_ll * v1_rr + p_rr * v1_ll) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_avg * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = p_avg * v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[j, i, 1] += factor_i * f1 - du_permuted[j, i, 2] += factor_i * f2 - du_permuted[j, i, 3] += factor_i * f3 - du_permuted[j, i, 4] += factor_i * f4 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[j, ii, 1] += factor_ii * f1 - du_permuted[j, ii, 2] += factor_ii * f2 - du_permuted[j, ii, 3] += factor_ii * f3 - du_permuted[j, ii, 4] += factor_ii * f4 + @unpack derivative_split = dg.basis + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. 
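    # For `CompressibleEulerEquations2D` the primitive variables are
    # (rho, v1, v2, p), where the pressure follows from the conserved
    # quantities via
    #     p = (gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)),
    # exactly as evaluated in the `@turbo` loop below.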
+ u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations)))) + + @turbo for j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, element] + rho_v1 = u_cons[2, i, j, element] + rho_v2 = u_cons[3, i, j, element] + rho_e = u_cons[4, i, j, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + u_prim[i, j, 1] = rho + u_prim[i, j, 2] = v1 + u_prim[i, j, 3] = v2 + u_prim[i, j, 4] = p + end + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). + du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) + + u_prim_permuted[j, i, v] = u_prim[i, j, v] end - end - - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - du[i, j, v] = du_permuted[j, i, v] - end - - - # y direction - # The memory layout is already optimal for SIMD vectorization in this loop. - for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for i in eachnode(dg) - rho_ll = u_prim[i, j, 1] - v1_ll = u_prim[i, j, 2] - v2_ll = u_prim[i, j, 3] - p_ll = u_prim[i, j, 4] - - rho_rr = u_prim[i, jj, 1] - v1_rr = u_prim[i, jj, 2] - v2_rr = u_prim[i, jj, 3] - p_rr = u_prim[i, jj, 4] - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - p_avg = 0.5 * ( p_ll + p_rr) - kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) - pv2_avg = 0.5 * (p_ll * v2_rr + p_rr * v2_ll) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_avg * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = p_avg*v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, 1] += factor_j * f1 - du[i, j, 2] += factor_j * f2 - du[i, j, 3] += factor_j * f3 - du[i, j, 4] += factor_j * f4 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, 1] += factor_jj * f1 - du[i, jj, 2] += factor_jj * f2 - du[i, jj, 3] += factor_jj * f3 - du[i, jj, 4] += factor_jj * f4 + fill!(du_permuted, zero(eltype(du_permuted))) + + # Next, we basically inline the volume flux. To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. 
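# The key observation behind the triangular loop: the two-point volume flux
# is symmetric, so each evaluation f(u_i, u_ii) serves both the update at
# node i (weighted by derivative_split[i, ii]) and the update at node ii
# (weighted by derivative_split[ii, i]), halving the number of flux calls;
# the diagonal terms need no extra handling since `derivative_split` has
# zero diagonal entries for the Lobatto-Legendre operators used here.
# A generic sketch (assuming a symmetric two-argument function `f` and a
# matrix `D` with zero diagonal; names are illustrative):
function symmetric_flux_differencing!(du, u, D, f)
    fill!(du, zero(eltype(du)))
    for i in eachindex(u), k in (i + 1):lastindex(u)
        fik = f(u[i], u[k])     # one evaluation per unordered pair
        du[i] += D[i, k] * fik
        du[k] += D[k, i] * fik  # reuse via symmetry: f(u[k], u[i]) == fik
    end
    return du
end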
+ for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for j in eachnode(dg) + rho_ll = u_prim_permuted[j, i, 1] + v1_ll = u_prim_permuted[j, i, 2] + v2_ll = u_prim_permuted[j, i, 3] + p_ll = u_prim_permuted[j, i, 4] + + rho_rr = u_prim_permuted[j, ii, 1] + v1_rr = u_prim_permuted[j, ii, 2] + v2_rr = u_prim_permuted[j, ii, 3] + p_rr = u_prim_permuted[j, ii, 4] + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + pv1_avg = 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_avg * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = p_avg * v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[j, i, 1] += factor_i * f1 + du_permuted[j, i, 2] += factor_i * f2 + du_permuted[j, i, 3] += factor_i * f3 + du_permuted[j, i, 4] += factor_i * f4 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[j, ii, 1] += factor_ii * f1 + du_permuted[j, ii, 2] += factor_ii * f2 + du_permuted[j, ii, 3] += factor_ii * f3 + du_permuted[j, ii, 4] += factor_ii * f4 + end end - end + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, element] += du[i, j, v] - end -end + du[i, j, v] = du_permuted[j, i, v] + end + + # y direction + # The memory layout is already optimal for SIMD vectorization in this loop. + for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for i in eachnode(dg) + rho_ll = u_prim[i, j, 1] + v1_ll = u_prim[i, j, 2] + v2_ll = u_prim[i, j, 3] + p_ll = u_prim[i, j, 4] + + rho_rr = u_prim[i, jj, 1] + v1_rr = u_prim[i, jj, 2] + v2_rr = u_prim[i, jj, 3] + p_rr = u_prim[i, jj, 4] + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + pv2_avg = 0.5 * (p_ll * v2_rr + p_rr * v2_ll) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_avg * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = p_avg * v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, 1] += factor_j * f1 + du[i, j, 2] += factor_j * f2 + du[i, j, 3] += factor_j * f3 + du[i, j, 4] += factor_j * f4 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, 1] += factor_jj * f1 + du[i, jj, 2] += factor_jj * f2 + du[i, jj, 3] += factor_jj * f3 + du[i, jj, 4] += factor_jj * f4 + end + end + # Finally, we add the temporary RHS computed here to the global RHS in the + # given `element`. 
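# The copy-back at the end of this kernel is a transposed accumulate: the
# temporary stores data as [i, j, v] (one contiguous plane per variable),
# while the global RHS uses [v, i, j, element]. A reduced sketch of that
# final step on plain arrays (assuming LoopVectorization.jl for `@turbo`;
# the function name is illustrative):
using LoopVectorization: @turbo

function add_local_rhs!(du_global, du_local)
    n, nvars = size(du_local, 1), size(du_local, 3)
    @turbo for v in 1:nvars, j in 1:n, i in 1:n
        du_global[v, i, j] += du_local[i, j, v]
    end
    return du_global
end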
+ @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) + _du[v, i, j, element] += du[i, j, v] + end +end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, mesh::TreeMesh{2}, @@ -234,225 +232,231 @@ end equations::CompressibleEulerEquations2D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. In addition - # to the usual primitive variables, we also compute logarithms of the density - # and pressure to increase the performance of the required logarithmic mean - # values. - u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs - - @turbo for j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, element] - rho_v1 = u_cons[2, i, j, element] - rho_v2 = u_cons[3, i, j, element] - rho_e = u_cons[4, i, j, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) - - u_prim[i, j, 1] = rho - u_prim[i, j, 2] = v1 - u_prim[i, j, 3] = v2 - u_prim[i, j, 4] = p - u_prim[i, j, 5] = log(rho) - u_prim[i, j, 6] = log(p) - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). - du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations) + 2))) - - @turbo for v in indices(u_prim, 3), # v in eachvariable(equations) misses +2 logs - j in eachnode(dg), - i in eachnode(dg) - u_prim_permuted[j, i, v] = u_prim[i, j, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. - for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for j in eachnode(dg) - rho_ll = u_prim_permuted[j, i, 1] - v1_ll = u_prim_permuted[j, i, 2] - v2_ll = u_prim_permuted[j, i, 3] - p_ll = u_prim_permuted[j, i, 4] - log_rho_ll = u_prim_permuted[j, i, 5] - log_p_ll = u_prim_permuted[j, i, 6] - - rho_rr = u_prim_permuted[j, ii, 1] - v1_rr = u_prim_permuted[j, ii, 2] - v2_rr = u_prim_permuted[j, ii, 3] - p_rr = u_prim_permuted[j, ii, 4] - log_rho_rr = u_prim_permuted[j, ii, 5] - log_p_rr = u_prim_permuted[j, ii, 6] - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. 
This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v1_rr + p_rr*v1_ll) - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[j, i, 1] += factor_i * f1 - du_permuted[j, i, 2] += factor_i * f2 - du_permuted[j, i, 3] += factor_i * f3 - du_permuted[j, i, 4] += factor_i * f4 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[j, ii, 1] += factor_ii * f1 - du_permuted[j, ii, 2] += factor_ii * f2 - du_permuted[j, ii, 3] += factor_ii * f3 - du_permuted[j, ii, 4] += factor_ii * f4 + @unpack derivative_split = dg.basis + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. In addition + # to the usual primitive variables, we also compute logarithms of the density + # and pressure to increase the performance of the required logarithmic mean + # values. + u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs + + @turbo for j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, element] + rho_v1 = u_cons[2, i, j, element] + rho_v2 = u_cons[3, i, j, element] + rho_e = u_cons[4, i, j, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2)) + + u_prim[i, j, 1] = rho + u_prim[i, j, 2] = v1 + u_prim[i, j, 3] = v2 + u_prim[i, j, 4] = p + u_prim[i, j, 5] = log(rho) + u_prim[i, j, 6] = log(p) + end + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). 
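# A note on the extra `log` entries stored in `u_prim` above: the
# logarithmic mean ln_mean(x, y) = (y - x) / (log(y) - log(x)) is
# ill-conditioned for x ≈ y, so the flux loops below switch to a truncated
# series in z = (y - x)^2 / (x + y)^2 once z < 1.0e-4, and the precomputed
# logarithms turn two `log` calls per flux evaluation into a subtraction.
# A standalone sketch of exactly this pattern (`ifelse` keeps the branch
# SIMD-friendly; the function name is illustrative):
function ln_mean_precomputed(x, log_x, y, log_y)
    x_plus_y = x + y
    y_minus_x = y - x
    z = y_minus_x^2 / x_plus_y^2
    special = x_plus_y / (2 + z * (2 / 3 + z * (2 / 5 + 2 / 7 * z)))
    regular = y_minus_x / (log_y - log_x)
    return ifelse(z < 1.0e-4, special, regular)
end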
+ du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations) + 2))) + + @turbo for v in indices(u_prim, 3), # v in eachvariable(equations) misses +2 logs + j in eachnode(dg), + i in eachnode(dg) + + u_prim_permuted[j, i, v] = u_prim[i, j, v] + end + fill!(du_permuted, zero(eltype(du_permuted))) + + # Next, we basically inline the volume flux. To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. + for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for j in eachnode(dg) + rho_ll = u_prim_permuted[j, i, 1] + v1_ll = u_prim_permuted[j, i, 2] + v2_ll = u_prim_permuted[j, i, 3] + p_ll = u_prim_permuted[j, i, 4] + log_rho_ll = u_prim_permuted[j, i, 5] + log_p_ll = u_prim_permuted[j, i, 6] + + rho_rr = u_prim_permuted[j, ii, 1] + v1_rr = u_prim_permuted[j, ii, 2] + v2_rr = u_prim_permuted[j, ii, 3] + p_rr = u_prim_permuted[j, ii, 4] + log_rho_rr = u_prim_permuted[j, ii, 5] + log_p_rr = u_prim_permuted[j, ii, 6] + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[j, i, 1] += factor_i * f1 + du_permuted[j, i, 2] += factor_i * f2 + du_permuted[j, i, 3] += factor_i * f3 + du_permuted[j, i, 4] += factor_i * f4 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[j, ii, 1] += factor_ii * f1 + du_permuted[j, ii, 2] += factor_ii * f2 + du_permuted[j, ii, 3] += factor_ii * f3 + du_permuted[j, ii, 4] += factor_ii * f4 + end + end + + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) + + du[i, j, v] = du_permuted[j, i, v] end - end - - @turbo for v in 
eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - du[i, j, v] = du_permuted[j, i, v] - end - - - # y direction - # The memory layout is already optimal for SIMD vectorization in this loop. - for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for i in eachnode(dg) - rho_ll = u_prim[i, j, 1] - v1_ll = u_prim[i, j, 2] - v2_ll = u_prim[i, j, 3] - p_ll = u_prim[i, j, 4] - log_rho_ll = u_prim[i, j, 5] - log_p_ll = u_prim[i, j, 6] - - rho_rr = u_prim[i, jj, 1] - v1_rr = u_prim[i, jj, 2] - v2_rr = u_prim[i, jj, 3] - p_rr = u_prim[i, jj, 4] - log_rho_rr = u_prim[i, jj, 5] - log_p_rr = u_prim[i, jj, 6] - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v2_rr + p_rr*v2_ll) - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, 1] += factor_j * f1 - du[i, j, 2] += factor_j * f2 - du[i, j, 3] += factor_j * f3 - du[i, j, 4] += factor_j * f4 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, 1] += factor_jj * f1 - du[i, jj, 2] += factor_jj * f2 - du[i, jj, 3] += factor_jj * f3 - du[i, jj, 4] += factor_jj * f4 + + # y direction + # The memory layout is already optimal for SIMD vectorization in this loop. + for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for i in eachnode(dg) + rho_ll = u_prim[i, j, 1] + v1_ll = u_prim[i, j, 2] + v2_ll = u_prim[i, j, 3] + p_ll = u_prim[i, j, 4] + log_rho_ll = u_prim[i, j, 5] + log_p_ll = u_prim[i, j, 6] + + rho_rr = u_prim[i, jj, 1] + v1_rr = u_prim[i, jj, 2] + v2_rr = u_prim[i, jj, 3] + p_rr = u_prim[i, jj, 4] + log_rho_rr = u_prim[i, jj, 5] + log_p_rr = u_prim[i, jj, 6] + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. 
This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v2_rr + p_rr * v2_ll) + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, 1] += factor_j * f1 + du[i, j, 2] += factor_j * f2 + du[i, j, 3] += factor_j * f3 + du[i, j, 4] += factor_j * f4 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, 1] += factor_jj * f1 + du[i, jj, 2] += factor_jj * f2 + du[i, jj, 3] += factor_jj * f3 + du[i, jj, 4] += factor_jj * f4 + end end - end + # Finally, we add the temporary RHS computed here to the global RHS in the + # given `element`. + @turbo for v in eachvariable(equations), + j in eachnode(dg), + i in eachnode(dg) - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. - @turbo for v in eachvariable(equations), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, element] += du[i, j, v] - end + _du[v, i, j, element] += du[i, j, v] + end end diff --git a/src/solvers/dgsem_tree/dg_2d_parabolic.jl b/src/solvers/dgsem_tree/dg_2d_parabolic.jl index 07146c8d79e..c5862579992 100644 --- a/src/solvers/dgsem_tree/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_2d_parabolic.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # This file collects all methods that have been updated to work with parabolic systems of equations # @@ -12,75 +13,93 @@ # 2. compute f(u, grad(u)) # 3. compute div(f(u, grad(u))) (i.e., the "regular" rhs! call) # boundary conditions will be applied to both grad(u) and div(f(u, grad(u))). 
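# As a concrete illustration of the gradient/flux/divergence splitting
# described above, here is the same three-step structure for the 1D heat
# equation u_t = (nu * u_x)_x on a periodic grid, with central differences
# standing in for the DG operators (purely illustrative, no Trixi API):
function rhs_heat!(du, u, nu, dx)
    n = length(u)
    grad = similar(u)   # step 1: compute grad(u)
    for i in 1:n
        grad[i] = (u[mod1(i + 1, n)] - u[mod1(i - 1, n)]) / (2 * dx)
    end
    flux = nu .* grad   # step 2: compute f(u, grad(u))
    for i in 1:n        # step 3: compute div(f(u, grad(u)))
        du[i] = (flux[mod1(i + 1, n)] - flux[mod1(i - 1, n)]) / (2 * dx)
    end
    return du
end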
-function rhs_parabolic!(du, u, t, mesh::Union{TreeMesh{2}, P4estMesh{2}}, +function rhs_parabolic!(du, u, t, mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations_parabolic::AbstractEquationsParabolic, initial_condition, boundary_conditions_parabolic, source_terms, dg::DG, parabolic_scheme, cache, cache_parabolic) - (; u_transformed, gradients, flux_viscous) = cache_parabolic - - # Convert conservative variables to a form more suitable for viscous flux calculations - @trixi_timeit timer() "transform variables" transform_variables!( - u_transformed, u, mesh, equations_parabolic, dg, parabolic_scheme, cache, cache_parabolic) - - # Compute the gradients of the transformed variables - @trixi_timeit timer() "calculate gradient" calc_gradient!( - gradients, u_transformed, t, mesh, equations_parabolic, boundary_conditions_parabolic, dg, - cache, cache_parabolic) - - # Compute and store the viscous fluxes - @trixi_timeit timer() "calculate viscous fluxes" calc_viscous_fluxes!( - flux_viscous, gradients, u_transformed, mesh, equations_parabolic, dg, cache, cache_parabolic) - - # The remainder of this function is essentially a regular rhs! for parabolic equations (i.e., it - # computes the divergence of the viscous fluxes) - # - # OBS! In `calc_viscous_fluxes!`, the viscous flux values at the volume nodes of each element have - # been computed and stored in `fluxes_viscous`. In the following, we *reuse* (abuse) the - # `interfaces` and `boundaries` containers in `cache_parabolic` to interpolate and store the - # *fluxes* at the element surfaces, as opposed to interpolating and storing the *solution* (as it - # is done in the hyperbolic operator). That is, `interfaces.u`/`boundaries.u` store *viscous flux values* - # and *not the solution*. The advantage is that a) we do not need to allocate more storage, b) we - # do not need to recreate the existing data structure only with a different name, and c) we do not - # need to interpolate solutions *and* gradients to the surfaces. 
- - # TODO: parabolic; reconsider current data structure reuse strategy - - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, flux_viscous, mesh, equations_parabolic, dg, cache) - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache_parabolic, flux_viscous, mesh, equations_parabolic, dg.surface_integral, dg, cache) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache_parabolic.elements.surface_flux_values, mesh, equations_parabolic, dg, cache_parabolic) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache_parabolic, flux_viscous, mesh, equations_parabolic, dg.surface_integral, dg, cache) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux_divergence!( - cache_parabolic, t, boundary_conditions_parabolic, mesh, equations_parabolic, - dg.surface_integral, dg) - - # TODO: parabolic; extend to mortars - @assert nmortars(dg, cache) == 0 - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache_parabolic) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian_parabolic!( - du, mesh, equations_parabolic, dg, cache_parabolic) - - return nothing + (; u_transformed, gradients, flux_viscous) = cache_parabolic + + # Convert conservative variables to a form more suitable for viscous flux calculations + @trixi_timeit timer() "transform variables" begin + transform_variables!(u_transformed, u, mesh, equations_parabolic, + dg, parabolic_scheme, cache, cache_parabolic) + end + + # Compute the gradients of the transformed variables + @trixi_timeit timer() "calculate gradient" begin + calc_gradient!(gradients, u_transformed, t, mesh, equations_parabolic, + boundary_conditions_parabolic, dg, cache, cache_parabolic) + end + + # Compute and store the viscous fluxes + @trixi_timeit timer() "calculate viscous fluxes" begin + calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, mesh, + equations_parabolic, dg, cache, cache_parabolic) + end + + # The remainder of this function is essentially a regular rhs! for parabolic + # equations (i.e., it computes the divergence of the viscous fluxes) + # + # OBS! In `calc_viscous_fluxes!`, the viscous flux values at the volume nodes of each element have + # been computed and stored in `fluxes_viscous`. In the following, we *reuse* (abuse) the + # `interfaces` and `boundaries` containers in `cache_parabolic` to interpolate and store the + # *fluxes* at the element surfaces, as opposed to interpolating and storing the *solution* (as it + # is done in the hyperbolic operator). That is, `interfaces.u`/`boundaries.u` store *viscous flux values* + # and *not the solution*. The advantage is that a) we do not need to allocate more storage, b) we + # do not need to recreate the existing data structure only with a different name, and c) we do not + # need to interpolate solutions *and* gradients to the surfaces. 
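# The payoff of storing flux traces in `interfaces.u` shows up in
# `calc_interface_flux!` below: for BR1, the numerical viscous flux is just
# the arithmetic mean of the two one-sided traces, f* = (f^- + f^+) / 2, so
# no additional solution or gradient data is needed at the surface. As a
# one-line sketch (the name is illustrative, not Trixi API):
br1_interface_flux(flux_ll, flux_rr) = 0.5 * (flux_ll + flux_rr)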
+ + # TODO: parabolic; reconsider current data structure reuse strategy + + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, flux_viscous, mesh, equations_parabolic, dg, cache) + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache_parabolic.elements.surface_flux_values, mesh, + equations_parabolic, dg, cache_parabolic) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_divergence!(cache_parabolic, t, + boundary_conditions_parabolic, mesh, + equations_parabolic, + dg.surface_integral, dg) + end + + # TODO: parabolic; extend to mortars + @assert nmortars(dg, cache) == 0 + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations_parabolic, + dg.surface_integral, dg, cache_parabolic) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(du, mesh, equations_parabolic, dg, cache_parabolic) + end + + return nothing end # Transform solution variables prior to taking the gradient @@ -89,519 +108,598 @@ end function transform_variables!(u_transformed, u, mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, cache, cache_parabolic) - @threaded for element in eachelement(dg, cache) - # Calculate volume terms in one element - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations_parabolic, dg, i, j, element) - u_transformed_node = gradient_variable_transformation(equations_parabolic)(u_node, equations_parabolic) - set_node_vars!(u_transformed, u_transformed_node, equations_parabolic, dg, i, j, element) + @threaded for element in eachelement(dg, cache) + # Calculate volume terms in one element + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations_parabolic, dg, i, j, element) + u_transformed_node = gradient_variable_transformation(equations_parabolic)(u_node, + equations_parabolic) + set_node_vars!(u_transformed, u_transformed_node, equations_parabolic, dg, + i, j, element) + end end - end end # This is the version used when calculating the divergence of the viscous fluxes function calc_volume_integral!(du, flux_viscous, - mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{2}, + equations_parabolic::AbstractEquationsParabolic, dg::DGSEM, cache) - @unpack derivative_dhat = dg.basis - flux_viscous_x, flux_viscous_y = flux_viscous - - @threaded for element in eachelement(dg, cache) - # Calculate volume terms in one element - for j in eachnode(dg), i in eachnode(dg) - flux_1_node = get_node_vars(flux_viscous_x, equations_parabolic, dg, i, j, element) - flux_2_node = get_node_vars(flux_viscous_y, equations_parabolic, dg, i, j, element) - - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, derivative_dhat[ii, i], flux_1_node, equations_parabolic, dg, ii, j, 
element) - end - - for jj in eachnode(dg) - multiply_add_to_node_vars!(du, derivative_dhat[jj, j], flux_2_node, equations_parabolic, dg, i, jj, element) - end + @unpack derivative_dhat = dg.basis + flux_viscous_x, flux_viscous_y = flux_viscous + + @threaded for element in eachelement(dg, cache) + # Calculate volume terms in one element + for j in eachnode(dg), i in eachnode(dg) + flux_1_node = get_node_vars(flux_viscous_x, equations_parabolic, dg, i, j, + element) + flux_2_node = get_node_vars(flux_viscous_y, equations_parabolic, dg, i, j, + element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, derivative_dhat[ii, i], flux_1_node, + equations_parabolic, dg, ii, j, element) + end + + for jj in eachnode(dg) + multiply_add_to_node_vars!(du, derivative_dhat[jj, j], flux_2_node, + equations_parabolic, dg, i, jj, element) + end + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache_parabolic, flux_viscous, - mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{2}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) - @unpack interfaces = cache_parabolic - @unpack orientations = interfaces - - flux_viscous_x, flux_viscous_y = flux_viscous - - @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] - - if orientations[interface] == 1 - # interface in x-direction - for j in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - interfaces.u[1, v, j, interface] = flux_viscous_x[v, nnodes(dg), j, left_element] - interfaces.u[2, v, j, interface] = flux_viscous_x[v, 1, j, right_element] - end - else # if orientations[interface] == 2 - # interface in y-direction - for i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - interfaces.u[1, v, i, interface] = flux_viscous_y[v, i, nnodes(dg), left_element] - interfaces.u[2, v, i, interface] = flux_viscous_y[v, i, 1, right_element] - end + @unpack interfaces = cache_parabolic + @unpack orientations = interfaces + + flux_viscous_x, flux_viscous_y = flux_viscous + + @threaded for interface in eachinterface(dg, cache) + left_element = interfaces.neighbor_ids[1, interface] + right_element = interfaces.neighbor_ids[2, interface] + + if orientations[interface] == 1 + # interface in x-direction + for j in eachnode(dg), v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! + interfaces.u[1, v, j, interface] = flux_viscous_x[v, nnodes(dg), j, + left_element] + interfaces.u[2, v, j, interface] = flux_viscous_x[v, 1, j, + right_element] + end + else # if orientations[interface] == 2 + # interface in y-direction + for i in eachnode(dg), v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! 
+ interfaces.u[1, v, i, interface] = flux_viscous_y[v, i, nnodes(dg), + left_element] + interfaces.u[2, v, i, interface] = flux_viscous_y[v, i, 1, + right_element] + end + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{2}, equations_parabolic, dg::DG, cache_parabolic) - @unpack neighbor_ids, orientations = cache_parabolic.interfaces - - @threaded for interface in eachinterface(dg, cache_parabolic) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for i in eachnode(dg) - # Get precomputed fluxes at interfaces - flux_ll, flux_rr = get_surface_node_vars(cache_parabolic.interfaces.u, equations_parabolic, - dg, i, interface) - - # Compute interface flux as mean of left and right viscous fluxes - # TODO: parabolic; only BR1 at the moment - flux = 0.5 * (flux_ll + flux_rr) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, i, left_direction, left_id] = flux[v] - surface_flux_values[v, i, right_direction, right_id] = flux[v] - end + @unpack neighbor_ids, orientations = cache_parabolic.interfaces + + @threaded for interface in eachinterface(dg, cache_parabolic) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for i in eachnode(dg) + # Get precomputed fluxes at interfaces + flux_ll, flux_rr = get_surface_node_vars(cache_parabolic.interfaces.u, + equations_parabolic, + dg, i, interface) + + # Compute interface flux as mean of left and right viscous fluxes + # TODO: parabolic; only BR1 at the moment + flux = 0.5 * (flux_ll + flux_rr) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, left_direction, left_id] = flux[v] + surface_flux_values[v, i, right_direction, right_id] = flux[v] + end + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes function prolong2boundaries!(cache_parabolic, flux_viscous, - mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{2}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) - @unpack boundaries = cache_parabolic - @unpack orientations, neighbor_sides = boundaries - flux_viscous_x, flux_viscous_y = flux_viscous - - @threaded for boundary in eachboundary(dg, cache_parabolic) - element = boundaries.neighbor_ids[boundary] - - if orientations[boundary] == 1 - # boundary in x-direction - if neighbor_sides[boundary] == 1 - # element in -x direction of boundary - for l in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! 
- boundaries.u[1, v, l, boundary] = flux_viscous_x[v, nnodes(dg), l, element] - end - else # Element in +x direction of boundary - for l in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[2, v, l, boundary] = flux_viscous_x[v, 1, l, element] + @unpack boundaries = cache_parabolic + @unpack orientations, neighbor_sides = boundaries + flux_viscous_x, flux_viscous_y = flux_viscous + + @threaded for boundary in eachboundary(dg, cache_parabolic) + element = boundaries.neighbor_ids[boundary] + + if orientations[boundary] == 1 + # boundary in x-direction + if neighbor_sides[boundary] == 1 + # element in -x direction of boundary + for l in eachnode(dg), v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[1, v, l, boundary] = flux_viscous_x[v, nnodes(dg), l, + element] + end + else # Element in +x direction of boundary + for l in eachnode(dg), v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[2, v, l, boundary] = flux_viscous_x[v, 1, l, element] + end + end + else # if orientations[boundary] == 2 + # boundary in y-direction + if neighbor_sides[boundary] == 1 + # element in -y direction of boundary + for l in eachnode(dg), v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[1, v, l, boundary] = flux_viscous_y[v, l, nnodes(dg), + element] + end + else + # element in +y direction of boundary + for l in eachnode(dg), v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[2, v, l, boundary] = flux_viscous_y[v, l, 1, element] + end + end end - end - else # if orientations[boundary] == 2 - # boundary in y-direction - if neighbor_sides[boundary] == 1 - # element in -y direction of boundary - for l in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[1, v, l, boundary] = flux_viscous_y[v, l, nnodes(dg), element] - end - else - # element in +y direction of boundary - for l in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! 
- boundaries.u[2, v, l, boundary] = flux_viscous_y[v, l, 1, element] - end - end end - end - return nothing + return nothing end - -function calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, - mesh::Union{TreeMesh{2}, P4estMesh{2}}, +function calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, + mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache, cache_parabolic) - gradients_x, gradients_y = gradients - flux_viscous_x, flux_viscous_y = flux_viscous # output arrays - - @threaded for element in eachelement(dg, cache) - for j in eachnode(dg), i in eachnode(dg) - # Get solution and gradients - u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, element) - gradients_1_node = get_node_vars(gradients_x, equations_parabolic, dg, i, j, element) - gradients_2_node = get_node_vars(gradients_y, equations_parabolic, dg, i, j, element) - - # Calculate viscous flux and store each component for later use - flux_viscous_node_x = flux(u_node, (gradients_1_node, gradients_2_node), 1, equations_parabolic) - flux_viscous_node_y = flux(u_node, (gradients_1_node, gradients_2_node), 2, equations_parabolic) - set_node_vars!(flux_viscous_x, flux_viscous_node_x, equations_parabolic, dg, i, j, element) - set_node_vars!(flux_viscous_y, flux_viscous_node_y, equations_parabolic, dg, i, j, element) + gradients_x, gradients_y = gradients + flux_viscous_x, flux_viscous_y = flux_viscous # output arrays + + @threaded for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + # Get solution and gradients + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, + element) + gradients_1_node = get_node_vars(gradients_x, equations_parabolic, dg, i, j, + element) + gradients_2_node = get_node_vars(gradients_y, equations_parabolic, dg, i, j, + element) + + # Calculate viscous flux and store each component for later use + flux_viscous_node_x = flux(u_node, (gradients_1_node, gradients_2_node), 1, + equations_parabolic) + flux_viscous_node_y = flux(u_node, (gradients_1_node, gradients_2_node), 2, + equations_parabolic) + set_node_vars!(flux_viscous_x, flux_viscous_node_x, equations_parabolic, dg, + i, j, element) + set_node_vars!(flux_viscous_y, flux_viscous_node_y, equations_parabolic, dg, + i, j, element) + end end - end end - # TODO: parabolic; decide if we should keep this, and if so, extend to 3D. 
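# Direction bookkeeping for the helper defined next: orientations are
# 1 (x) and 2 (y), and directions 1:4 label the -x, +x, -y, +y faces of an
# element; an interface's left neighbor touches it through its "+" face and
# the right neighbor through its "-" face, which yields the arithmetic
# `left_direction = 2 * orientation` and `right_direction = 2 * orientation - 1`
# used above. An illustrative consistency check, using the helper defined
# just below (both faces of an interface share the same unsigned normal):
for orientation in (1, 2)
    left_direction = 2 * orientation        # 2 (+x) or 4 (+y)
    right_direction = 2 * orientation - 1   # 1 (-x) or 3 (-y)
    @assert get_unsigned_normal_vector_2d(left_direction) ==
            get_unsigned_normal_vector_2d(right_direction)
end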
function get_unsigned_normal_vector_2d(direction) - if direction > 4 || direction < 1 - error("Direction = $direction; in 2D, direction should be 1, 2, 3, or 4.") - end - if direction == 1 || direction == 2 - return SVector(1.0, 0.0) - else - return SVector(0.0, 1.0) - end + if direction > 4 || direction < 1 + error("Direction = $direction; in 2D, direction should be 1, 2, 3, or 4.") + end + if direction == 1 || direction == 2 + return SVector(1.0, 0.0) + else + return SVector(0.0, 1.0) + end end -function calc_boundary_flux_gradients!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations_parabolic::AbstractEquationsParabolic, - surface_integral, dg::DG) - return nothing +function calc_boundary_flux_gradients!(cache, t, + boundary_conditions_parabolic::BoundaryConditionPeriodic, + mesh::Union{TreeMesh{2}, P4estMesh{2}}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG) + return nothing end -function calc_boundary_flux_divergence!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations_parabolic::AbstractEquationsParabolic, +function calc_boundary_flux_divergence!(cache, t, + boundary_conditions_parabolic::BoundaryConditionPeriodic, + mesh::Union{TreeMesh{2}, P4estMesh{2}}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) - return nothing + return nothing end -function calc_boundary_flux_gradients!(cache, t, boundary_conditions_parabolic::NamedTuple, - mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, +function calc_boundary_flux_gradients!(cache, t, + boundary_conditions_parabolic::NamedTuple, + mesh::TreeMesh{2}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[1], - equations_parabolic, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[2], - equations_parabolic, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[3], - equations_parabolic, surface_integral, dg, cache, - 3, firsts[3], lasts[3]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[4], - equations_parabolic, surface_integral, dg, cache, - 4, firsts[4], lasts[4]) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[1], + equations_parabolic, surface_integral, dg, + cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[2], + equations_parabolic, surface_integral, dg, + cache, + 2, firsts[2], lasts[2]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + 
boundary_conditions_parabolic[3], + equations_parabolic, surface_integral, dg, + cache, + 3, firsts[3], lasts[3]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[4], + equations_parabolic, surface_integral, dg, + cache, + 4, firsts[4], lasts[4]) end -function calc_boundary_flux_by_direction_gradient!(surface_flux_values::AbstractArray{<:Any,4}, t, +function calc_boundary_flux_by_direction_gradient!(surface_flux_values::AbstractArray{ + <:Any, + 4 + }, + t, boundary_condition, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache, - direction, first_boundary, last_boundary) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - for i in eachnode(dg) - # Get boundary flux - u_ll, u_rr = get_surface_node_vars(u, equations_parabolic, dg, i, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end - - # TODO: revisit if we want more general boundary treatments. - # This assumes the gradient numerical flux at the boundary is the gradient variable, - # which is consistent with BR1, LDG. - flux_inner = u_inner - - x = get_node_coords(node_coordinates, equations_parabolic, dg, i, boundary) - flux = boundary_condition(flux_inner, u_inner, get_unsigned_normal_vector_2d(direction), - x, t, Gradient(), equations_parabolic) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, i, direction, neighbor] = flux[v] - end + direction, first_boundary, + last_boundary) + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + for i in eachnode(dg) + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations_parabolic, dg, i, boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end + + # TODO: revisit if we want more general boundary treatments. + # This assumes the gradient numerical flux at the boundary is the gradient variable, + # which is consistent with BR1, LDG. 
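# To make the Gradient()/Divergence() dispatch concrete: a Dirichlet-type
# parabolic boundary condition under BR1 returns the prescribed boundary
# state in the gradient stage (so the lifted gradient sees the boundary
# data) and passes the one-sided viscous flux through in the divergence
# stage. A reduced sketch of such a two-mode callable (a hypothetical
# `SimpleDirichlet`, not Trixi's `BoundaryConditionDirichlet`; it assumes
# Trixi's `Gradient`/`Divergence` marker types are in scope, and the
# argument order mirrors the calls in this file):
struct SimpleDirichlet{F}
    boundary_value::F  # (x, t) -> prescribed state
end

function (bc::SimpleDirichlet)(flux_inner, u_inner, normal, x, t,
                               operator_type::Gradient, equations)
    return bc.boundary_value(x, t)  # gradient stage sees the boundary datum
end

function (bc::SimpleDirichlet)(flux_inner, u_inner, normal, x, t,
                               operator_type::Divergence, equations)
    return flux_inner  # keep the one-sided viscous flux (BR1-consistent)
end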
+ flux_inner = u_inner + + x = get_node_coords(node_coordinates, equations_parabolic, dg, i, boundary) + flux = boundary_condition(flux_inner, u_inner, + get_unsigned_normal_vector_2d(direction), + x, t, Gradient(), equations_parabolic) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, direction, neighbor] = flux[v] + end + end end - end - return nothing + return nothing end -function calc_boundary_flux_divergence!(cache, t, boundary_conditions_parabolic::NamedTuple, - mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, +function calc_boundary_flux_divergence!(cache, t, + boundary_conditions_parabolic::NamedTuple, + mesh::TreeMesh{2}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[1], - equations_parabolic, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[2], - equations_parabolic, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[3], - equations_parabolic, surface_integral, dg, cache, - 3, firsts[3], lasts[3]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[4], - equations_parabolic, surface_integral, dg, cache, - 4, firsts[4], lasts[4]) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[1], + equations_parabolic, surface_integral, + dg, cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[2], + equations_parabolic, surface_integral, + dg, cache, + 2, firsts[2], lasts[2]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[3], + equations_parabolic, surface_integral, + dg, cache, + 3, firsts[3], lasts[3]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[4], + equations_parabolic, surface_integral, + dg, cache, + 4, firsts[4], lasts[4]) end -function calc_boundary_flux_by_direction_divergence!(surface_flux_values::AbstractArray{<:Any,4}, t, +function calc_boundary_flux_by_direction_divergence!(surface_flux_values::AbstractArray{ + <:Any, + 4 + }, + t, boundary_condition, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache, - direction, first_boundary, last_boundary) - @unpack surface_flux = surface_integral - - # Note: cache.boundaries.u contains the unsigned normal component (using "orientation", not "direction") - # of the viscous flux, as computed in `prolong2boundaries!` - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in 
first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - for i in eachnode(dg) - # Get viscous boundary fluxes - flux_ll, flux_rr = get_surface_node_vars(u, equations_parabolic, dg, i, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - flux_inner = flux_ll - else # Element is on the right, boundary on the left - flux_inner = flux_rr - end - - x = get_node_coords(node_coordinates, equations_parabolic, dg, i, boundary) - - # TODO: add a field in `cache.boundaries` for gradient information. - # Here, we pass in `u_inner = nothing` since we overwrite cache.boundaries.u with gradient information. - # This currently works with Dirichlet/Neuman boundary conditions for LaplaceDiffusion2D and - # NoSlipWall/Adiabatic boundary conditions for CompressibleNavierStokesDiffusion2D as of 2022-6-27. - # It will not work with implementations which utilize `u_inner` to impose boundary conditions. - flux = boundary_condition(flux_inner, nothing, get_unsigned_normal_vector_2d(direction), - x, t, Divergence(), equations_parabolic) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, i, direction, neighbor] = flux[v] - end + direction, first_boundary, + last_boundary) + @unpack surface_flux = surface_integral + + # Note: cache.boundaries.u contains the unsigned normal component (using "orientation", not "direction") + # of the viscous flux, as computed in `prolong2boundaries!` + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + for i in eachnode(dg) + # Get viscous boundary fluxes + flux_ll, flux_rr = get_surface_node_vars(u, equations_parabolic, dg, i, + boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + flux_inner = flux_ll + else # Element is on the right, boundary on the left + flux_inner = flux_rr + end + + x = get_node_coords(node_coordinates, equations_parabolic, dg, i, boundary) + + # TODO: add a field in `cache.boundaries` for gradient information. + # Here, we pass in `u_inner = nothing` since we overwrite cache.boundaries.u with gradient information. + # This currently works with Dirichlet/Neuman boundary conditions for LaplaceDiffusion2D and + # NoSlipWall/Adiabatic boundary conditions for CompressibleNavierStokesDiffusion2D as of 2022-6-27. + # It will not work with implementations which utilize `u_inner` to impose boundary conditions. 
+ flux = boundary_condition(flux_inner, nothing, + get_unsigned_normal_vector_2d(direction), + x, t, Divergence(), equations_parabolic) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, direction, neighbor] = flux[v] + end + end end - end - return nothing + return nothing end - # Calculate the gradient of the transformed variables function calc_gradient!(gradients, u_transformed, t, mesh::TreeMesh{2}, equations_parabolic, boundary_conditions_parabolic, dg::DG, cache, cache_parabolic) + gradients_x, gradients_y = gradients - gradients_x, gradients_y = gradients - - # Reset du - @trixi_timeit timer() "reset gradients" begin - reset_du!(gradients_x, dg, cache) - reset_du!(gradients_y, dg, cache) - end - - # Calculate volume integral - @trixi_timeit timer() "volume integral" begin - @unpack derivative_dhat = dg.basis - @threaded for element in eachelement(dg, cache) - - # Calculate volume terms in one element - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, element) + # Reset du + @trixi_timeit timer() "reset gradients" begin + reset_du!(gradients_x, dg, cache) + reset_du!(gradients_y, dg, cache) + end - for ii in eachnode(dg) - multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i], u_node, equations_parabolic, dg, ii, j, element) + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + @unpack derivative_dhat = dg.basis + @threaded for element in eachelement(dg, cache) + + # Calculate volume terms in one element + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, + element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i], + u_node, equations_parabolic, dg, ii, j, + element) + end + + for jj in eachnode(dg) + multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j], + u_node, equations_parabolic, dg, i, jj, + element) + end + end end + end - for jj in eachnode(dg) - multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j], u_node, equations_parabolic, dg, i, jj, element) - end - end + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache_parabolic, u_transformed, mesh, equations_parabolic, + dg.surface_integral, dg) end - end - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + @unpack surface_flux_values = cache_parabolic.elements + @unpack neighbor_ids, orientations = cache_parabolic.interfaces + + @threaded for interface in eachinterface(dg, cache_parabolic) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for i in eachnode(dg) + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(cache_parabolic.interfaces.u, + equations_parabolic, dg, i, + interface) + flux = 0.5 * (u_ll + u_rr) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, left_direction, 
left_id] = flux[v] + surface_flux_values[v, i, right_direction, right_id] = flux[v] + end + end + end + end - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" begin - @unpack surface_flux_values = cache_parabolic.elements - @unpack neighbor_ids, orientations = cache_parabolic.interfaces + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache_parabolic, u_transformed, mesh, equations_parabolic, + dg.surface_integral, dg) + end - @threaded for interface in eachinterface(dg, cache_parabolic) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for i in eachnode(dg) - # Call pointwise Riemann solver - u_ll, u_rr = get_surface_node_vars(cache_parabolic.interfaces.u, - equations_parabolic, dg, i, interface) - flux = 0.5 * (u_ll + u_rr) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, i, left_direction, left_id] = flux[v] - surface_flux_values[v, i, right_direction, right_id] = flux[v] - end - end + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_gradients!(cache_parabolic, t, + boundary_conditions_parabolic, mesh, + equations_parabolic, + dg.surface_integral, dg) end - end - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux_gradients!( - cache_parabolic, t, boundary_conditions_parabolic, mesh, equations_parabolic, - dg.surface_integral, dg) - - # TODO: parabolic; mortars - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" begin - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache_parabolic.elements - - # Note that all fluxes have been computed with outward-pointing normal vectors. - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). 
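The remark about `+=` can be made concrete. `@muladd` (from MuladdMacro.jl, under which this whole file is wrapped) rewrites expressions of the form `a + b * c` into `muladd` calls, but, as the comment above notes, it does not descend into compound assignments, hence the explicit assignments in the surface-integral loops. A minimal standalone sketch with toy arrays:

```julia
using MuladdMacro: @muladd

# Explicit assignment: `@muladd` can rewrite `a + b * c` into a `muladd` call,
# so this loop can compile down to fused multiply-adds.
@muladd function lift_explicit!(du, flux, factor)
    for i in eachindex(du)
        du[i] = du[i] + flux[i] * factor
    end
    return nothing
end

# Compound assignment: `+=` is not a plain `+` call in the expression tree,
# so `@muladd` leaves it untouched and no FMA is formed.
@muladd function lift_compound!(du, flux, factor)
    for i in eachindex(du)
        du[i] += flux[i] * factor
    end
    return nothing
end
```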
- factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for l in eachnode(dg) - for v in eachvariable(equations_parabolic) - # surface at -x - gradients_x[v, 1, l, element] = ( - gradients_x[v, 1, l, element] - surface_flux_values[v, l, 1, element] * factor_1) - - # surface at +x - gradients_x[v, nnodes(dg), l, element] = ( - gradients_x[v, nnodes(dg), l, element] + surface_flux_values[v, l, 2, element] * factor_2) - - # surface at -y - gradients_y[v, l, 1, element] = ( - gradients_y[v, l, 1, element] - surface_flux_values[v, l, 3, element] * factor_1) - - # surface at +y - gradients_y[v, l, nnodes(dg), element] = ( - gradients_y[v, l, nnodes(dg), element] + surface_flux_values[v, l, 4, element] * factor_2) + + # TODO: parabolic; mortars + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache_parabolic.elements + + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). + factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for l in eachnode(dg) + for v in eachvariable(equations_parabolic) + # surface at -x + gradients_x[v, 1, l, element] = (gradients_x[v, 1, l, element] - + surface_flux_values[v, l, 1, + element] * + factor_1) + + # surface at +x + gradients_x[v, nnodes(dg), l, element] = (gradients_x[v, nnodes(dg), + l, element] + + surface_flux_values[v, l, + 2, + element] * + factor_2) + + # surface at -y + gradients_y[v, l, 1, element] = (gradients_y[v, l, 1, element] - + surface_flux_values[v, l, 3, + element] * + factor_1) + + # surface at +y + gradients_y[v, l, nnodes(dg), element] = (gradients_y[v, l, + nnodes(dg), + element] + + surface_flux_values[v, l, + 4, + element] * + factor_2) + end + end end - end end - end - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" begin - apply_jacobian_parabolic!(gradients_x, mesh, equations_parabolic, dg, cache_parabolic) - apply_jacobian_parabolic!(gradients_y, mesh, equations_parabolic, dg, cache_parabolic) - end + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(gradients_x, mesh, equations_parabolic, dg, + cache_parabolic) + apply_jacobian_parabolic!(gradients_y, mesh, equations_parabolic, dg, + cache_parabolic) + end - return nothing + return nothing end - # This method is called when a SemidiscretizationHyperbolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. -function create_cache_parabolic(mesh::TreeMesh{2}, equations_hyperbolic::AbstractEquations, +function create_cache_parabolic(mesh::TreeMesh{2}, + equations_hyperbolic::AbstractEquations, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, RealT, uEltype) - # Get cells for which an element needs to be created (i.e. all leaf cells) - leaf_cell_ids = local_leaf_cells(mesh.tree) + # Get cells for which an element needs to be created (i.e. 
all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) - elements = init_elements(leaf_cell_ids, mesh, equations_hyperbolic, dg.basis, RealT, uEltype) + elements = init_elements(leaf_cell_ids, mesh, equations_hyperbolic, dg.basis, RealT, + uEltype) - n_vars = nvariables(equations_hyperbolic) - n_nodes = nnodes(elements) - n_elements = nelements(elements) - u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_nodes, n_elements) - gradients = ntuple(_ -> similar(u_transformed), ndims(mesh)) - flux_viscous = ntuple(_ -> similar(u_transformed), ndims(mesh)) + n_vars = nvariables(equations_hyperbolic) + n_nodes = nnodes(elements) + n_elements = nelements(elements) + u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_nodes, n_elements) + gradients = ntuple(_ -> similar(u_transformed), ndims(mesh)) + flux_viscous = ntuple(_ -> similar(u_transformed), ndims(mesh)) - interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) - boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) - # mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) + # mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) - # cache = (; elements, interfaces, boundaries, mortars) - cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) + # cache = (; elements, interfaces, boundaries, mortars) + cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) - # Add specialized parts of the cache required to compute the mortars etc. - # cache = (;cache..., create_cache(mesh, equations_parabolic, dg.mortar, uEltype)...) + # Add specialized parts of the cache required to compute the mortars etc. + # cache = (;cache..., create_cache(mesh, equations_parabolic, dg.mortar, uEltype)...) - return cache + return cache end - # Needed to *not* flip the sign of the inverse Jacobian. # This is because the parabolic fluxes are assumed to be of the form # `du/dt + df/dx = dg/dx + source(x,t)`, # where f(u) is the inviscid flux and g(u) is the viscous flux. -function apply_jacobian_parabolic!(du, mesh::Union{TreeMesh{2}, P4estMesh{2}}, +function apply_jacobian_parabolic!(du, mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations::AbstractEquationsParabolic, dg::DG, cache) + @threaded for element in eachelement(dg, cache) + factor = cache.elements.inverse_jacobian[element] - @threaded for element in eachelement(dg, cache) - factor = cache.elements.inverse_jacobian[element] - - for j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, j, element] *= factor - end + for j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, j, element] *= factor + end + end end - end - return nothing + return nothing end - end # @muladd diff --git a/src/solvers/dgsem_tree/dg_2d_parallel.jl b/src/solvers/dgsem_tree/dg_2d_parallel.jl index a7c6a8b4746..8095dae123a 100644 --- a/src/solvers/dgsem_tree/dg_2d_parallel.jl +++ b/src/solvers/dgsem_tree/dg_2d_parallel.jl @@ -3,734 +3,789 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # everything related to a DG semidiscretization in 2D using MPI, # currently limited to Lobatto-Legendre nodes - # TODO: MPI dimension agnostic mutable struct MPICache{uEltype <: Real} - mpi_neighbor_ranks::Vector{Int} - mpi_neighbor_interfaces::Vector{Vector{Int}} - mpi_neighbor_mortars::Vector{Vector{Int}} - mpi_send_buffers::Vector{Vector{uEltype}} - mpi_recv_buffers::Vector{Vector{uEltype}} - mpi_send_requests::Vector{MPI.Request} - mpi_recv_requests::Vector{MPI.Request} - n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} - n_elements_global::Int - first_element_global_id::Int + mpi_neighbor_ranks::Vector{Int} + mpi_neighbor_interfaces::Vector{Vector{Int}} + mpi_neighbor_mortars::Vector{Vector{Int}} + mpi_send_buffers::Vector{Vector{uEltype}} + mpi_recv_buffers::Vector{Vector{uEltype}} + mpi_send_requests::Vector{MPI.Request} + mpi_recv_requests::Vector{MPI.Request} + n_elements_by_rank::OffsetArray{Int, 1, Array{Int, 1}} + n_elements_global::Int + first_element_global_id::Int end - function MPICache(uEltype) - # MPI communication "just works" for bitstypes only - if !isbitstype(uEltype) - throw(ArgumentError("MPICache only supports bitstypes, $uEltype is not a bitstype.")) - end - mpi_neighbor_ranks = Vector{Int}(undef, 0) - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) - mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) - mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) - mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) - mpi_send_requests = Vector{MPI.Request}(undef, 0) - mpi_recv_requests = Vector{MPI.Request}(undef, 0) - n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1) - n_elements_global = 0 - first_element_global_id = 0 - - MPICache{uEltype}(mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, - mpi_send_buffers, mpi_recv_buffers, - mpi_send_requests, mpi_recv_requests, - n_elements_by_rank, n_elements_global, - first_element_global_id) + # MPI communication "just works" for bitstypes only + if !isbitstype(uEltype) + throw(ArgumentError("MPICache only supports bitstypes, $uEltype is not a bitstype.")) + end + mpi_neighbor_ranks = Vector{Int}(undef, 0) + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, 0) + mpi_neighbor_mortars = Vector{Vector{Int}}(undef, 0) + mpi_send_buffers = Vector{Vector{uEltype}}(undef, 0) + mpi_recv_buffers = Vector{Vector{uEltype}}(undef, 0) + mpi_send_requests = Vector{MPI.Request}(undef, 0) + mpi_recv_requests = Vector{MPI.Request}(undef, 0) + n_elements_by_rank = OffsetArray(Vector{Int}(undef, 0), 0:-1) + n_elements_global = 0 + first_element_global_id = 0 + + MPICache{uEltype}(mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, + mpi_send_buffers, mpi_recv_buffers, + mpi_send_requests, mpi_recv_requests, + n_elements_by_rank, n_elements_global, + first_element_global_id) end -@inline Base.eltype(::MPICache{uEltype}) where uEltype = uEltype - +@inline Base.eltype(::MPICache{uEltype}) where {uEltype} = uEltype # TODO: MPI dimension agnostic function start_mpi_receive!(mpi_cache::MPICache) + for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) + mpi_cache.mpi_recv_requests[index] = MPI.Irecv!(mpi_cache.mpi_recv_buffers[index], + d, d, mpi_comm()) + end - for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) - mpi_cache.mpi_recv_requests[index] = MPI.Irecv!( - mpi_cache.mpi_recv_buffers[index], d, d, mpi_comm()) - end - - return nothing + return nothing end - # TODO: MPI dimension agnostic function start_mpi_send!(mpi_cache::MPICache, mesh, equations, dg, 
cache) - data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) + data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) - for d in 1:length(mpi_cache.mpi_neighbor_ranks) - send_buffer = mpi_cache.mpi_send_buffers[d] + for d in 1:length(mpi_cache.mpi_neighbor_ranks) + send_buffer = mpi_cache.mpi_send_buffers[d] - for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) - first = (index - 1) * data_size + 1 - last = (index - 1) * data_size + data_size - - if cache.mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction - @views send_buffer[first:last] .= vec(cache.mpi_interfaces.u[2, :, :, interface]) - else # local element in negative direction - @views send_buffer[first:last] .= vec(cache.mpi_interfaces.u[1, :, :, interface]) - end - end + for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) + first = (index - 1) * data_size + 1 + last = (index - 1) * data_size + data_size - # Each mortar has a total size of 4 * data_size, set everything to NaN first and overwrite the - # parts where local data exists - interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size - mortars_data_size = length(mpi_cache.mpi_neighbor_mortars[d]) * 4 * data_size - send_buffer[interfaces_data_size+1:interfaces_data_size+mortars_data_size] .= NaN - - for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) - # First and last indices in the send buffer for mortar data obtained from local element - # in a given position - index_base = interfaces_data_size + (index - 1) * 4 * data_size - indices = ( - # first, last for local element in position 1 (lower element) - (index_base + 1, - index_base + 1 * data_size), - # first, last for local element in position 2 (upper element) - (index_base + 1 * data_size + 1, - index_base + 2 * data_size), - # firsts, lasts for local element in position 3 (large element) - (index_base + 2 * data_size + 1, - index_base + 3 * data_size, - index_base + 3 * data_size + 1, - index_base + 4 * data_size), - ) - - for position in cache.mpi_mortars.local_neighbor_positions[mortar] - # Determine whether the data belongs to the left or right side - if cache.mpi_mortars.large_sides[mortar] == 1 # large element on left side - if position in (1, 2) # small element - leftright = 2 - else # large element - leftright = 1 - end - else # large element on right side - if position in (1, 2) # small element - leftright = 1 - else # large element - leftright = 2 - end + if cache.mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction + @views send_buffer[first:last] .= vec(cache.mpi_interfaces.u[2, :, :, + interface]) + else # local element in negative direction + @views send_buffer[first:last] .= vec(cache.mpi_interfaces.u[1, :, :, + interface]) + end end - # copy data to buffer - if position == 1 # lower element - first, last = indices[position] - @views send_buffer[first:last] .= vec(cache.mpi_mortars.u_lower[leftright, :, :, mortar]) - elseif position == 2 # upper element - first, last = indices[position] - @views send_buffer[first:last] .= vec(cache.mpi_mortars.u_upper[leftright, :, :, mortar]) - else # large element - first_lower, last_lower, first_upper, last_upper = indices[position] - @views send_buffer[first_lower:last_lower] .= vec(cache.mpi_mortars.u_lower[leftright, :, :, mortar]) - @views send_buffer[first_upper:last_upper] .= vec(cache.mpi_mortars.u_upper[leftright, :, :, mortar]) + + # Each mortar has a total size of 4 * data_size, set everything to 
NaN first and overwrite the + # parts where local data exists + interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size + mortars_data_size = length(mpi_cache.mpi_neighbor_mortars[d]) * 4 * data_size + send_buffer[(interfaces_data_size + 1):(interfaces_data_size + mortars_data_size)] .= NaN + + for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) + # First and last indices in the send buffer for mortar data obtained from local element + # in a given position + index_base = interfaces_data_size + (index - 1) * 4 * data_size + indices = ( + # first, last for local element in position 1 (lower element) + (index_base + 1, + index_base + 1 * data_size), + # first, last for local element in position 2 (upper element) + (index_base + 1 * data_size + 1, + index_base + 2 * data_size), + # firsts, lasts for local element in position 3 (large element) + (index_base + 2 * data_size + 1, + index_base + 3 * data_size, + index_base + 3 * data_size + 1, + index_base + 4 * data_size)) + + for position in cache.mpi_mortars.local_neighbor_positions[mortar] + # Determine whether the data belongs to the left or right side + if cache.mpi_mortars.large_sides[mortar] == 1 # large element on left side + if position in (1, 2) # small element + leftright = 2 + else # large element + leftright = 1 + end + else # large element on right side + if position in (1, 2) # small element + leftright = 1 + else # large element + leftright = 2 + end + end + # copy data to buffer + if position == 1 # lower element + first, last = indices[position] + @views send_buffer[first:last] .= vec(cache.mpi_mortars.u_lower[leftright, + :, + :, + mortar]) + elseif position == 2 # upper element + first, last = indices[position] + @views send_buffer[first:last] .= vec(cache.mpi_mortars.u_upper[leftright, + :, + :, + mortar]) + else # large element + first_lower, last_lower, first_upper, last_upper = indices[position] + @views send_buffer[first_lower:last_lower] .= vec(cache.mpi_mortars.u_lower[leftright, + :, + :, + mortar]) + @views send_buffer[first_upper:last_upper] .= vec(cache.mpi_mortars.u_upper[leftright, + :, + :, + mortar]) + end + end end - end end - end - # Start sending - for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) - mpi_cache.mpi_send_requests[index] = MPI.Isend( - mpi_cache.mpi_send_buffers[index], d, mpi_rank(), mpi_comm()) - end + # Start sending + for (index, d) in enumerate(mpi_cache.mpi_neighbor_ranks) + mpi_cache.mpi_send_requests[index] = MPI.Isend(mpi_cache.mpi_send_buffers[index], + d, mpi_rank(), mpi_comm()) + end - return nothing + return nothing end - # TODO: MPI dimension agnostic function finish_mpi_send!(mpi_cache::MPICache) - MPI.Waitall(mpi_cache.mpi_send_requests, MPI.Status) + MPI.Waitall(mpi_cache.mpi_send_requests, MPI.Status) end - # TODO: MPI dimension agnostic function finish_mpi_receive!(mpi_cache::MPICache, mesh, equations, dg, cache) - data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) - - # Start receiving and unpack received data until all communication is finished - d = MPI.Waitany(mpi_cache.mpi_recv_requests) - while d !== nothing - recv_buffer = mpi_cache.mpi_recv_buffers[d] - - for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) - first = (index - 1) * data_size + 1 - last = (index - 1) * data_size + data_size - - if cache.mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction - @views vec(cache.mpi_interfaces.u[1, :, :, interface]) .= recv_buffer[first:last] - else # local element 
in negative direction - @views vec(cache.mpi_interfaces.u[2, :, :, interface]) .= recv_buffer[first:last] - end - end + data_size = nvariables(equations) * nnodes(dg)^(ndims(mesh) - 1) - interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size - for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) - # First and last indices in the receive buffer for mortar data obtained from remote element - # in a given position - index_base = interfaces_data_size + (index - 1) * 4 * data_size - indices = ( - # first, last for local element in position 1 (lower element) - (index_base + 1, - index_base + 1 * data_size), - # first, last for local element in position 2 (upper element) - (index_base + 1 * data_size + 1, - index_base + 2 * data_size), - # firsts, lasts for local element in position 3 (large element) - (index_base + 2 * data_size + 1, - index_base + 3 * data_size, - index_base + 3 * data_size + 1, - index_base + 4 * data_size), - ) - - for position in 1:3 - # Skip if received data for `pos` is NaN as no real data has been sent for the - # corresponding element - if isnan(recv_buffer[Base.first(indices[position])]) - continue - end + # Start receiving and unpack received data until all communication is finished + d = MPI.Waitany(mpi_cache.mpi_recv_requests) + while d !== nothing + recv_buffer = mpi_cache.mpi_recv_buffers[d] + + for (index, interface) in enumerate(mpi_cache.mpi_neighbor_interfaces[d]) + first = (index - 1) * data_size + 1 + last = (index - 1) * data_size + data_size - # Determine whether the received data belongs to the left or right side - if cache.mpi_mortars.large_sides[mortar] == 1 # large element on left side - if position in (1, 2) # small element - leftright = 2 - else # large element - leftright = 1 - end - else # large element on right side - if position in (1, 2) # small element - leftright = 1 - else # large element - leftright = 2 - end + if cache.mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction + @views vec(cache.mpi_interfaces.u[1, :, :, interface]) .= recv_buffer[first:last] + else # local element in negative direction + @views vec(cache.mpi_interfaces.u[2, :, :, interface]) .= recv_buffer[first:last] + end end - if position == 1 # lower element data has been received - first, last = indices[position] - @views vec(cache.mpi_mortars.u_lower[leftright, :, :, mortar]) .= recv_buffer[first:last] - elseif position == 2 # upper element data has been received - first, last = indices[position] - @views vec(cache.mpi_mortars.u_upper[leftright, :, :, mortar]) .= recv_buffer[first:last] - else # large element data has been received - first_lower, last_lower, first_upper, last_upper = indices[position] - @views vec(cache.mpi_mortars.u_lower[leftright, :, :, mortar]) .= recv_buffer[first_lower:last_lower] - @views vec(cache.mpi_mortars.u_upper[leftright, :, :, mortar]) .= recv_buffer[first_upper:last_upper] + interfaces_data_size = length(mpi_cache.mpi_neighbor_interfaces[d]) * data_size + for (index, mortar) in enumerate(mpi_cache.mpi_neighbor_mortars[d]) + # First and last indices in the receive buffer for mortar data obtained from remote element + # in a given position + index_base = interfaces_data_size + (index - 1) * 4 * data_size + indices = ( + # first, last for local element in position 1 (lower element) + (index_base + 1, + index_base + 1 * data_size), + # first, last for local element in position 2 (upper element) + (index_base + 1 * data_size + 1, + index_base + 2 * data_size), + # firsts, lasts 
for local element in position 3 (large element) + (index_base + 2 * data_size + 1, + index_base + 3 * data_size, + index_base + 3 * data_size + 1, + index_base + 4 * data_size)) + + for position in 1:3 + # Skip if received data for `pos` is NaN as no real data has been sent for the + # corresponding element + if isnan(recv_buffer[Base.first(indices[position])]) + continue + end + + # Determine whether the received data belongs to the left or right side + if cache.mpi_mortars.large_sides[mortar] == 1 # large element on left side + if position in (1, 2) # small element + leftright = 2 + else # large element + leftright = 1 + end + else # large element on right side + if position in (1, 2) # small element + leftright = 1 + else # large element + leftright = 2 + end + end + + if position == 1 # lower element data has been received + first, last = indices[position] + @views vec(cache.mpi_mortars.u_lower[leftright, :, :, mortar]) .= recv_buffer[first:last] + elseif position == 2 # upper element data has been received + first, last = indices[position] + @views vec(cache.mpi_mortars.u_upper[leftright, :, :, mortar]) .= recv_buffer[first:last] + else # large element data has been received + first_lower, last_lower, first_upper, last_upper = indices[position] + @views vec(cache.mpi_mortars.u_lower[leftright, :, :, mortar]) .= recv_buffer[first_lower:last_lower] + @views vec(cache.mpi_mortars.u_upper[leftright, :, :, mortar]) .= recv_buffer[first_upper:last_upper] + end + end end - end - end - d = MPI.Waitany(mpi_cache.mpi_recv_requests) - end + d = MPI.Waitany(mpi_cache.mpi_recv_requests) + end - return nothing + return nothing end - # This method is called when a SemidiscretizationHyperbolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. function create_cache(mesh::ParallelTreeMesh{2}, equations, - dg::DG, RealT, ::Type{uEltype}) where {uEltype<:Real} - # Get cells for which an element needs to be created (i.e. all leaf cells) - leaf_cell_ids = local_leaf_cells(mesh.tree) + dg::DG, RealT, ::Type{uEltype}) where {uEltype <: Real} + # Get cells for which an element needs to be created (i.e. 
all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) - elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) + elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) - interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) - mpi_interfaces = init_mpi_interfaces(leaf_cell_ids, mesh, elements) + mpi_interfaces = init_mpi_interfaces(leaf_cell_ids, mesh, elements) - boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) - mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) + mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) - mpi_mortars = init_mpi_mortars(leaf_cell_ids, mesh, elements, dg.mortar) + mpi_mortars = init_mpi_mortars(leaf_cell_ids, mesh, elements, dg.mortar) - mpi_cache = init_mpi_cache(mesh, elements, mpi_interfaces, mpi_mortars, - nvariables(equations), nnodes(dg), uEltype) + mpi_cache = init_mpi_cache(mesh, elements, mpi_interfaces, mpi_mortars, + nvariables(equations), nnodes(dg), uEltype) - cache = (; elements, interfaces, mpi_interfaces, boundaries, mortars, mpi_mortars, + cache = (; elements, interfaces, mpi_interfaces, boundaries, mortars, mpi_mortars, mpi_cache) - # Add specialized parts of the cache required to compute the volume integral etc. - cache = (;cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) - cache = (;cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + cache = (; cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) 
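The two trailing `cache = (; cache..., ...)` lines rely on NamedTuple splatting: `(; nt..., extra...)` builds a new NamedTuple containing the union of both sets of fields, which is how solver-specific entries get appended to the basic cache. A self-contained sketch of the idiom (the field names here are made up for illustration):

```julia
# Base cache assembled from its components
cache = (; elements = "elements", interfaces = "interfaces")

# A helper returning extra, solver-specific cache entries
extra_cache() = (; fstar_tmp = zeros(3), noncons_tmp = zeros(3))

# Splatting both NamedTuples yields their union; a duplicate key would be
# overwritten by the later entry
cache = (; cache..., extra_cache()...)

@assert keys(cache) == (:elements, :interfaces, :fstar_tmp, :noncons_tmp)
```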
- return cache + return cache end +function init_mpi_cache(mesh, elements, mpi_interfaces, mpi_mortars, nvars, nnodes, + uEltype) + mpi_cache = MPICache(uEltype) -function init_mpi_cache(mesh, elements, mpi_interfaces, mpi_mortars, nvars, nnodes, uEltype) - mpi_cache = MPICache(uEltype) + init_mpi_cache!(mpi_cache, mesh, elements, mpi_interfaces, mpi_mortars, nvars, + nnodes, uEltype) + return mpi_cache +end - init_mpi_cache!(mpi_cache, mesh, elements, mpi_interfaces, mpi_mortars, nvars, nnodes, uEltype) - return mpi_cache +function init_mpi_cache!(mpi_cache, mesh, elements, mpi_interfaces, mpi_mortars, nvars, + nnodes, uEltype) + mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars = init_mpi_neighbor_connectivity(elements, + mpi_interfaces, + mpi_mortars, + mesh) + + mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = init_mpi_data_structures(mpi_neighbor_interfaces, + mpi_neighbor_mortars, + ndims(mesh), + nvars, + nnodes, + uEltype) + + # Determine local and total number of elements + n_elements_by_rank = Vector{Int}(undef, mpi_nranks()) + n_elements_by_rank[mpi_rank() + 1] = nelements(elements) + MPI.Allgather!(MPI.UBuffer(n_elements_by_rank, 1), mpi_comm()) + n_elements_by_rank = OffsetArray(n_elements_by_rank, 0:(mpi_nranks() - 1)) + n_elements_global = MPI.Allreduce(nelements(elements), +, mpi_comm()) + @assert n_elements_global==sum(n_elements_by_rank) "error in total number of elements" + + # Determine the global element id of the first element + first_element_global_id = MPI.Exscan(nelements(elements), +, mpi_comm()) + if mpi_isroot() + # With Exscan, the result on the first rank is undefined + first_element_global_id = 1 + else + # On all other ranks we need to add one, since Julia has one-based indices + first_element_global_id += 1 + end + # TODO reuse existing structures + @pack! 
mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces, + mpi_neighbor_mortars, + mpi_send_buffers, mpi_recv_buffers, + mpi_send_requests, mpi_recv_requests, + n_elements_by_rank, n_elements_global, + first_element_global_id end +# Initialize connectivity between MPI neighbor ranks +function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mpi_mortars, + mesh::TreeMesh2D) + tree = mesh.tree + + # Determine neighbor ranks and sides for MPI interfaces + neighbor_ranks_interface = fill(-1, nmpiinterfaces(mpi_interfaces)) + # The global interface id is the smaller of the (globally unique) neighbor cell ids, multiplied by + # number of directions (2 * ndims) plus direction minus one + global_interface_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) + for interface_id in 1:nmpiinterfaces(mpi_interfaces) + orientation = mpi_interfaces.orientations[interface_id] + remote_side = mpi_interfaces.remote_sides[interface_id] + # Direction is from local cell to remote cell + if orientation == 1 # MPI interface in x-direction + if remote_side == 1 # remote cell on the "left" of MPI interface + direction = 1 + else # remote cell on the "right" of MPI interface + direction = 2 + end + else # MPI interface in y-direction + if remote_side == 1 # remote cell on the "left" of MPI interface + direction = 3 + else # remote cell on the "right" of MPI interface + direction = 4 + end + end + local_neighbor_id = mpi_interfaces.local_neighbor_ids[interface_id] + local_cell_id = elements.cell_ids[local_neighbor_id] + remote_cell_id = tree.neighbor_ids[direction, local_cell_id] + neighbor_ranks_interface[interface_id] = tree.mpi_ranks[remote_cell_id] + if local_cell_id < remote_cell_id + global_interface_ids[interface_id] = 2 * ndims(tree) * local_cell_id + + direction - 1 + else + global_interface_ids[interface_id] = (2 * ndims(tree) * remote_cell_id + + opposite_direction(direction) - 1) + end + end -function init_mpi_cache!(mpi_cache, mesh, elements, mpi_interfaces, mpi_mortars, nvars, nnodes, uEltype) - mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars = - init_mpi_neighbor_connectivity(elements, mpi_interfaces, mpi_mortars, mesh) - - mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests = - init_mpi_data_structures(mpi_neighbor_interfaces, mpi_neighbor_mortars, ndims(mesh), nvars, nnodes, uEltype) - - # Determine local and total number of elements - n_elements_by_rank = Vector{Int}(undef, mpi_nranks()) - n_elements_by_rank[mpi_rank() + 1] = nelements(elements) - MPI.Allgather!(MPI.UBuffer(n_elements_by_rank, 1), mpi_comm()) - n_elements_by_rank = OffsetArray(n_elements_by_rank, 0:(mpi_nranks() - 1)) - n_elements_global = MPI.Allreduce(nelements(elements), +, mpi_comm()) - @assert n_elements_global == sum(n_elements_by_rank) "error in total number of elements" - - # Determine the global element id of the first element - first_element_global_id = MPI.Exscan(nelements(elements), +, mpi_comm()) - if mpi_isroot() - # With Exscan, the result on the first rank is undefined - first_element_global_id = 1 - else - # On all other ranks we need to add one, since Julia has one-based indices - first_element_global_id += 1 - end - # TODO reuse existing structures - @pack! 
mpi_cache = mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars, - mpi_send_buffers, mpi_recv_buffers, - mpi_send_requests, mpi_recv_requests, - n_elements_by_rank, n_elements_global, - first_element_global_id -end + # Determine neighbor ranks for MPI mortars + neighbor_ranks_mortar = Vector{Vector{Int}}(undef, nmpimortars(mpi_mortars)) + # The global mortar id is the (globally unique) large cell id, multiplied by + # number of directions (2 * ndims) plus direction minus one where + # direction = 1 for mortars in x-direction where large element is left + # direction = 2 for mortars in x-direction where large element is right + # direction = 3 for mortars in y-direction where large element is left + # direction = 4 for mortars in y-direction where large element is right + global_mortar_ids = fill(-1, nmpimortars(mpi_mortars)) + for mortar in 1:nmpimortars(mpi_mortars) + neighbor_ranks_mortar[mortar] = Vector{Int}() + + orientation = mpi_mortars.orientations[mortar] + large_side = mpi_mortars.large_sides[mortar] + direction = (orientation - 1) * 2 + large_side + + local_neighbor_ids = mpi_mortars.local_neighbor_ids[mortar] + local_neighbor_positions = mpi_mortars.local_neighbor_positions[mortar] + if 3 in local_neighbor_positions # large element is on this rank + large_element_id = local_neighbor_ids[findfirst(pos -> pos == 3, + local_neighbor_positions)] + large_cell_id = elements.cell_ids[large_element_id] + else # large element is remote + cell_id = elements.cell_ids[first(local_neighbor_ids)] + large_cell_id = tree.neighbor_ids[direction, tree.parent_ids[cell_id]] + end + + neighbor_cell_id = tree.neighbor_ids[opposite_direction(direction), + large_cell_id] + if direction == 1 + lower_cell_id = tree.child_ids[1, neighbor_cell_id] + upper_cell_id = tree.child_ids[3, neighbor_cell_id] + elseif direction == 2 + lower_cell_id = tree.child_ids[2, neighbor_cell_id] + upper_cell_id = tree.child_ids[4, neighbor_cell_id] + elseif direction == 3 + lower_cell_id = tree.child_ids[1, neighbor_cell_id] + upper_cell_id = tree.child_ids[2, neighbor_cell_id] + else + lower_cell_id = tree.child_ids[3, neighbor_cell_id] + upper_cell_id = tree.child_ids[4, neighbor_cell_id] + end + for cell_id in (lower_cell_id, upper_cell_id, large_cell_id) + if !is_own_cell(tree, cell_id) + neighbor_rank = tree.mpi_ranks[cell_id] + if !(neighbor_rank in neighbor_ranks_mortar[mortar]) + push!(neighbor_ranks_mortar[mortar], neighbor_rank) + end + end + end -# Initialize connectivity between MPI neighbor ranks -function init_mpi_neighbor_connectivity(elements, mpi_interfaces, mpi_mortars, mesh::TreeMesh2D) - tree = mesh.tree - - # Determine neighbor ranks and sides for MPI interfaces - neighbor_ranks_interface = fill(-1, nmpiinterfaces(mpi_interfaces)) - # The global interface id is the smaller of the (globally unique) neighbor cell ids, multiplied by - # number of directions (2 * ndims) plus direction minus one - global_interface_ids = fill(-1, nmpiinterfaces(mpi_interfaces)) - for interface_id in 1:nmpiinterfaces(mpi_interfaces) - orientation = mpi_interfaces.orientations[interface_id] - remote_side = mpi_interfaces.remote_sides[interface_id] - # Direction is from local cell to remote cell - if orientation == 1 # MPI interface in x-direction - if remote_side == 1 # remote cell on the "left" of MPI interface - direction = 1 - else # remote cell on the "right" of MPI interface - direction = 2 - end - else # MPI interface in y-direction - if remote_side == 1 # remote cell on the "left" of MPI interface - 
direction = 3 - else # remote cell on the "right" of MPI interface - direction = 4 - end + global_mortar_ids[mortar] = 2 * ndims(tree) * large_cell_id + direction - 1 end - local_neighbor_id = mpi_interfaces.local_neighbor_ids[interface_id] - local_cell_id = elements.cell_ids[local_neighbor_id] - remote_cell_id = tree.neighbor_ids[direction, local_cell_id] - neighbor_ranks_interface[interface_id] = tree.mpi_ranks[remote_cell_id] - if local_cell_id < remote_cell_id - global_interface_ids[interface_id] = 2 * ndims(tree) * local_cell_id + direction - 1 - else - global_interface_ids[interface_id] = (2 * ndims(tree) * remote_cell_id + - opposite_direction(direction) - 1) + + # Get sorted, unique neighbor ranks + mpi_neighbor_ranks = vcat(neighbor_ranks_interface, neighbor_ranks_mortar...) |> + sort |> unique + + # Sort interfaces by global interface id + p = sortperm(global_interface_ids) + neighbor_ranks_interface .= neighbor_ranks_interface[p] + interface_ids = collect(1:nmpiinterfaces(mpi_interfaces))[p] + + # Sort mortars by global mortar id + p = sortperm(global_mortar_ids) + neighbor_ranks_mortar .= neighbor_ranks_mortar[p] + mortar_ids = collect(1:nmpimortars(mpi_mortars))[p] + + # For each neighbor rank, init connectivity data structures + mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) + mpi_neighbor_mortars = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) + for (index, d) in enumerate(mpi_neighbor_ranks) + mpi_neighbor_interfaces[index] = interface_ids[findall(x -> (x == d), + neighbor_ranks_interface)] + mpi_neighbor_mortars[index] = mortar_ids[findall(x -> (d in x), + neighbor_ranks_mortar)] end - end - - # Determine neighbor ranks for MPI mortars - neighbor_ranks_mortar = Vector{Vector{Int}}(undef, nmpimortars(mpi_mortars)) - # The global mortar id is the (globally unique) large cell id, multiplied by - # number of directions (2 * ndims) plus direction minus one where - # direction = 1 for mortars in x-direction where large element is left - # direction = 2 for mortars in x-direction where large element is right - # direction = 3 for mortars in y-direction where large element is left - # direction = 4 for mortars in y-direction where large element is right - global_mortar_ids = fill(-1, nmpimortars(mpi_mortars)) - for mortar in 1:nmpimortars(mpi_mortars) - neighbor_ranks_mortar[mortar] = Vector{Int}() - - orientation = mpi_mortars.orientations[mortar] - large_side = mpi_mortars.large_sides[mortar] - direction = (orientation - 1) * 2 + large_side - - local_neighbor_ids = mpi_mortars.local_neighbor_ids[mortar] - local_neighbor_positions = mpi_mortars.local_neighbor_positions[mortar] - if 3 in local_neighbor_positions # large element is on this rank - large_element_id = local_neighbor_ids[findfirst(pos -> pos == 3, local_neighbor_positions)] - large_cell_id = elements.cell_ids[large_element_id] - else # large element is remote - cell_id = elements.cell_ids[first(local_neighbor_ids)] - large_cell_id = tree.neighbor_ids[direction, tree.parent_ids[cell_id]] + + # Sanity checks that we counted all interfaces exactly once + @assert sum(length(v) for v in mpi_neighbor_interfaces) == + nmpiinterfaces(mpi_interfaces) + + return mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars +end + +function rhs!(du, u, t, + mesh::Union{ParallelTreeMesh{2}, ParallelP4estMesh{2}}, equations, + initial_condition, boundary_conditions, source_terms::Source, + dg::DG, cache) where {Source} + # Start to receive MPI data + @trixi_timeit timer() "start MPI 
receive" start_mpi_receive!(cache.mpi_cache) + + # Prolong solution to MPI interfaces + @trixi_timeit timer() "prolong2mpiinterfaces" begin + prolong2mpiinterfaces!(cache, u, mesh, equations, dg.surface_integral, dg) end - neighbor_cell_id = tree.neighbor_ids[opposite_direction(direction), large_cell_id] - if direction == 1 - lower_cell_id = tree.child_ids[1, neighbor_cell_id] - upper_cell_id = tree.child_ids[3, neighbor_cell_id] - elseif direction == 2 - lower_cell_id = tree.child_ids[2, neighbor_cell_id] - upper_cell_id = tree.child_ids[4, neighbor_cell_id] - elseif direction == 3 - lower_cell_id = tree.child_ids[1, neighbor_cell_id] - upper_cell_id = tree.child_ids[2, neighbor_cell_id] - else - lower_cell_id = tree.child_ids[3, neighbor_cell_id] - upper_cell_id = tree.child_ids[4, neighbor_cell_id] + # Prolong solution to MPI mortars + @trixi_timeit timer() "prolong2mpimortars" begin + prolong2mpimortars!(cache, u, mesh, equations, + dg.mortar, dg.surface_integral, dg) end - for cell_id in (lower_cell_id, upper_cell_id, large_cell_id) - if !is_own_cell(tree, cell_id) - neighbor_rank = tree.mpi_ranks[cell_id] - if !(neighbor_rank in neighbor_ranks_mortar[mortar]) - push!(neighbor_ranks_mortar[mortar], neighbor_rank) - end - end + # Start to send MPI data + @trixi_timeit timer() "start MPI send" begin + start_mpi_send!(cache.mpi_cache, mesh, equations, dg, cache) end - global_mortar_ids[mortar] = 2 * ndims(tree) * large_cell_id + direction - 1 - end + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - # Get sorted, unique neighbor ranks - mpi_neighbor_ranks = vcat(neighbor_ranks_interface, neighbor_ranks_mortar...) |> sort |> unique + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end - # Sort interfaces by global interface id - p = sortperm(global_interface_ids) - neighbor_ranks_interface .= neighbor_ranks_interface[p] - interface_ids = collect(1:nmpiinterfaces(mpi_interfaces))[p] + # Prolong solution to interfaces + # TODO: Taal decide order of arguments, consistent vs. modified cache first? 
+ @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache, u, mesh, equations, + dg.surface_integral, dg) + end - # Sort mortars by global mortar id - p = sortperm(global_mortar_ids) - neighbor_ranks_mortar .= neighbor_ranks_mortar[p] - mortar_ids = collect(1:nmpimortars(mpi_mortars))[p] + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end - # For each neighbor rank, init connectivity data structures - mpi_neighbor_interfaces = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) - mpi_neighbor_mortars = Vector{Vector{Int}}(undef, length(mpi_neighbor_ranks)) - for (index, d) in enumerate(mpi_neighbor_ranks) - mpi_neighbor_interfaces[index] = interface_ids[findall(x->(x == d), neighbor_ranks_interface)] - mpi_neighbor_mortars[index] = mortar_ids[findall(x->(d in x), neighbor_ranks_mortar)] - end + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache, u, mesh, equations, + dg.surface_integral, dg) + end - # Sanity checks that we counted all interfaces exactly once - @assert sum(length(v) for v in mpi_neighbor_interfaces) == nmpiinterfaces(mpi_interfaces) + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end - return mpi_neighbor_ranks, mpi_neighbor_interfaces, mpi_neighbor_mortars -end + # Prolong solution to mortars + @trixi_timeit timer() "prolong2mortars" begin + prolong2mortars!(cache, u, mesh, equations, + dg.mortar, dg.surface_integral, dg) + end + # Calculate mortar fluxes + @trixi_timeit timer() "mortar flux" begin + calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.mortar, dg.surface_integral, dg, cache) + end + # Finish to receive MPI data + @trixi_timeit timer() "finish MPI receive" begin + finish_mpi_receive!(cache.mpi_cache, mesh, equations, dg, cache) + end -function rhs!(du, u, t, - mesh::Union{ParallelTreeMesh{2}, ParallelP4estMesh{2}}, equations, - initial_condition, boundary_conditions, source_terms::Source, - dg::DG, cache) where {Source} - # Start to receive MPI data - @trixi_timeit timer() "start MPI receive" start_mpi_receive!(cache.mpi_cache) - - # Prolong solution to MPI interfaces - @trixi_timeit timer() "prolong2mpiinterfaces" prolong2mpiinterfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Prolong solution to MPI mortars - @trixi_timeit timer() "prolong2mpimortars" prolong2mpimortars!( - cache, u, mesh, equations, dg.mortar, dg.surface_integral, dg) - - # Start to send MPI data - @trixi_timeit timer() "start MPI send" start_mpi_send!( - cache.mpi_cache, mesh, equations, dg, cache) - - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Prolong solution to interfaces - # TODO: Taal decide order of arguments, consistent vs. modified cache first? 
- @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) - - # Prolong solution to mortars - @trixi_timeit timer() "prolong2mortars" prolong2mortars!( - cache, u, mesh, equations, dg.mortar, dg.surface_integral, dg) - - # Calculate mortar fluxes - @trixi_timeit timer() "mortar flux" calc_mortar_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.mortar, dg.surface_integral, dg, cache) - - # Finish to receive MPI data - @trixi_timeit timer() "finish MPI receive" finish_mpi_receive!( - cache.mpi_cache, mesh, equations, dg, cache) - - # Calculate MPI interface fluxes - @trixi_timeit timer() "MPI interface flux" calc_mpi_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Calculate MPI mortar fluxes - @trixi_timeit timer() "MPI mortar flux" calc_mpi_mortar_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.mortar, dg.surface_integral, dg, cache) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - # Finish to send MPI data - @trixi_timeit timer() "finish MPI send" finish_mpi_send!(cache.mpi_cache) - - return nothing -end + # Calculate MPI interface fluxes + @trixi_timeit timer() "MPI interface flux" begin + calc_mpi_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end + # Calculate MPI mortar fluxes + @trixi_timeit timer() "MPI mortar flux" begin + calc_mpi_mortar_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.mortar, dg.surface_integral, dg, cache) + end -function prolong2mpiinterfaces!(cache, u, - mesh::ParallelTreeMesh{2}, - equations, surface_integral, dg::DG) - @unpack mpi_interfaces = cache + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) + end - @threaded for interface in eachmpiinterface(dg, cache) - local_element = mpi_interfaces.local_neighbor_ids[interface] + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) - if mpi_interfaces.orientations[interface] == 1 # interface in x-direction - if mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction - for j in eachnode(dg), v in eachvariable(equations) - mpi_interfaces.u[2, v, j, 
interface] = u[v, 1, j, local_element] - end - else # local element in negative direction - for j in eachnode(dg), v in eachvariable(equations) - mpi_interfaces.u[1, v, j, interface] = u[v, nnodes(dg), j, local_element] - end - end - else # interface in y-direction - if mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction - for i in eachnode(dg), v in eachvariable(equations) - mpi_interfaces.u[2, v, i, interface] = u[v, i, 1, local_element] - end - else # local element in negative direction - for i in eachnode(dg), v in eachvariable(equations) - mpi_interfaces.u[1, v, i, interface] = u[v, i, nnodes(dg), local_element] - end - end + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) end - end - return nothing -end + # Finish to send MPI data + @trixi_timeit timer() "finish MPI send" finish_mpi_send!(cache.mpi_cache) + return nothing +end -function prolong2mpimortars!(cache, u, - mesh::ParallelTreeMesh{2}, equations, - mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DGSEM) - @unpack mpi_mortars = cache +function prolong2mpiinterfaces!(cache, u, + mesh::ParallelTreeMesh{2}, + equations, surface_integral, dg::DG) + @unpack mpi_interfaces = cache - @threaded for mortar in eachmpimortar(dg, cache) - local_neighbor_ids = mpi_mortars.local_neighbor_ids[mortar] - local_neighbor_positions = mpi_mortars.local_neighbor_positions[mortar] + @threaded for interface in eachmpiinterface(dg, cache) + local_element = mpi_interfaces.local_neighbor_ids[interface] - for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) - if position in (1, 2) # Current element is small - # Copy solution small to small - if mpi_mortars.large_sides[mortar] == 1 # -> small elements on right side - if mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - if position == 1 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_lower[2, v, l, mortar] = u[v, 1, l, element] - end - end - else # position == 2 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_upper[2, v, l, mortar] = u[v, 1, l, element] - end - end - end - else - # L2 mortars in y-direction - if position == 1 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_lower[2, v, l, mortar] = u[v, l, 1, element] + if mpi_interfaces.orientations[interface] == 1 # interface in x-direction + if mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction + for j in eachnode(dg), v in eachvariable(equations) + mpi_interfaces.u[2, v, j, interface] = u[v, 1, j, local_element] end - end - else # position == 2 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_upper[2, v, l, mortar] = u[v, l, 1, element] + else # local element in negative direction + for j in eachnode(dg), v in eachvariable(equations) + mpi_interfaces.u[1, v, j, interface] = u[v, nnodes(dg), j, + local_element] end - end end - end - else # large_sides[mortar] == 2 -> small elements on left side - if mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - if position == 1 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_lower[1, v, l, mortar] = u[v, nnodes(dg), l, element] + else # interface in y-direction + if mpi_interfaces.remote_sides[interface] == 1 # local element in positive direction + for i in eachnode(dg), v in eachvariable(equations) + mpi_interfaces.u[2, v, i, interface] = u[v, i, 1, local_element] end - 
end - else # position == 2 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_upper[1, v, l, mortar] = u[v, nnodes(dg), l, element] + else # local element in negative direction + for i in eachnode(dg), v in eachvariable(equations) + mpi_interfaces.u[1, v, i, interface] = u[v, i, nnodes(dg), + local_element] end - end end - else - # L2 mortars in y-direction - if position == 1 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_lower[1, v, l, mortar] = u[v, l, nnodes(dg), element] + end + end + + return nothing +end + +function prolong2mpimortars!(cache, u, + mesh::ParallelTreeMesh{2}, equations, + mortar_l2::LobattoLegendreMortarL2, surface_integral, + dg::DGSEM) + @unpack mpi_mortars = cache + + @threaded for mortar in eachmpimortar(dg, cache) + local_neighbor_ids = mpi_mortars.local_neighbor_ids[mortar] + local_neighbor_positions = mpi_mortars.local_neighbor_positions[mortar] + + for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) + if position in (1, 2) # Current element is small + # Copy solution small to small + if mpi_mortars.large_sides[mortar] == 1 # -> small elements on right side + if mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + if position == 1 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_lower[2, v, l, mortar] = u[v, 1, l, + element] + end + end + else # position == 2 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_upper[2, v, l, mortar] = u[v, 1, l, + element] + end + end + end + else + # L2 mortars in y-direction + if position == 1 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_lower[2, v, l, mortar] = u[v, l, 1, + element] + end + end + else # position == 2 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_upper[2, v, l, mortar] = u[v, l, 1, + element] + end + end + end + end + else # large_sides[mortar] == 2 -> small elements on left side + if mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + if position == 1 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_lower[1, v, l, mortar] = u[v, + nnodes(dg), + l, element] + end + end + else # position == 2 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_upper[1, v, l, mortar] = u[v, + nnodes(dg), + l, element] + end + end + end + else + # L2 mortars in y-direction + if position == 1 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_lower[1, v, l, mortar] = u[v, l, + nnodes(dg), + element] + end + end + else # position == 2 + for l in eachnode(dg) + for v in eachvariable(equations) + mpi_mortars.u_upper[1, v, l, mortar] = u[v, l, + nnodes(dg), + element] + end + end + end + end end - end - else # position == 2 - for l in eachnode(dg) - for v in eachvariable(equations) - mpi_mortars.u_upper[1, v, l, mortar] = u[v, l, nnodes(dg), element] + else # position == 3 -> current element is large + # Interpolate large element face data to small interface locations + if mpi_mortars.large_sides[mortar] == 1 # -> large element on left side + leftright = 1 + if mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, nnodes(dg), :, element) + element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, + mortar, u_large) + else + # L2 mortars in y-direction + u_large = view(u, :, :, nnodes(dg), element) + element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, + mortar, u_large) + end + else # 
large_sides[mortar] == 2 -> large element on right side + leftright = 2 + if mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, 1, :, element) + element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, + mortar, u_large) + else + # L2 mortars in y-direction + u_large = view(u, :, :, 1, element) + element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, + mortar, u_large) + end end - end end - end - end - else # position == 3 -> current element is large - # Interpolate large element face data to small interface locations - if mpi_mortars.large_sides[mortar] == 1 # -> large element on left side - leftright = 1 - if mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - u_large = view(u, :, nnodes(dg), :, element) - element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, mortar, u_large) - else - # L2 mortars in y-direction - u_large = view(u, :, :, nnodes(dg), element) - element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, mortar, u_large) - end - else # large_sides[mortar] == 2 -> large element on right side - leftright = 2 - if mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - u_large = view(u, :, 1, :, element) - element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, mortar, u_large) - else - # L2 mortars in y-direction - u_large = view(u, :, :, 1, element) - element_solutions_to_mortars!(mpi_mortars, mortar_l2, leftright, mortar, u_large) - end end - end end - end - return nothing + return nothing end - function calc_mpi_interface_flux!(surface_flux_values, mesh::ParallelTreeMesh{2}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u, local_neighbor_ids, orientations, remote_sides = cache.mpi_interfaces - - @threaded for interface in eachmpiinterface(dg, cache) - # Get local neighboring element - element = local_neighbor_ids[interface] - - # Determine interface direction with respect to element: - if orientations[interface] == 1 # interface in x-direction - if remote_sides[interface] == 1 # local element in positive direction - direction = 1 - else # local element in negative direction - direction = 2 - end - else # interface in y-direction - if remote_sides[interface] == 1 # local element in positive direction - direction = 3 - else # local element in negative direction - direction = 4 - end - end + @unpack surface_flux = surface_integral + @unpack u, local_neighbor_ids, orientations, remote_sides = cache.mpi_interfaces + + @threaded for interface in eachmpiinterface(dg, cache) + # Get local neighboring element + element = local_neighbor_ids[interface] + + # Determine interface direction with respect to element: + if orientations[interface] == 1 # interface in x-direction + if remote_sides[interface] == 1 # local element in positive direction + direction = 1 + else # local element in negative direction + direction = 2 + end + else # interface in y-direction + if remote_sides[interface] == 1 # local element in positive direction + direction = 3 + else # local element in negative direction + direction = 4 + end + end - for i in eachnode(dg) - # Call pointwise Riemann solver - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) - flux = surface_flux(u_ll, u_rr, orientations[interface], equations) + for i in eachnode(dg) + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) + flux = surface_flux(u_ll, u_rr, orientations[interface], 
equations) - # Copy flux to local element storage - for v in eachvariable(equations) - surface_flux_values[v, i, direction, element] = flux[v] - end + # Copy flux to local element storage + for v in eachvariable(equations) + surface_flux_values[v, i, direction, element] = flux[v] + end + end end - end - return nothing + return nothing end - function calc_mpi_mortar_flux!(surface_flux_values, mesh::ParallelTreeMesh{2}, nonconservative_terms::False, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u_lower, u_upper, orientations = cache.mpi_mortars - @unpack fstar_upper_threaded, fstar_lower_threaded = cache - - @threaded for mortar in eachmpimortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar_upper = fstar_upper_threaded[Threads.threadid()] - fstar_lower = fstar_lower_threaded[Threads.threadid()] - - # Calculate fluxes - orientation = orientations[mortar] - calc_fstar!(fstar_upper, equations, surface_flux, dg, u_upper, mortar, orientation) - calc_fstar!(fstar_lower, equations, surface_flux, dg, u_lower, mortar, orientation) - - mpi_mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, - mortar, fstar_upper, fstar_lower) - end - - return nothing + @unpack surface_flux = surface_integral + @unpack u_lower, u_upper, orientations = cache.mpi_mortars + @unpack fstar_upper_threaded, fstar_lower_threaded = cache + + @threaded for mortar in eachmpimortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar_upper = fstar_upper_threaded[Threads.threadid()] + fstar_lower = fstar_lower_threaded[Threads.threadid()] + + # Calculate fluxes + orientation = orientations[mortar] + calc_fstar!(fstar_upper, equations, surface_flux, dg, u_upper, mortar, + orientation) + calc_fstar!(fstar_lower, equations, surface_flux, dg, u_lower, mortar, + orientation) + + mpi_mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, + mortar, fstar_upper, fstar_lower) + end + + return nothing end @inline function mpi_mortar_fluxes_to_elements!(surface_flux_values, @@ -738,64 +793,61 @@ end mortar_l2::LobattoLegendreMortarL2, dg::DGSEM, cache, mortar, fstar_upper, fstar_lower) - local_neighbor_ids = cache.mpi_mortars.local_neighbor_ids[mortar] - local_neighbor_positions = cache.mpi_mortars.local_neighbor_positions[mortar] - - for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) - if position in (1, 2) # Current element is small - # Copy flux small to small - if cache.mpi_mortars.large_sides[mortar] == 1 # -> small elements on right side - if cache.mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 1 - else - # L2 mortars in y-direction - direction = 3 - end - else # large_sides[mortar] == 2 -> small elements on left side - if cache.mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 2 - else - # L2 mortars in y-direction - direction = 4 - end - end - - if position == 1 - surface_flux_values[:, :, direction, element] .= fstar_lower - elseif position == 2 - surface_flux_values[:, :, direction, element] .= fstar_upper - end - else # position == 3 -> current element is large - # Project small fluxes to large element - if cache.mpi_mortars.large_sides[mortar] == 1 # -> large element on left side - if cache.mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 2 - else - # L2 mortars in y-direction - direction = 4 - end - else # 
large_sides[mortar] == 2 -> large element on right side - if cache.mpi_mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 1 - else - # L2 mortars in y-direction - direction = 3 - end - end - - multiply_dimensionwise!( - view(surface_flux_values, :, :, direction, element), mortar_l2.reverse_upper, fstar_upper, - mortar_l2.reverse_lower, fstar_lower) - end - end + local_neighbor_ids = cache.mpi_mortars.local_neighbor_ids[mortar] + local_neighbor_positions = cache.mpi_mortars.local_neighbor_positions[mortar] - return nothing -end + for (element, position) in zip(local_neighbor_ids, local_neighbor_positions) + if position in (1, 2) # Current element is small + # Copy flux small to small + if cache.mpi_mortars.large_sides[mortar] == 1 # -> small elements on right side + if cache.mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + else + # L2 mortars in y-direction + direction = 3 + end + else # large_sides[mortar] == 2 -> small elements on left side + if cache.mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + else + # L2 mortars in y-direction + direction = 4 + end + end + if position == 1 + surface_flux_values[:, :, direction, element] .= fstar_lower + elseif position == 2 + surface_flux_values[:, :, direction, element] .= fstar_upper + end + else # position == 3 -> current element is large + # Project small fluxes to large element + if cache.mpi_mortars.large_sides[mortar] == 1 # -> large element on left side + if cache.mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + else + # L2 mortars in y-direction + direction = 4 + end + else # large_sides[mortar] == 2 -> large element on right side + if cache.mpi_mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + else + # L2 mortars in y-direction + direction = 3 + end + end + multiply_dimensionwise!(view(surface_flux_values, :, :, direction, element), + mortar_l2.reverse_upper, fstar_upper, + mortar_l2.reverse_lower, fstar_lower) + end + end -end # @muladd \ No newline at end of file + return nothing +end +end # @muladd diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index aef86e3de7d..95abb2595e5 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -3,7 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # everything related to a DG semidiscretization in 3D, # currently limited to Lobatto-Legendre nodes @@ -13,357 +13,403 @@ # the RHS etc. function create_cache(mesh::TreeMesh{3}, equations, dg::DG, RealT, uEltype) - # Get cells for which an element needs to be created (i.e. all leaf cells) - leaf_cell_ids = local_leaf_cells(mesh.tree) + # Get cells for which an element needs to be created (i.e. 
all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) - elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) + elements = init_elements(leaf_cell_ids, mesh, equations, dg.basis, RealT, uEltype) - interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) - boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) - mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) + mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) - cache = (; elements, interfaces, boundaries, mortars) + cache = (; elements, interfaces, boundaries, mortars) - # Add specialized parts of the cache required to compute the volume integral etc. - cache = (;cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) - cache = (;cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + cache = (; cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) - return cache + return cache end - # The methods below are specialized on the volume integral type # and called from the basic `create_cache` method at the top. function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, - equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DG, uEltype) - NamedTuple() + equations, volume_integral::VolumeIntegralFluxDifferencing, + dg::DG, uEltype) + NamedTuple() end - -function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, equations, +function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, + equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DG, uEltype) - element_ids_dg = Int[] - element_ids_dgfv = Int[] - - cache = create_cache(mesh, equations, - VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), - dg, uEltype) - - A4dp1_x = Array{uEltype, 4} - A4dp1_y = Array{uEltype, 4} - A4dp1_z = Array{uEltype, 4} - fstar1_L_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg), nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar1_R_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg), nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar2_L_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1, nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar2_R_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1, nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar3_L_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)+1) - for _ in 1:Threads.nthreads()] - fstar3_R_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)+1) - for _ in 1:Threads.nthreads()] - - return (; cache..., element_ids_dg, element_ids_dgfv, fstar1_L_threaded, fstar1_R_threaded, + element_ids_dg = Int[] + element_ids_dgfv = Int[] + + cache = create_cache(mesh, equations, + VolumeIntegralFluxDifferencing(volume_integral.volume_flux_dg), + dg, uEltype) + + A4dp1_x = Array{uEltype, 4} + A4dp1_y = Array{uEltype, 4} + A4dp1_z = Array{uEltype, 4} + fstar1_L_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg), nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar1_R_threaded = 
A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg), nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar2_L_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1, nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar2_R_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1, nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar3_L_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), + nnodes(dg), nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] + fstar3_R_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), + nnodes(dg), nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] + + return (; cache..., element_ids_dg, element_ids_dgfv, fstar1_L_threaded, + fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded) end - -function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, equations, - volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DG, uEltype) - - A4dp1_x = Array{uEltype, 4} - A4dp1_y = Array{uEltype, 4} - A4dp1_z = Array{uEltype, 4} - fstar1_L_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg), nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar1_R_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg)+1, nnodes(dg), nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar2_L_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1, nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar2_R_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), nnodes(dg)+1, nnodes(dg)) - for _ in 1:Threads.nthreads()] - fstar3_L_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)+1) - for _ in 1:Threads.nthreads()] - fstar3_R_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), nnodes(dg), nnodes(dg)+1) - for _ in 1:Threads.nthreads()] - - return (; fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, +function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, + equations, + volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DG, + uEltype) + A4dp1_x = Array{uEltype, 4} + A4dp1_y = Array{uEltype, 4} + A4dp1_z = Array{uEltype, 4} + fstar1_L_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg), nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar1_R_threaded = A4dp1_x[A4dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg), nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar2_L_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1, nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar2_R_threaded = A4dp1_y[A4dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1, nnodes(dg)) + for _ in 1:Threads.nthreads()] + fstar3_L_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), + nnodes(dg), nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] + fstar3_R_threaded = A4dp1_z[A4dp1_z(undef, nvariables(equations), nnodes(dg), + nnodes(dg), nnodes(dg) + 1) + for _ in 1:Threads.nthreads()] + + return (; fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, + fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded) end - # The methods below are specialized on the mortar type # and called from the basic `create_cache` method at the top. 
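Both the volume-integral caches above and the mortar cache below use the same
thread-local buffer pattern: one scratch array is preallocated per thread, and
each `@threaded` loop iteration grabs the buffer belonging to its own thread,
so no locking or allocation happens in the hot loop. A minimal, self-contained
sketch of that pattern (the sizes and names below are illustrative, not
Trixi.jl's API):

    using Base.Threads

    # One preallocated scratch array per thread, as in the `fstar*_threaded` caches.
    nvars, nnodes_face = 4, 4  # hypothetical sizes
    fstar_threaded = [Array{Float64, 3}(undef, nvars, nnodes_face, nnodes_face)
                      for _ in 1:nthreads()]

    # `:static` scheduling pins each iteration to one thread, so indexing the
    # buffer list by `threadid()` is safe.
    @threads :static for face in 1:100
        fstar = fstar_threaded[threadid()]  # thread-specific buffer
        fill!(fstar, 0.0)                   # stand-in for the actual flux computation
    end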
-function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, equations, mortar_l2::LobattoLegendreMortarL2, uEltype) - # TODO: Taal compare performance of different types - A3d = Array{uEltype, 3} - fstar_upper_left_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] - fstar_upper_right_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] - fstar_lower_left_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] - fstar_lower_right_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] - fstar_tmp1_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), nnodes(mortar_l2)) - for _ in 1:Threads.nthreads()] - - (; fstar_upper_left_threaded, fstar_upper_right_threaded, +function create_cache(mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, + equations, mortar_l2::LobattoLegendreMortarL2, uEltype) + # TODO: Taal compare performance of different types + A3d = Array{uEltype, 3} + fstar_upper_left_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), + nnodes(mortar_l2)) + for _ in 1:Threads.nthreads()] + fstar_upper_right_threaded = A3d[A3d(undef, nvariables(equations), + nnodes(mortar_l2), nnodes(mortar_l2)) + for _ in 1:Threads.nthreads()] + fstar_lower_left_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), + nnodes(mortar_l2)) + for _ in 1:Threads.nthreads()] + fstar_lower_right_threaded = A3d[A3d(undef, nvariables(equations), + nnodes(mortar_l2), nnodes(mortar_l2)) + for _ in 1:Threads.nthreads()] + fstar_tmp1_threaded = A3d[A3d(undef, nvariables(equations), nnodes(mortar_l2), + nnodes(mortar_l2)) + for _ in 1:Threads.nthreads()] + + (; fstar_upper_left_threaded, fstar_upper_right_threaded, fstar_lower_left_threaded, fstar_lower_right_threaded, fstar_tmp1_threaded) end - # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? 
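The `rhs!` method below wraps every stage in
`@trixi_timeit timer() "label" begin ... end`. A rough sketch of the same idea
written directly with TimerOutputs.jl; Trixi.jl's macro is assumed here to
behave analogously, so treat this as an illustration rather than the actual
implementation:

    using TimerOutputs

    const to = TimerOutput()

    function toy_rhs!(du, u)
        @timeit to "reset ∂u/∂t" fill!(du, zero(eltype(du)))
        @timeit to "volume integral" begin
            @. du += u^2  # stand-in for the actual volume terms
        end
        return nothing
    end

    toy_rhs!(zeros(4), ones(4))
    print_timer(to)  # reports time and allocations per labeled stage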
function rhs!(du, u, t, mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) - - # Prolong solution to mortars - @trixi_timeit timer() "prolong2mortars" prolong2mortars!( - cache, u, mesh, equations, dg.mortar, dg.surface_integral, dg) - - # Calculate mortar fluxes - @trixi_timeit timer() "mortar flux" calc_mortar_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.mortar, dg.surface_integral, dg, cache) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - return nothing -end + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end + + # Prolong solution to mortars + @trixi_timeit timer() "prolong2mortars" begin + prolong2mortars!(cache, u, mesh, equations, + dg.mortar, dg.surface_integral, dg) + end + + # Calculate mortar fluxes + @trixi_timeit timer() "mortar flux" begin + calc_mortar_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.mortar, dg.surface_integral, dg, cache) + end + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, 
u, mesh, equations, + dg.surface_integral, dg, cache) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) + end + + return nothing +end function calc_volume_integral!(du, u, - mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, + mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3} + }, nonconservative_terms, equations, volume_integral::VolumeIntegralWeakForm, dg::DGSEM, cache) + @threaded for element in eachelement(dg, cache) + weak_form_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + dg, cache) + end - @threaded for element in eachelement(dg, cache) - weak_form_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - dg, cache) - end - - return nothing + return nothing end @inline function weak_form_kernel!(du, u, element, mesh::TreeMesh{3}, nonconservative_terms::False, equations, - dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_dhat = dg.basis + dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_dhat = dg.basis - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) - flux1 = flux(u_node, 1, equations) - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1, equations, dg, ii, j, k, element) - end + flux1 = flux(u_node, 1, equations) + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[ii, i], flux1, + equations, dg, ii, j, k, element) + end - flux2 = flux(u_node, 2, equations) - for jj in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], flux2, equations, dg, i, jj, k, element) - end + flux2 = flux(u_node, 2, equations) + for jj in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[jj, j], flux2, + equations, dg, i, jj, k, element) + end - flux3 = flux(u_node, 3, equations) - for kk in eachnode(dg) - multiply_add_to_node_vars!(du, alpha * derivative_dhat[kk, k], flux3, equations, dg, i, j, kk, element) + flux3 = flux(u_node, 3, equations) + for kk in eachnode(dg) + multiply_add_to_node_vars!(du, alpha * derivative_dhat[kk, k], flux3, + equations, dg, i, j, kk, element) + end end - end - return nothing + return nothing end - function calc_volume_integral!(du, u, - mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, + mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3} + }, nonconservative_terms, equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DGSEM, cache) - @threaded for element in eachelement(dg, cache) - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_integral.volume_flux, dg, cache) - end + @threaded for element in eachelement(dg, cache) + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_integral.volume_flux, dg, cache) + end end @inline function flux_differencing_kernel!(du, u, element, 
mesh::TreeMesh{3}, nonconservative_terms::False, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_split = dg.basis - - # Calculate volume integral in one element - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - - # All diagonal entries of `derivative_split` are zero. Thus, we can skip - # the computation of the diagonal terms. In addition, we use the symmetry - # of the `volume_flux` to save half of the possible two-point flux - # computations. - - # x direction - for ii in (i+1):nnodes(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) - flux1 = volume_flux(u_node, u_node_ii, 1, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], flux1, equations, dg, i, j, k, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], flux1, equations, dg, ii, j, k, element) - end + volume_flux, dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. + @unpack derivative_split = dg.basis - # y direction - for jj in (j+1):nnodes(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) - flux2 = volume_flux(u_node, u_node_jj, 2, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], flux2, equations, dg, i, j, k, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], flux2, equations, dg, i, jj, k, element) - end + # Calculate volume integral in one element + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + + # All diagonal entries of `derivative_split` are zero. Thus, we can skip + # the computation of the diagonal terms. In addition, we use the symmetry + # of the `volume_flux` to save half of the possible two-point flux + # computations. 
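+        # For example, for i = 1 the x-direction loop below visits only
+        # ii = 2, ..., nnodes(dg); the single two-point flux
+        # volume_flux(u_1, u_ii) is accumulated into `du` at node (i, j, k)
+        # with weight derivative_split[i, ii] and at node (ii, j, k) with
+        # weight derivative_split[ii, i], so each symmetric flux is computed
+        # once but contributes to two nodes.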
+ + # x direction + for ii in (i + 1):nnodes(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) + flux1 = volume_flux(u_node, u_node_ii, 1, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[i, ii], flux1, + equations, dg, i, j, k, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[ii, i], flux1, + equations, dg, ii, j, k, element) + end + + # y direction + for jj in (j + 1):nnodes(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) + flux2 = volume_flux(u_node, u_node_jj, 2, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[j, jj], flux2, + equations, dg, i, j, k, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[jj, j], flux2, + equations, dg, i, jj, k, element) + end - # z direction - for kk in (k+1):nnodes(dg) - u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) - flux3 = volume_flux(u_node, u_node_kk, 3, equations) - multiply_add_to_node_vars!(du, alpha * derivative_split[k, kk], flux3, equations, dg, i, j, k, element) - multiply_add_to_node_vars!(du, alpha * derivative_split[kk, k], flux3, equations, dg, i, j, kk, element) + # z direction + for kk in (k + 1):nnodes(dg) + u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) + flux3 = volume_flux(u_node, u_node_kk, 3, equations) + multiply_add_to_node_vars!(du, alpha * derivative_split[k, kk], flux3, + equations, dg, i, j, k, element) + multiply_add_to_node_vars!(du, alpha * derivative_split[kk, k], flux3, + equations, dg, i, j, kk, element) + end end - end end @inline function flux_differencing_kernel!(du, u, element, mesh::TreeMesh{3}, nonconservative_terms::True, equations, - volume_flux, dg::DGSEM, cache, alpha=true) - # true * [some floating point value] == [exactly the same floating point value] - # This can (hopefully) be optimized away due to constant propagation. - @unpack derivative_split = dg.basis - symmetric_flux, nonconservative_flux = volume_flux - - # Apply the symmetric flux as usual - flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, dg, cache, alpha) - - # Calculate the remaining volume terms using the nonsymmetric generalized flux - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, k, element) - - # The diagonal terms are zero since the diagonal of `derivative_split` - # is zero. We ignore this for now. - - # x direction - integral_contribution = zero(u_node) - for ii in eachnode(dg) - u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) - noncons_flux1 = nonconservative_flux(u_node, u_node_ii, 1, equations) - integral_contribution = integral_contribution + derivative_split[i, ii] * noncons_flux1 - end + volume_flux, dg::DGSEM, cache, alpha = true) + # true * [some floating point value] == [exactly the same floating point value] + # This can (hopefully) be optimized away due to constant propagation. 
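+    # Here `volume_flux` is a tuple of a symmetric two-point flux and a
+    # nonconservative flux. The symmetric part is delegated to the
+    # `nonconservative_terms::False` kernel above; only the nonconservative
+    # correction is accumulated in the loops below.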
+ @unpack derivative_split = dg.basis + symmetric_flux, nonconservative_flux = volume_flux - # y direction - for jj in eachnode(dg) - u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) - noncons_flux2 = nonconservative_flux(u_node, u_node_jj, 2, equations) - integral_contribution = integral_contribution + derivative_split[j, jj] * noncons_flux2 - end + # Apply the symmetric flux as usual + flux_differencing_kernel!(du, u, element, mesh, False(), equations, symmetric_flux, + dg, cache, alpha) - # z direction - for kk in eachnode(dg) - u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) - noncons_flux3 = nonconservative_flux(u_node, u_node_kk, 3, equations) - integral_contribution = integral_contribution + derivative_split[k, kk] * noncons_flux3 - end + # Calculate the remaining volume terms using the nonsymmetric generalized flux + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, k, element) + + # The diagonal terms are zero since the diagonal of `derivative_split` + # is zero. We ignore this for now. + + # x direction + integral_contribution = zero(u_node) + for ii in eachnode(dg) + u_node_ii = get_node_vars(u, equations, dg, ii, j, k, element) + noncons_flux1 = nonconservative_flux(u_node, u_node_ii, 1, equations) + integral_contribution = integral_contribution + + derivative_split[i, ii] * noncons_flux1 + end - # The factor 0.5 cancels the factor 2 in the flux differencing form - multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, dg, i, j, k, element) - end -end + # y direction + for jj in eachnode(dg) + u_node_jj = get_node_vars(u, equations, dg, i, jj, k, element) + noncons_flux2 = nonconservative_flux(u_node, u_node_jj, 2, equations) + integral_contribution = integral_contribution + + derivative_split[j, jj] * noncons_flux2 + end + # z direction + for kk in eachnode(dg) + u_node_kk = get_node_vars(u, equations, dg, i, j, kk, element) + noncons_flux3 = nonconservative_flux(u_node, u_node_kk, 3, equations) + integral_contribution = integral_contribution + + derivative_split[k, kk] * noncons_flux3 + end + + # The factor 0.5 cancels the factor 2 in the flux differencing form + multiply_add_to_node_vars!(du, alpha * 0.5, integral_contribution, equations, + dg, i, j, k, element) + end +end # TODO: Taal dimension agnostic function calc_volume_integral!(du, u, - mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, + mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3} + }, nonconservative_terms, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DGSEM, cache) - @unpack element_ids_dg, element_ids_dgfv = cache - @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral - - # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α - alpha = @trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, cache) - - # Determine element ids for DG-only and blended DG-FV volume integral - pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) - - # Loop over pure DG elements - @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) - element = element_ids_dg[idx_element] - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_flux_dg, dg, cache) - end + @unpack element_ids_dg, element_ids_dgfv = cache + @unpack volume_flux_dg, volume_flux_fv, indicator = volume_integral + + # Calculate blending factors α: u = u_DG * (1 - α) + u_FV * α + alpha = 
@trixi_timeit timer() "blending factors" indicator(u, mesh, equations, dg, + cache) + + # Determine element ids for DG-only and blended DG-FV volume integral + pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha, dg, cache) + + # Loop over pure DG elements + @trixi_timeit timer() "pure DG" @threaded for idx_element in eachindex(element_ids_dg) + element = element_ids_dg[idx_element] + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_flux_dg, dg, cache) + end - # Loop over blended DG-FV elements - @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) - element = element_ids_dgfv[idx_element] - alpha_element = alpha[element] + # Loop over blended DG-FV elements + @trixi_timeit timer() "blended DG-FV" @threaded for idx_element in eachindex(element_ids_dgfv) + element = element_ids_dgfv[idx_element] + alpha_element = alpha[element] - # Calculate DG volume integral contribution - flux_differencing_kernel!(du, u, element, mesh, - nonconservative_terms, equations, - volume_flux_dg, dg, cache, 1 - alpha_element) + # Calculate DG volume integral contribution + flux_differencing_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_flux_dg, dg, cache, 1 - alpha_element) - # Calculate FV volume integral contribution - fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, alpha_element) - end + # Calculate FV volume integral contribution + fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, + dg, cache, element, alpha_element) + end - return nothing + return nothing end # TODO: Taal dimension agnostic @@ -372,575 +418,650 @@ function calc_volume_integral!(du, u, nonconservative_terms, equations, volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DGSEM, cache) - @unpack volume_flux_fv = volume_integral + @unpack volume_flux_fv = volume_integral - # Calculate LGL FV volume integral - @threaded for element in eachelement(dg, cache) - fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, - dg, cache, element, true) - end + # Calculate LGL FV volume integral + @threaded for element in eachelement(dg, cache) + fv_kernel!(du, u, mesh, nonconservative_terms, equations, volume_flux_fv, + dg, cache, element, true) + end - return nothing + return nothing end - @inline function fv_kernel!(du, u, mesh::Union{TreeMesh{3}, StructuredMesh{3}, P4estMesh{3}}, nonconservative_terms, equations, - volume_flux_fv, dg::DGSEM, cache, element, alpha=true) - @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded = cache - @unpack inverse_weights = dg.basis - - # Calculate FV two-point fluxes - fstar1_L = fstar1_L_threaded[Threads.threadid()] - fstar2_L = fstar2_L_threaded[Threads.threadid()] - fstar3_L = fstar3_L_threaded[Threads.threadid()] - fstar1_R = fstar1_R_threaded[Threads.threadid()] - fstar2_R = fstar2_R_threaded[Threads.threadid()] - fstar3_R = fstar3_R_threaded[Threads.threadid()] - - calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh, nonconservative_terms, equations, volume_flux_fv, dg, element, cache) - - # Calculate FV volume integral contribution - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, j, k, element] += ( alpha * - (inverse_weights[i] * (fstar1_L[v, i+1, j, k] - fstar1_R[v, i, j, k]) + - inverse_weights[j] * (fstar2_L[v, i, j+1, k] - 
fstar2_R[v, i, j, k]) + - inverse_weights[k] * (fstar3_L[v, i, j, k+1] - fstar3_R[v, i, j, k])) ) + volume_flux_fv, dg::DGSEM, cache, element, alpha = true) + @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded, fstar3_L_threaded, fstar3_R_threaded = cache + @unpack inverse_weights = dg.basis + + # Calculate FV two-point fluxes + fstar1_L = fstar1_L_threaded[Threads.threadid()] + fstar2_L = fstar2_L_threaded[Threads.threadid()] + fstar3_L = fstar3_L_threaded[Threads.threadid()] + fstar1_R = fstar1_R_threaded[Threads.threadid()] + fstar2_R = fstar2_R_threaded[Threads.threadid()] + fstar3_R = fstar3_R_threaded[Threads.threadid()] + + calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, + mesh, nonconservative_terms, equations, volume_flux_fv, dg, element, + cache) + # Calculate FV volume integral contribution + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, j, k, element] += (alpha * + (inverse_weights[i] * + (fstar1_L[v, i + 1, j, k] - + fstar1_R[v, i, j, k]) + + inverse_weights[j] * + (fstar2_L[v, i, j + 1, k] - + fstar2_R[v, i, j, k]) + + inverse_weights[k] * + (fstar3_L[v, i, j, k + 1] - + fstar3_R[v, i, j, k]))) + end end - end - return nothing + return nothing end - # Calculate the finite volume fluxes inside the elements (**without non-conservative terms**). -@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, - mesh::TreeMesh{3}, nonconservative_terms::False, equations, +@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, + fstar3_R, u, + mesh::TreeMesh{3}, nonconservative_terms::False, + equations, volume_flux_fv, dg::DGSEM, element, cache) + fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) + fstar1_R[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_R)) + + for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, k, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) + flux = volume_flux_fv(u_ll, u_rr, 1, equations) # orientation 1: x direction + set_node_vars!(fstar1_L, flux, equations, dg, i, j, k) + set_node_vars!(fstar1_R, flux, equations, dg, i, j, k) + end - fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_R)) - - for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) - flux = volume_flux_fv(u_ll, u_rr, 1, equations) # orientation 1: x direction - set_node_vars!(fstar1_L, flux, equations, dg, i, j, k) - set_node_vars!(fstar1_R, flux, equations, dg, i, j, k) - end - - fstar2_L[:, :, 1 , :] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 , :] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_R)) - - for k in eachnode(dg), j in 2:nnodes(dg), i in eachnode(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) - flux = volume_flux_fv(u_ll, u_rr, 2, equations) # orientation 2: y direction - set_node_vars!(fstar2_L, flux, equations, dg, i, j, k) - set_node_vars!(fstar2_R, flux, equations, dg, 
i, j, k)
-  end
-
-  fstar3_L[:, :, :, 1 ] .= zero(eltype(fstar3_L))
-  fstar3_L[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_L))
-  fstar3_R[:, :, :, 1 ] .= zero(eltype(fstar3_R))
-  fstar3_R[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_R))
-
-  for k in 2:nnodes(dg), j in eachnode(dg), i in eachnode(dg)
-    u_ll = get_node_vars(u, equations, dg, i, j, k-1, element)
-    u_rr = get_node_vars(u, equations, dg, i, j, k, element)
-    flux = volume_flux_fv(u_ll, u_rr, 3, equations) # orientation 3: z direction
-    set_node_vars!(fstar3_L, flux, equations, dg, i, j, k)
-    set_node_vars!(fstar3_R, flux, equations, dg, i, j, k)
-  end
-
-  return nothing
-end
+    fstar2_L[:, :, 1, :] .= zero(eltype(fstar2_L))
+    fstar2_L[:, :, nnodes(dg) + 1, :] .= zero(eltype(fstar2_L))
+    fstar2_R[:, :, 1, :] .= zero(eltype(fstar2_R))
+    fstar2_R[:, :, nnodes(dg) + 1, :] .= zero(eltype(fstar2_R))
+
+    for k in eachnode(dg), j in 2:nnodes(dg), i in eachnode(dg)
+        u_ll = get_node_vars(u, equations, dg, i, j - 1, k, element)
+        u_rr = get_node_vars(u, equations, dg, i, j, k, element)
+        flux = volume_flux_fv(u_ll, u_rr, 2, equations) # orientation 2: y direction
+        set_node_vars!(fstar2_L, flux, equations, dg, i, j, k)
+        set_node_vars!(fstar2_R, flux, equations, dg, i, j, k)
+    end
+    fstar3_L[:, :, :, 1] .= zero(eltype(fstar3_L))
+    fstar3_L[:, :, :, nnodes(dg) + 1] .= zero(eltype(fstar3_L))
+    fstar3_R[:, :, :, 1] .= zero(eltype(fstar3_R))
+    fstar3_R[:, :, :, nnodes(dg) + 1] .= zero(eltype(fstar3_R))
+
+    for k in 2:nnodes(dg), j in eachnode(dg), i in eachnode(dg)
+        u_ll = get_node_vars(u, equations, dg, i, j, k - 1, element)
+        u_rr = get_node_vars(u, equations, dg, i, j, k, element)
+        flux = volume_flux_fv(u_ll, u_rr, 3, equations) # orientation 3: z direction
+        set_node_vars!(fstar3_L, flux, equations, dg, i, j, k)
+        set_node_vars!(fstar3_R, flux, equations, dg, i, j, k)
+    end
+
+    return nothing
+end

 # Calculate the finite volume fluxes inside the elements (**with non-conservative terms**).
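 # At every subcell interface the two adjacent states receive different fluxes:
 #   flux_L = f(u_ll, u_rr) + 0.5 * g(u_ll, u_rr)
 #   flux_R = f(u_ll, u_rr) + 0.5 * g(u_rr, u_ll)
 # with f the symmetric volume flux and g the nonconservative flux; the factor
 # 0.5 stems from the central coupling of the SBP/SAT interpretation noted in
 # the code below.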
-@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, fstar3_R, u, +@inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, fstar3_L, + fstar3_R, u, mesh::TreeMesh{3}, nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) - volume_flux, nonconservative_flux = volume_flux_fv - - fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) - fstar1_L[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_L)) - fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) - fstar1_R[:, nnodes(dg)+1, :, :] .= zero(eltype(fstar1_R)) - - for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg) - u_ll = get_node_vars(u, equations, dg, i-1, j, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) - - # Compute conservative part - flux = volume_flux(u_ll, u_rr, 1, equations) # orientation 1: x direction - - # Compute nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - flux_L = flux + 0.5 * nonconservative_flux(u_ll, u_rr, 1, equations) - flux_R = flux + 0.5 * nonconservative_flux(u_rr, u_ll, 1, equations) - - set_node_vars!(fstar1_L, flux_L, equations, dg, i, j, k) - set_node_vars!(fstar1_R, flux_R, equations, dg, i, j, k) - end - - fstar2_L[:, :, 1 , :] .= zero(eltype(fstar2_L)) - fstar2_L[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_L)) - fstar2_R[:, :, 1 , :] .= zero(eltype(fstar2_R)) - fstar2_R[:, :, nnodes(dg)+1, :] .= zero(eltype(fstar2_R)) - - for k in eachnode(dg), j in 2:nnodes(dg), i in eachnode(dg) - u_ll = get_node_vars(u, equations, dg, i, j-1, k, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) - - # Compute conservative part - flux = volume_flux(u_ll, u_rr, 2, equations) # orientation 2: y direction - - # Compute nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - flux_L = flux + 0.5 * nonconservative_flux(u_ll, u_rr, 2, equations) - flux_R = flux + 0.5 * nonconservative_flux(u_rr, u_ll, 2, equations) - - set_node_vars!(fstar2_L, flux_L, equations, dg, i, j, k) - set_node_vars!(fstar2_R, flux_R, equations, dg, i, j, k) - end - - fstar3_L[:, :, :, 1 ] .= zero(eltype(fstar3_L)) - fstar3_L[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_L)) - fstar3_R[:, :, :, 1 ] .= zero(eltype(fstar3_R)) - fstar3_R[:, :, :, nnodes(dg)+1] .= zero(eltype(fstar3_R)) - - for k in 2:nnodes(dg), j in eachnode(dg), i in eachnode(dg) - u_ll = get_node_vars(u, equations, dg, i, j, k-1, element) - u_rr = get_node_vars(u, equations, dg, i, j, k, element) - - # Compute conservative part - flux = volume_flux(u_ll, u_rr, 3, equations) # orientation 3: z direction - - # Compute nonconservative part - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - flux_L = flux + 0.5 * nonconservative_flux(u_ll, u_rr, 3, equations) - flux_R = flux + 0.5 * nonconservative_flux(u_rr, u_ll, 3, equations) - - set_node_vars!(fstar3_L, flux_L, equations, dg, i, j, k) - set_node_vars!(fstar3_R, flux_R, equations, dg, i, j, k) - end - - return nothing -end + volume_flux, nonconservative_flux = volume_flux_fv + + fstar1_L[:, 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_L[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_L)) + fstar1_R[:, 1, :, :] .= zero(eltype(fstar1_R)) + 
fstar1_R[:, nnodes(dg) + 1, :, :] .= zero(eltype(fstar1_R)) + + for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg) + u_ll = get_node_vars(u, equations, dg, i - 1, j, k, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) + + # Compute conservative part + flux = volume_flux(u_ll, u_rr, 1, equations) # orientation 1: x direction + + # Compute nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + flux_L = flux + 0.5 * nonconservative_flux(u_ll, u_rr, 1, equations) + flux_R = flux + 0.5 * nonconservative_flux(u_rr, u_ll, 1, equations) + + set_node_vars!(fstar1_L, flux_L, equations, dg, i, j, k) + set_node_vars!(fstar1_R, flux_R, equations, dg, i, j, k) + end + + fstar2_L[:, :, 1, :] .= zero(eltype(fstar2_L)) + fstar2_L[:, :, nnodes(dg) + 1, :] .= zero(eltype(fstar2_L)) + fstar2_R[:, :, 1, :] .= zero(eltype(fstar2_R)) + fstar2_R[:, :, nnodes(dg) + 1, :] .= zero(eltype(fstar2_R)) + + for k in eachnode(dg), j in 2:nnodes(dg), i in eachnode(dg) + u_ll = get_node_vars(u, equations, dg, i, j - 1, k, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) + + # Compute conservative part + flux = volume_flux(u_ll, u_rr, 2, equations) # orientation 2: y direction + + # Compute nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + flux_L = flux + 0.5 * nonconservative_flux(u_ll, u_rr, 2, equations) + flux_R = flux + 0.5 * nonconservative_flux(u_rr, u_ll, 2, equations) + + set_node_vars!(fstar2_L, flux_L, equations, dg, i, j, k) + set_node_vars!(fstar2_R, flux_R, equations, dg, i, j, k) + end + + fstar3_L[:, :, :, 1] .= zero(eltype(fstar3_L)) + fstar3_L[:, :, :, nnodes(dg) + 1] .= zero(eltype(fstar3_L)) + fstar3_R[:, :, :, 1] .= zero(eltype(fstar3_R)) + fstar3_R[:, :, :, nnodes(dg) + 1] .= zero(eltype(fstar3_R)) + for k in 2:nnodes(dg), j in eachnode(dg), i in eachnode(dg) + u_ll = get_node_vars(u, equations, dg, i, j, k - 1, element) + u_rr = get_node_vars(u, equations, dg, i, j, k, element) + + # Compute conservative part + flux = volume_flux(u_ll, u_rr, 3, equations) # orientation 3: z direction + + # Compute nonconservative part + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + flux_L = flux + 0.5 * nonconservative_flux(u_ll, u_rr, 3, equations) + flux_R = flux + 0.5 * nonconservative_flux(u_rr, u_ll, 3, equations) + + set_node_vars!(fstar3_L, flux_L, equations, dg, i, j, k) + set_node_vars!(fstar3_R, flux_R, equations, dg, i, j, k) + end + + return nothing +end # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache, u, mesh::TreeMesh{3}, equations, surface_integral, dg::DG) - @unpack interfaces = cache - @unpack orientations = interfaces - - @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] - - if orientations[interface] == 1 - # interface in x-direction - for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, j, k, interface] = u[v, nnodes(dg), j, k, left_element] - interfaces.u[2, v, j, k, interface] = u[v, 1, j, k, right_element] - end - elseif orientations[interface] == 
2 - # interface in y-direction - for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, k, interface] = u[v, i, nnodes(dg), k, left_element] - interfaces.u[2, v, i, k, interface] = u[v, i, 1, k, right_element] - end - else # if orientations[interface] == 3 - # interface in z-direction - for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, j, interface] = u[v, i, j, nnodes(dg), left_element] - interfaces.u[2, v, i, j, interface] = u[v, i, j, 1, right_element] - end + @unpack interfaces = cache + @unpack orientations = interfaces + + @threaded for interface in eachinterface(dg, cache) + left_element = interfaces.neighbor_ids[1, interface] + right_element = interfaces.neighbor_ids[2, interface] + + if orientations[interface] == 1 + # interface in x-direction + for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, j, k, interface] = u[v, nnodes(dg), j, k, + left_element] + interfaces.u[2, v, j, k, interface] = u[v, 1, j, k, right_element] + end + elseif orientations[interface] == 2 + # interface in y-direction + for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, i, k, interface] = u[v, i, nnodes(dg), k, + left_element] + interfaces.u[2, v, i, k, interface] = u[v, i, 1, k, right_element] + end + else # if orientations[interface] == 3 + # interface in z-direction + for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, i, j, interface] = u[v, i, j, nnodes(dg), + left_element] + interfaces.u[2, v, i, j, interface] = u[v, i, j, 1, right_element] + end + end end - end - return nothing + return nothing end function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{3}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, orientations = cache.interfaces + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, orientations = cache.interfaces - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - # orientation = 3: left -> 6, right -> 5 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + # orientation = 3: left -> 6, right -> 5 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 - for j in eachnode(dg), i in eachnode(dg) - # Call pointwise Riemann solver - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, interface) - flux = surface_flux(u_ll, u_rr, orientations[interface], equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, i, j, left_direction, left_id] = flux[v] - surface_flux_values[v, i, j, right_direction, right_id] = flux[v] - end + for j in eachnode(dg), i in eachnode(dg) + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, 
interface) + flux = surface_flux(u_ll, u_rr, orientations[interface], equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, i, j, left_direction, left_id] = flux[v] + surface_flux_values[v, i, j, right_direction, right_id] = flux[v] + end + end end - end end function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{3}, nonconservative_terms::True, equations, surface_integral, dg::DG, cache) - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u, neighbor_ids, orientations = cache.interfaces - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - # orientation = 3: left -> 6, right -> 5 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] - for j in eachnode(dg), i in eachnode(dg) - # Call pointwise Riemann solver - orientation = orientations[interface] - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, interface) - flux = surface_flux(u_ll, u_rr, orientation, equations) + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + # orientation = 3: left -> 6, right -> 5 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 - # Compute both nonconservative fluxes - noncons_left = nonconservative_flux(u_ll, u_rr, orientation, equations) - noncons_right = nonconservative_flux(u_rr, u_ll, orientation, equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, i, j, left_direction, left_id] = flux[v] + 0.5 * noncons_left[v] - surface_flux_values[v, i, j, right_direction, right_id] = flux[v] + 0.5 * noncons_right[v] - end + for j in eachnode(dg), i in eachnode(dg) + # Call pointwise Riemann solver + orientation = orientations[interface] + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, interface) + flux = surface_flux(u_ll, u_rr, orientation, equations) + + # Compute both nonconservative fluxes + noncons_left = nonconservative_flux(u_ll, u_rr, orientation, equations) + noncons_right = nonconservative_flux(u_rr, u_ll, orientation, equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, i, j, left_direction, left_id] = flux[v] + + 0.5 * + noncons_left[v] + surface_flux_values[v, i, j, right_direction, right_id] = flux[v] + + 0.5 * + noncons_right[v] + end + end end - end - return nothing + return nothing end - function prolong2boundaries!(cache, u, mesh::TreeMesh{3}, equations, surface_integral, dg::DG) 
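 # Copies the face values of boundary-adjacent elements into
 # `cache.boundaries.u`: side 1 holds the trace of an element lying in the
 # negative coordinate direction of the boundary, side 2 the trace of an
 # element in the positive direction, as the branches below show.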
- @unpack boundaries = cache - @unpack orientations, neighbor_sides = boundaries - - @threaded for boundary in eachboundary(dg, cache) - element = boundaries.neighbor_ids[boundary] - - if orientations[boundary] == 1 - # boundary in x-direction - if neighbor_sides[boundary] == 1 - # element in -x direction of boundary - for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations) - boundaries.u[1, v, j, k, boundary] = u[v, nnodes(dg), j, k, element] - end - else # Element in +x direction of boundary - for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations) - boundaries.u[2, v, j, k, boundary] = u[v, 1, j, k, element] + @unpack boundaries = cache + @unpack orientations, neighbor_sides = boundaries + + @threaded for boundary in eachboundary(dg, cache) + element = boundaries.neighbor_ids[boundary] + + if orientations[boundary] == 1 + # boundary in x-direction + if neighbor_sides[boundary] == 1 + # element in -x direction of boundary + for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations) + boundaries.u[1, v, j, k, boundary] = u[v, nnodes(dg), j, k, element] + end + else # Element in +x direction of boundary + for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations) + boundaries.u[2, v, j, k, boundary] = u[v, 1, j, k, element] + end + end + elseif orientations[boundary] == 2 + # boundary in y-direction + if neighbor_sides[boundary] == 1 + # element in -y direction of boundary + for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) + boundaries.u[1, v, i, k, boundary] = u[v, i, nnodes(dg), k, element] + end + else + # element in +y direction of boundary + for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) + boundaries.u[2, v, i, k, boundary] = u[v, i, 1, k, element] + end + end + else #if orientations[boundary] == 3 + # boundary in z-direction + if neighbor_sides[boundary] == 1 + # element in -z direction of boundary + for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) + boundaries.u[1, v, i, j, boundary] = u[v, i, j, nnodes(dg), element] + end + else + # element in +z direction of boundary + for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) + boundaries.u[2, v, i, j, boundary] = u[v, i, j, 1, element] + end + end end - end - elseif orientations[boundary] == 2 - # boundary in y-direction - if neighbor_sides[boundary] == 1 - # element in -y direction of boundary - for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) - boundaries.u[1, v, i, k, boundary] = u[v, i, nnodes(dg), k, element] - end - else - # element in +y direction of boundary - for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) - boundaries.u[2, v, i, k, boundary] = u[v, i, 1, k, element] - end - end - else #if orientations[boundary] == 3 - # boundary in z-direction - if neighbor_sides[boundary] == 1 - # element in -z direction of boundary - for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) - boundaries.u[1, v, i, j, boundary] = u[v, i, j, nnodes(dg), element] - end - else - # element in +z direction of boundary - for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) - boundaries.u[2, v, i, j, boundary] = u[v, i, j, 1, element] - end - end end - end - return nothing + return nothing end # TODO: Taal dimension agnostic function calc_boundary_flux!(cache, t, boundary_condition::BoundaryConditionPeriodic, mesh::TreeMesh{3}, equations, surface_integral, dg::DG) - @assert isempty(eachboundary(dg, cache)) + @assert 
isempty(eachboundary(dg, cache)) end function calc_boundary_flux!(cache, t, boundary_conditions::NamedTuple, mesh::TreeMesh{3}, equations, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[1], - equations, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[2], - equations, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[3], - equations, surface_integral, dg, cache, - 3, firsts[3], lasts[3]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[4], - equations, surface_integral, dg, cache, - 4, firsts[4], lasts[4]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[5], - equations, surface_integral, dg, cache, - 5, firsts[5], lasts[5]) - calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[6], - equations, surface_integral, dg, cache, - 6, firsts[6], lasts[6]) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[1], + equations, surface_integral, dg, cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[2], + equations, surface_integral, dg, cache, + 2, firsts[2], lasts[2]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[3], + equations, surface_integral, dg, cache, + 3, firsts[3], lasts[3]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[4], + equations, surface_integral, dg, cache, + 4, firsts[4], lasts[4]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[5], + equations, surface_integral, dg, cache, + 5, firsts[5], lasts[5]) + calc_boundary_flux_by_direction!(surface_flux_values, t, boundary_conditions[6], + equations, surface_integral, dg, cache, + 6, firsts[6], lasts[6]) end -function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any,5}, t, +function calc_boundary_flux_by_direction!(surface_flux_values::AbstractArray{<:Any, 5}, + t, boundary_condition, equations, surface_integral, dg::DG, cache, direction, first_boundary, last_boundary) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] - for j in eachnode(dg), i in eachnode(dg) - # Get boundary flux - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - 
u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end - x = get_node_coords(node_coordinates, equations, dg, i, j, boundary) - flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, surface_flux, - equations) - - # Copy flux to left and right element storage - for v in eachvariable(equations) - surface_flux_values[v, i, j, direction, neighbor] = flux[v] - end + for j in eachnode(dg), i in eachnode(dg) + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end + x = get_node_coords(node_coordinates, equations, dg, i, j, boundary) + flux = boundary_condition(u_inner, orientations[boundary], direction, x, t, + surface_flux, + equations) + + # Copy flux to left and right element storage + for v in eachvariable(equations) + surface_flux_values[v, i, j, direction, neighbor] = flux[v] + end + end end - end - return nothing + return nothing end - function prolong2mortars!(cache, u, mesh::TreeMesh{3}, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DGSEM) - # temporary buffer for projections - @unpack fstar_tmp1_threaded = cache - - @threaded for mortar in eachmortar(dg, cache) - fstar_tmp1 = fstar_tmp1_threaded[Threads.threadid()] - - lower_left_element = cache.mortars.neighbor_ids[1, mortar] - lower_right_element = cache.mortars.neighbor_ids[2, mortar] - upper_left_element = cache.mortars.neighbor_ids[3, mortar] - upper_right_element = cache.mortars.neighbor_ids[4, mortar] - large_element = cache.mortars.neighbor_ids[5, mortar] - - # Copy solution small to small - if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - for k in eachnode(dg), j in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper_left[2, v, j, k, mortar] = u[v, 1, j, k, upper_left_element] - cache.mortars.u_upper_right[2, v, j, k, mortar] = u[v, 1, j, k, upper_right_element] - cache.mortars.u_lower_left[2, v, j, k, mortar] = u[v, 1, j, k, lower_left_element] - cache.mortars.u_lower_right[2, v, j, k, mortar] = u[v, 1, j, k, lower_right_element] - end + # temporary buffer for projections + @unpack fstar_tmp1_threaded = cache + + @threaded for mortar in eachmortar(dg, cache) + fstar_tmp1 = fstar_tmp1_threaded[Threads.threadid()] + + lower_left_element = cache.mortars.neighbor_ids[1, mortar] + lower_right_element = cache.mortars.neighbor_ids[2, mortar] + upper_left_element = cache.mortars.neighbor_ids[3, mortar] + upper_right_element = cache.mortars.neighbor_ids[4, mortar] + large_element = cache.mortars.neighbor_ids[5, mortar] + + # Copy solution small to small + if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + for k in eachnode(dg), j in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper_left[2, v, j, k, mortar] = u[v, 1, j, k, + upper_left_element] + cache.mortars.u_upper_right[2, v, j, k, mortar] = u[v, 1, j, k, + upper_right_element] + cache.mortars.u_lower_left[2, v, j, k, mortar] = u[v, 1, j, k, + lower_left_element] + cache.mortars.u_lower_right[2, v, j, k, mortar] = u[v, 1, j, k, + lower_right_element] + end + end + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + for k 
in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper_left[2, v, i, k, mortar] = u[v, i, 1, k, + upper_left_element] + cache.mortars.u_upper_right[2, v, i, k, mortar] = u[v, i, 1, k, + upper_right_element] + cache.mortars.u_lower_left[2, v, i, k, mortar] = u[v, i, 1, k, + lower_left_element] + cache.mortars.u_lower_right[2, v, i, k, mortar] = u[v, i, 1, k, + lower_right_element] + end + end + else # orientations[mortar] == 3 + # L2 mortars in z-direction + for j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper_left[2, v, i, j, mortar] = u[v, i, j, 1, + upper_left_element] + cache.mortars.u_upper_right[2, v, i, j, mortar] = u[v, i, j, 1, + upper_right_element] + cache.mortars.u_lower_left[2, v, i, j, mortar] = u[v, i, j, 1, + lower_left_element] + cache.mortars.u_lower_right[2, v, i, j, mortar] = u[v, i, j, 1, + lower_right_element] + end + end + end + else # large_sides[mortar] == 2 -> small elements on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + for k in eachnode(dg), j in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper_left[1, v, j, k, mortar] = u[v, + nnodes(dg), + j, k, + upper_left_element] + cache.mortars.u_upper_right[1, v, j, k, mortar] = u[v, + nnodes(dg), + j, k, + upper_right_element] + cache.mortars.u_lower_left[1, v, j, k, mortar] = u[v, + nnodes(dg), + j, k, + lower_left_element] + cache.mortars.u_lower_right[1, v, j, k, mortar] = u[v, + nnodes(dg), + j, k, + lower_right_element] + end + end + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + for k in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper_left[1, v, i, k, mortar] = u[v, i, + nnodes(dg), + k, + upper_left_element] + cache.mortars.u_upper_right[1, v, i, k, mortar] = u[v, i, + nnodes(dg), + k, + upper_right_element] + cache.mortars.u_lower_left[1, v, i, k, mortar] = u[v, i, + nnodes(dg), + k, + lower_left_element] + cache.mortars.u_lower_right[1, v, i, k, mortar] = u[v, i, + nnodes(dg), + k, + lower_right_element] + end + end + else # if cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + for j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + cache.mortars.u_upper_left[1, v, i, j, mortar] = u[v, i, j, + nnodes(dg), + upper_left_element] + cache.mortars.u_upper_right[1, v, i, j, mortar] = u[v, i, j, + nnodes(dg), + upper_right_element] + cache.mortars.u_lower_left[1, v, i, j, mortar] = u[v, i, j, + nnodes(dg), + lower_left_element] + cache.mortars.u_lower_right[1, v, i, j, mortar] = u[v, i, j, + nnodes(dg), + lower_right_element] + end + end + end end - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - for k in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper_left[2, v, i, k, mortar] = u[v, i, 1, k, upper_left_element] - cache.mortars.u_upper_right[2, v, i, k, mortar] = u[v, i, 1, k, upper_right_element] - cache.mortars.u_lower_left[2, v, i, k, mortar] = u[v, i, 1, k, lower_left_element] - cache.mortars.u_lower_right[2, v, i, k, mortar] = u[v, i, 1, k, lower_right_element] - end - end - else # orientations[mortar] == 3 - # L2 mortars in z-direction - for j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper_left[2, v, i, j, mortar] = u[v, i, j, 1, upper_left_element] - cache.mortars.u_upper_right[2, v, i, j, mortar] = u[v, i, j, 1, 
upper_right_element] - cache.mortars.u_lower_left[2, v, i, j, mortar] = u[v, i, j, 1, lower_left_element] - cache.mortars.u_lower_right[2, v, i, j, mortar] = u[v, i, j, 1, lower_right_element] - end - end - end - else # large_sides[mortar] == 2 -> small elements on left side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - for k in eachnode(dg), j in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper_left[1, v, j, k, mortar] = u[v, nnodes(dg), j, k, upper_left_element] - cache.mortars.u_upper_right[1, v, j, k, mortar] = u[v, nnodes(dg), j, k, upper_right_element] - cache.mortars.u_lower_left[1, v, j, k, mortar] = u[v, nnodes(dg), j, k, lower_left_element] - cache.mortars.u_lower_right[1, v, j, k, mortar] = u[v, nnodes(dg), j, k, lower_right_element] - end - end - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - for k in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper_left[1, v, i, k, mortar] = u[v, i, nnodes(dg), k, upper_left_element] - cache.mortars.u_upper_right[1, v, i, k, mortar] = u[v, i, nnodes(dg), k, upper_right_element] - cache.mortars.u_lower_left[1, v, i, k, mortar] = u[v, i, nnodes(dg), k, lower_left_element] - cache.mortars.u_lower_right[1, v, i, k, mortar] = u[v, i, nnodes(dg), k, lower_right_element] - end - end - else # if cache.mortars.orientations[mortar] == 3 - # L2 mortars in z-direction - for j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - cache.mortars.u_upper_left[1, v, i, j, mortar] = u[v, i, j, nnodes(dg), upper_left_element] - cache.mortars.u_upper_right[1, v, i, j, mortar] = u[v, i, j, nnodes(dg), upper_right_element] - cache.mortars.u_lower_left[1, v, i, j, mortar] = u[v, i, j, nnodes(dg), lower_left_element] - cache.mortars.u_lower_right[1, v, i, j, mortar] = u[v, i, j, nnodes(dg), lower_right_element] - end - end - end - end - # Interpolate large element face data to small interface locations - if cache.mortars.large_sides[mortar] == 1 # -> large element on left side - leftright = 1 - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - u_large = view(u, :, nnodes(dg), :, :, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large, fstar_tmp1) - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - u_large = view(u, :, :, nnodes(dg), :, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large, fstar_tmp1) - else # cache.mortars.orientations[mortar] == 3 - # L2 mortars in z-direction - u_large = view(u, :, :, :, nnodes(dg), large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large, fstar_tmp1) - end - else # large_sides[mortar] == 2 -> large element on right side - leftright = 2 - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - u_large = view(u, :, 1, :, :, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large, fstar_tmp1) - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - u_large = view(u, :, :, 1, :, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large, fstar_tmp1) - else # cache.mortars.orientations[mortar] == 3 - # L2 mortars in z-direction - u_large = view(u, :, :, :, 1, large_element) - element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, mortar, u_large, fstar_tmp1) - end + # 
Interpolate large element face data to small interface locations + if cache.mortars.large_sides[mortar] == 1 # -> large element on left side + leftright = 1 + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, nnodes(dg), :, :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + u_large = view(u, :, :, nnodes(dg), :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + else # cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + u_large = view(u, :, :, :, nnodes(dg), large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + end + else # large_sides[mortar] == 2 -> large element on right side + leftright = 2 + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(u, :, 1, :, :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + u_large = view(u, :, :, 1, :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + else # cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + u_large = view(u, :, :, :, 1, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + end + end end - end - return nothing + return nothing end -@inline function element_solutions_to_mortars!(mortars, mortar_l2::LobattoLegendreMortarL2, leftright, mortar, - u_large::AbstractArray{<:Any,3}, fstar_tmp1) - multiply_dimensionwise!(view(mortars.u_upper_left, leftright, :, :, :, mortar), mortar_l2.forward_lower, mortar_l2.forward_upper, u_large, fstar_tmp1) - multiply_dimensionwise!(view(mortars.u_upper_right, leftright, :, :, :, mortar), mortar_l2.forward_upper, mortar_l2.forward_upper, u_large, fstar_tmp1) - multiply_dimensionwise!(view(mortars.u_lower_left, leftright, :, :, :, mortar), mortar_l2.forward_lower, mortar_l2.forward_lower, u_large, fstar_tmp1) - multiply_dimensionwise!(view(mortars.u_lower_right, leftright, :, :, :, mortar), mortar_l2.forward_upper, mortar_l2.forward_lower, u_large, fstar_tmp1) - return nothing +@inline function element_solutions_to_mortars!(mortars, + mortar_l2::LobattoLegendreMortarL2, + leftright, mortar, + u_large::AbstractArray{<:Any, 3}, + fstar_tmp1) + multiply_dimensionwise!(view(mortars.u_upper_left, leftright, :, :, :, mortar), + mortar_l2.forward_lower, mortar_l2.forward_upper, u_large, + fstar_tmp1) + multiply_dimensionwise!(view(mortars.u_upper_right, leftright, :, :, :, mortar), + mortar_l2.forward_upper, mortar_l2.forward_upper, u_large, + fstar_tmp1) + multiply_dimensionwise!(view(mortars.u_lower_left, leftright, :, :, :, mortar), + mortar_l2.forward_lower, mortar_l2.forward_lower, u_large, + fstar_tmp1) + multiply_dimensionwise!(view(mortars.u_lower_right, leftright, :, :, :, mortar), + mortar_l2.forward_upper, mortar_l2.forward_lower, u_large, + fstar_tmp1) + return nothing end - function calc_mortar_flux!(surface_flux_values, mesh::TreeMesh{3}, nonconservative_terms::False, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u_lower_left, u_lower_right, 
u_upper_left, u_upper_right, orientations = cache.mortars - @unpack (fstar_upper_left_threaded, fstar_upper_right_threaded, - fstar_lower_left_threaded, fstar_lower_right_threaded, - fstar_tmp1_threaded) = cache - - @threaded for mortar in eachmortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar_upper_left = fstar_upper_left_threaded[Threads.threadid()] - fstar_upper_right = fstar_upper_right_threaded[Threads.threadid()] - fstar_lower_left = fstar_lower_left_threaded[Threads.threadid()] - fstar_lower_right = fstar_lower_right_threaded[Threads.threadid()] - fstar_tmp1 = fstar_tmp1_threaded[Threads.threadid()] - - # Calculate fluxes - orientation = orientations[mortar] - calc_fstar!(fstar_upper_left, equations, surface_flux, dg, u_upper_left, mortar, orientation) - calc_fstar!(fstar_upper_right, equations, surface_flux, dg, u_upper_right, mortar, orientation) - calc_fstar!(fstar_lower_left, equations, surface_flux, dg, u_lower_left, mortar, orientation) - calc_fstar!(fstar_lower_right, equations, surface_flux, dg, u_lower_right, mortar, orientation) - - mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, mortar, - fstar_upper_left, fstar_upper_right, - fstar_lower_left, fstar_lower_right, - fstar_tmp1) - end - - return nothing + @unpack surface_flux = surface_integral + @unpack u_lower_left, u_lower_right, u_upper_left, u_upper_right, orientations = cache.mortars + @unpack (fstar_upper_left_threaded, fstar_upper_right_threaded, + fstar_lower_left_threaded, fstar_lower_right_threaded, + fstar_tmp1_threaded) = cache + + @threaded for mortar in eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar_upper_left = fstar_upper_left_threaded[Threads.threadid()] + fstar_upper_right = fstar_upper_right_threaded[Threads.threadid()] + fstar_lower_left = fstar_lower_left_threaded[Threads.threadid()] + fstar_lower_right = fstar_lower_right_threaded[Threads.threadid()] + fstar_tmp1 = fstar_tmp1_threaded[Threads.threadid()] + + # Calculate fluxes + orientation = orientations[mortar] + calc_fstar!(fstar_upper_left, equations, surface_flux, dg, u_upper_left, mortar, + orientation) + calc_fstar!(fstar_upper_right, equations, surface_flux, dg, u_upper_right, + mortar, orientation) + calc_fstar!(fstar_lower_left, equations, surface_flux, dg, u_lower_left, mortar, + orientation) + calc_fstar!(fstar_lower_right, equations, surface_flux, dg, u_lower_right, + mortar, orientation) + + mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, mortar, + fstar_upper_left, fstar_upper_right, + fstar_lower_left, fstar_lower_right, + fstar_tmp1) + end + + return nothing end function calc_mortar_flux!(surface_flux_values, @@ -948,96 +1069,143 @@ function calc_mortar_flux!(surface_flux_values, nonconservative_terms::True, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u_lower_left, u_lower_right, u_upper_left, u_upper_right, orientations, large_sides = cache.mortars - @unpack (fstar_upper_left_threaded, fstar_upper_right_threaded, - fstar_lower_left_threaded, fstar_lower_right_threaded, - fstar_tmp1_threaded) = cache - - @threaded for mortar in eachmortar(dg, cache) - # Choose thread-specific pre-allocated container - fstar_upper_left = fstar_upper_left_threaded[Threads.threadid()] - fstar_upper_right = fstar_upper_right_threaded[Threads.threadid()] - fstar_lower_left = 
fstar_lower_left_threaded[Threads.threadid()] - fstar_lower_right = fstar_lower_right_threaded[Threads.threadid()] - fstar_tmp1 = fstar_tmp1_threaded[Threads.threadid()] - - # Calculate fluxes - orientation = orientations[mortar] - calc_fstar!(fstar_upper_left, equations, surface_flux, dg, u_upper_left, mortar, orientation) - calc_fstar!(fstar_upper_right, equations, surface_flux, dg, u_upper_right, mortar, orientation) - calc_fstar!(fstar_lower_left, equations, surface_flux, dg, u_lower_left, mortar, orientation) - calc_fstar!(fstar_lower_right, equations, surface_flux, dg, u_lower_right, mortar, orientation) - - # Add nonconservative fluxes. - # These need to be adapted on the geometry (left/right) since the order of - # the arguments matters, based on the global SBP operator interpretation. - # The same interpretation (global SBP operators coupled discontinuously via - # central fluxes/SATs) explains why we need the factor 0.5. - # Alternatively, you can also follow the argumentation of Bohm et al. 2018 - # ("nonconservative diamond flux") - if large_sides[mortar] == 1 # -> small elements on right side - for j in eachnode(dg), i in eachnode(dg) - # Pull the left and right solutions - u_upper_left_ll, u_upper_left_rr = get_surface_node_vars(u_upper_left, equations, dg, i, j, mortar) - u_upper_right_ll, u_upper_right_rr = get_surface_node_vars(u_upper_right, equations, dg, i, j, mortar) - u_lower_left_ll, u_lower_left_rr = get_surface_node_vars(u_lower_left, equations, dg, i, j, mortar) - u_lower_right_ll, u_lower_right_rr = get_surface_node_vars(u_lower_right, equations, dg, i, j, mortar) - # Call pointwise nonconservative term - noncons_upper_left = nonconservative_flux(u_upper_left_ll, u_upper_left_rr, orientation, equations) - noncons_upper_right = nonconservative_flux(u_upper_right_ll, u_upper_right_rr, orientation, equations) - noncons_lower_left = nonconservative_flux(u_lower_left_ll, u_lower_left_rr, orientation, equations) - noncons_lower_right = nonconservative_flux(u_lower_right_ll, u_lower_right_rr, orientation, equations) - # Add to primary and secondary temporary storage - multiply_add_to_node_vars!(fstar_upper_left, 0.5, noncons_upper_left, equations, dg, i, j) - multiply_add_to_node_vars!(fstar_upper_right, 0.5, noncons_upper_right, equations, dg, i, j) - multiply_add_to_node_vars!(fstar_lower_left, 0.5, noncons_lower_left, equations, dg, i, j) - multiply_add_to_node_vars!(fstar_lower_right, 0.5, noncons_lower_right, equations, dg, i, j) - end - else # large_sides[mortar] == 2 -> small elements on the left - for j in eachnode(dg), i in eachnode(dg) - # Pull the left and right solutions - u_upper_left_ll, u_upper_left_rr = get_surface_node_vars(u_upper_left, equations, dg, i, j, mortar) - u_upper_right_ll, u_upper_right_rr = get_surface_node_vars(u_upper_right, equations, dg, i, j, mortar) - u_lower_left_ll, u_lower_left_rr = get_surface_node_vars(u_lower_left, equations, dg, i, j, mortar) - u_lower_right_ll, u_lower_right_rr = get_surface_node_vars(u_lower_right, equations, dg, i, j, mortar) - # Call pointwise nonconservative term - noncons_upper_left = nonconservative_flux(u_upper_left_rr, u_upper_left_ll, orientation, equations) - noncons_upper_right = nonconservative_flux(u_upper_right_rr, u_upper_right_ll, orientation, equations) - noncons_lower_left = nonconservative_flux(u_lower_left_rr, u_lower_left_ll, orientation, equations) - noncons_lower_right = nonconservative_flux(u_lower_right_rr, u_lower_right_ll, orientation, equations) - # Add to primary and secondary 
temporary storage - multiply_add_to_node_vars!(fstar_upper_left, 0.5, noncons_upper_left, equations, dg, i, j) - multiply_add_to_node_vars!(fstar_upper_right, 0.5, noncons_upper_right, equations, dg, i, j) - multiply_add_to_node_vars!(fstar_lower_left, 0.5, noncons_lower_left, equations, dg, i, j) - multiply_add_to_node_vars!(fstar_lower_right, 0.5, noncons_lower_right, equations, dg, i, j) - end - end + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u_lower_left, u_lower_right, u_upper_left, u_upper_right, orientations, large_sides = cache.mortars + @unpack (fstar_upper_left_threaded, fstar_upper_right_threaded, + fstar_lower_left_threaded, fstar_lower_right_threaded, + fstar_tmp1_threaded) = cache + + @threaded for mortar in eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar_upper_left = fstar_upper_left_threaded[Threads.threadid()] + fstar_upper_right = fstar_upper_right_threaded[Threads.threadid()] + fstar_lower_left = fstar_lower_left_threaded[Threads.threadid()] + fstar_lower_right = fstar_lower_right_threaded[Threads.threadid()] + fstar_tmp1 = fstar_tmp1_threaded[Threads.threadid()] + + # Calculate fluxes + orientation = orientations[mortar] + calc_fstar!(fstar_upper_left, equations, surface_flux, dg, u_upper_left, mortar, + orientation) + calc_fstar!(fstar_upper_right, equations, surface_flux, dg, u_upper_right, + mortar, orientation) + calc_fstar!(fstar_lower_left, equations, surface_flux, dg, u_lower_left, mortar, + orientation) + calc_fstar!(fstar_lower_right, equations, surface_flux, dg, u_lower_right, + mortar, orientation) + + # Add nonconservative fluxes. + # These need to be adapted on the geometry (left/right) since the order of + # the arguments matters, based on the global SBP operator interpretation. + # The same interpretation (global SBP operators coupled discontinuously via + # central fluxes/SATs) explains why we need the factor 0.5. + # Alternatively, you can also follow the argumentation of Bohm et al. 
2018 + # ("nonconservative diamond flux") + if large_sides[mortar] == 1 # -> small elements on right side + for j in eachnode(dg), i in eachnode(dg) + # Pull the left and right solutions + u_upper_left_ll, u_upper_left_rr = get_surface_node_vars(u_upper_left, + equations, dg, + i, j, mortar) + u_upper_right_ll, u_upper_right_rr = get_surface_node_vars(u_upper_right, + equations, + dg, i, j, + mortar) + u_lower_left_ll, u_lower_left_rr = get_surface_node_vars(u_lower_left, + equations, dg, + i, j, mortar) + u_lower_right_ll, u_lower_right_rr = get_surface_node_vars(u_lower_right, + equations, + dg, i, j, + mortar) + # Call pointwise nonconservative term + noncons_upper_left = nonconservative_flux(u_upper_left_ll, + u_upper_left_rr, orientation, + equations) + noncons_upper_right = nonconservative_flux(u_upper_right_ll, + u_upper_right_rr, + orientation, equations) + noncons_lower_left = nonconservative_flux(u_lower_left_ll, + u_lower_left_rr, orientation, + equations) + noncons_lower_right = nonconservative_flux(u_lower_right_ll, + u_lower_right_rr, + orientation, equations) + # Add to primary and secondary temporary storage + multiply_add_to_node_vars!(fstar_upper_left, 0.5, noncons_upper_left, + equations, dg, i, j) + multiply_add_to_node_vars!(fstar_upper_right, 0.5, noncons_upper_right, + equations, dg, i, j) + multiply_add_to_node_vars!(fstar_lower_left, 0.5, noncons_lower_left, + equations, dg, i, j) + multiply_add_to_node_vars!(fstar_lower_right, 0.5, noncons_lower_right, + equations, dg, i, j) + end + else # large_sides[mortar] == 2 -> small elements on the left + for j in eachnode(dg), i in eachnode(dg) + # Pull the left and right solutions + u_upper_left_ll, u_upper_left_rr = get_surface_node_vars(u_upper_left, + equations, dg, + i, j, mortar) + u_upper_right_ll, u_upper_right_rr = get_surface_node_vars(u_upper_right, + equations, + dg, i, j, + mortar) + u_lower_left_ll, u_lower_left_rr = get_surface_node_vars(u_lower_left, + equations, dg, + i, j, mortar) + u_lower_right_ll, u_lower_right_rr = get_surface_node_vars(u_lower_right, + equations, + dg, i, j, + mortar) + # Call pointwise nonconservative term + noncons_upper_left = nonconservative_flux(u_upper_left_rr, + u_upper_left_ll, orientation, + equations) + noncons_upper_right = nonconservative_flux(u_upper_right_rr, + u_upper_right_ll, + orientation, equations) + noncons_lower_left = nonconservative_flux(u_lower_left_rr, + u_lower_left_ll, orientation, + equations) + noncons_lower_right = nonconservative_flux(u_lower_right_rr, + u_lower_right_ll, + orientation, equations) + # Add to primary and secondary temporary storage + multiply_add_to_node_vars!(fstar_upper_left, 0.5, noncons_upper_left, + equations, dg, i, j) + multiply_add_to_node_vars!(fstar_upper_right, 0.5, noncons_upper_right, + equations, dg, i, j) + multiply_add_to_node_vars!(fstar_lower_left, 0.5, noncons_lower_left, + equations, dg, i, j) + multiply_add_to_node_vars!(fstar_lower_right, 0.5, noncons_lower_right, + equations, dg, i, j) + end + end - mortar_fluxes_to_elements!(surface_flux_values, - mesh, equations, mortar_l2, dg, cache, mortar, - fstar_upper_left, fstar_upper_right, - fstar_lower_left, fstar_lower_right, - fstar_tmp1) - end + mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations, mortar_l2, dg, cache, mortar, + fstar_upper_left, fstar_upper_right, + fstar_lower_left, fstar_lower_right, + fstar_tmp1) + end - return nothing + return nothing end -@inline function calc_fstar!(destination::AbstractArray{<:Any,3}, equations, +@inline 
function calc_fstar!(destination::AbstractArray{<:Any, 3}, equations, surface_flux, dg::DGSEM, u_interfaces, interface, orientation) + for j in eachnode(dg), i in eachnode(dg) + # Call pointwise two-point numerical flux function + u_ll, u_rr = get_surface_node_vars(u_interfaces, equations, dg, i, j, interface) + flux = surface_flux(u_ll, u_rr, orientation, equations) - for j in eachnode(dg), i in eachnode(dg) - # Call pointwise two-point numerical flux function - u_ll, u_rr = get_surface_node_vars(u_interfaces, equations, dg, i, j, interface) - flux = surface_flux(u_ll, u_rr, orientation, equations) - - # Copy flux to left and right element storage - set_node_vars!(destination, flux, equations, dg, i, j) - end + # Copy flux to left and right element storage + set_node_vars!(destination, flux, equations, dg, i, j) + end - return nothing + return nothing end @inline function mortar_fluxes_to_elements!(surface_flux_values, @@ -1048,164 +1216,171 @@ end fstar_upper_left, fstar_upper_right, fstar_lower_left, fstar_lower_right, fstar_tmp1) - lower_left_element = cache.mortars.neighbor_ids[1, mortar] - lower_right_element = cache.mortars.neighbor_ids[2, mortar] - upper_left_element = cache.mortars.neighbor_ids[3, mortar] - upper_right_element = cache.mortars.neighbor_ids[4, mortar] - large_element = cache.mortars.neighbor_ids[5, mortar] - - # Copy flux small to small - if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 1 - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - direction = 3 - else # if cache.mortars.orientations[mortar] == 3 - # L2 mortars in z-direction - direction = 5 - end - else # large_sides[mortar] == 2 -> small elements on left side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 2 - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - direction = 4 - else # if cache.mortars.orientations[mortar] == 3 - # L2 mortars in z-direction - direction = 6 - end - end - surface_flux_values[:, :, :, direction, upper_left_element] .= fstar_upper_left - surface_flux_values[:, :, :, direction, upper_right_element] .= fstar_upper_right - surface_flux_values[:, :, :, direction, lower_left_element] .= fstar_lower_left - surface_flux_values[:, :, :, direction, lower_right_element] .= fstar_lower_right - - # Project small fluxes to large element - if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 2 - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - direction = 4 - else # if cache.mortars.orientations[mortar] == 3 - # L2 mortars in z-direction - direction = 6 + lower_left_element = cache.mortars.neighbor_ids[1, mortar] + lower_right_element = cache.mortars.neighbor_ids[2, mortar] + upper_left_element = cache.mortars.neighbor_ids[3, mortar] + upper_right_element = cache.mortars.neighbor_ids[4, mortar] + large_element = cache.mortars.neighbor_ids[5, mortar] + + # Copy flux small to small + if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + direction = 3 + else # if cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + direction = 5 + end + 
else # large_sides[mortar] == 2 -> small elements on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + direction = 4 + else # if cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + direction = 6 + end end - else # large_sides[mortar] == 2 -> small elements on left side - if cache.mortars.orientations[mortar] == 1 - # L2 mortars in x-direction - direction = 1 - elseif cache.mortars.orientations[mortar] == 2 - # L2 mortars in y-direction - direction = 3 - else # if cache.mortars.orientations[mortar] == 3 - # L2 mortars in z-direction - direction = 5 + surface_flux_values[:, :, :, direction, upper_left_element] .= fstar_upper_left + surface_flux_values[:, :, :, direction, upper_right_element] .= fstar_upper_right + surface_flux_values[:, :, :, direction, lower_left_element] .= fstar_lower_left + surface_flux_values[:, :, :, direction, lower_right_element] .= fstar_lower_right + + # Project small fluxes to large element + if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + direction = 4 + else # if cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + direction = 6 + end + else # large_sides[mortar] == 2 -> small elements on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + direction = 3 + else # if cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + direction = 5 + end end - end - - multiply_dimensionwise!( - view(surface_flux_values, :, :, :, direction, large_element), - mortar_l2.reverse_lower, mortar_l2.reverse_upper, fstar_upper_left, fstar_tmp1) - add_multiply_dimensionwise!( - view(surface_flux_values, :, :, :, direction, large_element), - mortar_l2.reverse_upper, mortar_l2.reverse_upper, fstar_upper_right, fstar_tmp1) - add_multiply_dimensionwise!( - view(surface_flux_values, :, :, :, direction, large_element), - mortar_l2.reverse_lower, mortar_l2.reverse_lower, fstar_lower_left, fstar_tmp1) - add_multiply_dimensionwise!( - view(surface_flux_values, :, :, :, direction, large_element), - mortar_l2.reverse_upper, mortar_l2.reverse_lower, fstar_lower_right, fstar_tmp1) - - return nothing -end + multiply_dimensionwise!(view(surface_flux_values, :, :, :, direction, + large_element), + mortar_l2.reverse_lower, mortar_l2.reverse_upper, + fstar_upper_left, fstar_tmp1) + add_multiply_dimensionwise!(view(surface_flux_values, :, :, :, direction, + large_element), + mortar_l2.reverse_upper, mortar_l2.reverse_upper, + fstar_upper_right, fstar_tmp1) + add_multiply_dimensionwise!(view(surface_flux_values, :, :, :, direction, + large_element), + mortar_l2.reverse_lower, mortar_l2.reverse_lower, + fstar_lower_left, fstar_tmp1) + add_multiply_dimensionwise!(view(surface_flux_values, :, :, :, direction, + large_element), + mortar_l2.reverse_upper, mortar_l2.reverse_lower, + fstar_lower_right, fstar_tmp1) + + return nothing +end function calc_surface_integral!(du, u, mesh::Union{TreeMesh{3}, StructuredMesh{3}}, equations, surface_integral, dg::DGSEM, cache) - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache.elements - - # Access the factors only once before beginning 
the loop to increase performance. - # We also use explicit assignments instead of `+=` and `-=` to let `@muladd` - # turn these into FMAs (see comment at the top of the file). - factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for m in eachnode(dg), l in eachnode(dg) - for v in eachvariable(equations) - # surface at -x - du[v, 1, l, m, element] = ( - du[v, 1, l, m, element] - surface_flux_values[v, l, m, 1, element] * factor_1) - - # surface at +x - du[v, nnodes(dg), l, m, element] = ( - du[v, nnodes(dg), l, m, element] + surface_flux_values[v, l, m, 2, element] * factor_2) - - # surface at -y - du[v, l, 1, m, element] = ( - du[v, l, 1, m, element] - surface_flux_values[v, l, m, 3, element] * factor_1) - - # surface at +y - du[v, l, nnodes(dg), m, element] = ( - du[v, l, nnodes(dg), m, element] + surface_flux_values[v, l, m, 4, element] * factor_2) - - # surface at -z - du[v, l, m, 1, element] = ( - du[v, l, m, 1, element] - surface_flux_values[v, l, m, 5, element] * factor_1) - - # surface at +z - du[v, l, m, nnodes(dg), element] = ( - du[v, l, m, nnodes(dg), element] + surface_flux_values[v, l, m, 6, element] * factor_2) - end + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache.elements + + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` and `-=` to let `@muladd` + # turn these into FMAs (see comment at the top of the file). + factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for m in eachnode(dg), l in eachnode(dg) + for v in eachvariable(equations) + # surface at -x + du[v, 1, l, m, element] = (du[v, 1, l, m, element] - + surface_flux_values[v, l, m, 1, element] * + factor_1) + + # surface at +x + du[v, nnodes(dg), l, m, element] = (du[v, nnodes(dg), l, m, element] + + surface_flux_values[v, l, m, 2, + element] * + factor_2) + + # surface at -y + du[v, l, 1, m, element] = (du[v, l, 1, m, element] - + surface_flux_values[v, l, m, 3, element] * + factor_1) + + # surface at +y + du[v, l, nnodes(dg), m, element] = (du[v, l, nnodes(dg), m, element] + + surface_flux_values[v, l, m, 4, + element] * + factor_2) + + # surface at -z + du[v, l, m, 1, element] = (du[v, l, m, 1, element] - + surface_flux_values[v, l, m, 5, element] * + factor_1) + + # surface at +z + du[v, l, m, nnodes(dg), element] = (du[v, l, m, nnodes(dg), element] + + surface_flux_values[v, l, m, 6, + element] * + factor_2) + end + end end - end - return nothing + return nothing end - function apply_jacobian!(du, mesh::TreeMesh{3}, equations, dg::DG, cache) + @threaded for element in eachelement(dg, cache) + factor = -cache.elements.inverse_jacobian[element] - @threaded for element in eachelement(dg, cache) - factor = -cache.elements.inverse_jacobian[element] - - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, j, k, element] *= factor - end + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, j, k, element] *= factor + end + end end - end - return nothing + return nothing end - # TODO: Taal dimension agnostic function calc_sources!(du, u, t, source_terms::Nothing, equations::AbstractEquations{3}, dg::DG, cache) - return nothing + return nothing end function calc_sources!(du, u, t, source_terms, 
equations::AbstractEquations{3}, dg::DG, cache) - - @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, k, element) - x_local = get_node_coords(cache.elements.node_coordinates, equations, dg, i, j, k, element) - du_local = source_terms(u_local, x_local, t, equations) - add_to_node_vars!(du, du_local, equations, dg, i, j, k, element) + @threaded for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, k, element) + x_local = get_node_coords(cache.elements.node_coordinates, equations, dg, i, + j, k, element) + du_local = source_terms(u_local, x_local, t, equations) + add_to_node_vars!(du, du_local, equations, dg, i, j, k, element) + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl b/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl index 4fe7a5477de..ec3647ed649 100644 --- a/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl +++ b/src/solvers/dgsem_tree/dg_3d_compressible_euler.jl @@ -13,7 +13,6 @@ # We do not wrap this code in `@muladd begin ... end` block. Optimizations like # this are handled automatically by LoopVectorization.jl. - # We specialize on `PtrArray` since these will be returned by `Trixi.wrap_array` # if LoopVectorization.jl can handle the array types. This ensures that `@turbo` # works efficiently here. @@ -23,244 +22,245 @@ equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_shima_etal_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, k, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. - u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, k, element] - rho_v1 = u_cons[2, i, j, k, element] - rho_v2 = u_cons[3, i, j, k, element] - rho_v3 = u_cons[4, i, j, k, element] - rho_e = u_cons[5, i, j, k, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - - u_prim[i, j, k, 1] = rho - u_prim[i, j, k, 2] = v1 - u_prim[i, j, k, 3] = v2 - u_prim[i, j, k, 4] = v3 - u_prim[i, j, k, 5] = p - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). 
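
The permuted layout described in the comments above hinges on Julia's column-major storage: fusing the node indices `(j, k)` into a single linear index `jk` makes the flux loops run along the first, contiguous array dimension, which is what enables SIMD vectorization. A minimal sketch with plain `Array`s (the kernels use statically sized `StrideArray`s; `n` and `nvars` are hypothetical sizes):

```julia
n = 4        # hypothetical number of nodes per coordinate direction
nvars = 5    # hypothetical number of variables
u_prim = rand(n, n, n, nvars)
u_prim_permuted = zeros(n^2, n, nvars)

for v in 1:nvars, k in 1:n, j in 1:n, i in 1:n
    jk = j + n * (k - 1)  # column-major linearization of (j, k)
    u_prim_permuted[jk, i, v] = u_prim[i, j, k, v]
end

# The loop above is equivalent to permuting and fusing dimensions:
@assert u_prim_permuted ==
        reshape(permutedims(u_prim, (2, 3, 1, 4)), n^2, n, nvars)
```
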
- du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. - for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for jk in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_permuted[jk, i, 1] - v1_ll = u_prim_permuted[jk, i, 2] - v2_ll = u_prim_permuted[jk, i, 3] - v3_ll = u_prim_permuted[jk, i, 4] - p_ll = u_prim_permuted[jk, i, 5] - - rho_rr = u_prim_permuted[jk, ii, 1] - v1_rr = u_prim_permuted[jk, ii, 2] - v2_rr = u_prim_permuted[jk, ii, 3] - v3_rr = u_prim_permuted[jk, ii, 4] - p_rr = u_prim_permuted[jk, ii, 5] - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - pv1_avg = 0.5 * (p_ll * v1_rr + p_rr * v1_ll) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_avg * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg - f5 = p_avg * v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[jk, i, 1] += factor_i * f1 - du_permuted[jk, i, 2] += factor_i * f2 - du_permuted[jk, i, 3] += factor_i * f3 - du_permuted[jk, i, 4] += factor_i * f4 - du_permuted[jk, i, 5] += factor_i * f5 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[jk, ii, 1] += factor_ii * f1 - du_permuted[jk, ii, 2] += factor_ii * f2 - du_permuted[jk, ii, 3] += factor_ii * f3 - du_permuted[jk, ii, 4] += factor_ii * f4 - du_permuted[jk, ii, 5] += factor_ii * f5 + @unpack derivative_split = dg.basis + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, k, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. 
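
For reference, the conversion performed by the loop below, written as a standalone function; `gamma` is an assumed heat-capacity ratio and the variable ordering `(rho, v1, v2, v3, p)` matches the kernel:

```julia
# Minimal sketch of the conserved-to-primitive conversion for the 3D
# compressible Euler equations (hypothetical standalone version).
function cons2prim_sketch(u_cons, gamma)
    rho, rho_v1, rho_v2, rho_v3, rho_e = u_cons
    v1 = rho_v1 / rho
    v2 = rho_v2 / rho
    v3 = rho_v3 / rho
    # Pressure: total minus kinetic energy density, scaled by gamma - 1
    p = (gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3))
    return (rho, v1, v2, v3, p)
end

cons2prim_sketch((1.0, 0.1, -0.2, 0.3, 10.0), 1.4)  # usage example
```
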
+ u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations)))) + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, k, element] + rho_v1 = u_cons[2, i, j, k, element] + rho_v2 = u_cons[3, i, j, k, element] + rho_v3 = u_cons[4, i, j, k, element] + rho_e = u_cons[5, i, j, k, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + + u_prim[i, j, k, 1] = rho + u_prim[i, j, k, 2] = v1 + u_prim[i, j, k, 3] = v2 + u_prim[i, j, k, 4] = v3 + u_prim[i, j, k, 5] = p end - end - - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - du[i, j, k, v] = du_permuted[jk, i, v] - end - - - # y direction - # A possible permutation of array dimensions with improved opportunities for - # SIMD vectorization appeared to be slower than the direct version used here - # in preliminary numerical experiments on an AVX2 system. - for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for k in eachnode(dg), i in eachnode(dg) - rho_ll = u_prim[i, j, k, 1] - v1_ll = u_prim[i, j, k, 2] - v2_ll = u_prim[i, j, k, 3] - v3_ll = u_prim[i, j, k, 4] - p_ll = u_prim[i, j, k, 5] - - rho_rr = u_prim[i, jj, k, 1] - v1_rr = u_prim[i, jj, k, 2] - v2_rr = u_prim[i, jj, k, 3] - v3_rr = u_prim[i, jj, k, 4] - p_rr = u_prim[i, jj, k, 5] - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - pv2_avg = 0.5 * (p_ll * v2_rr + p_rr * v2_ll) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_avg * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = f1 * v3_avg - f5 = p_avg*v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, k, 1] += factor_j * f1 - du[i, j, k, 2] += factor_j * f2 - du[i, j, k, 3] += factor_j * f3 - du[i, j, k, 4] += factor_j * f4 - du[i, j, k, 5] += factor_j * f5 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, k, 1] += factor_jj * f1 - du[i, jj, k, 2] += factor_jj * f2 - du[i, jj, k, 3] += factor_jj * f3 - du[i, jj, k, 4] += factor_jj * f4 - du[i, jj, k, 5] += factor_jj * f5 + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). + du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + jk = j + nnodes(dg) * (k - 1) + u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] end - end - - - # z direction - # The memory layout is already optimal for SIMD vectorization in this loop. - # We just squeeze the first two dimensions to make the code slightly faster. 
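
The "squeeze" mentioned in the comment above fuses the first two node dimensions so the inner loop runs over one contiguous index. A sketch with a plain `reshape`, which shares memory with the original array (the kernel instead wraps the pointer in a `PtrArray` inside `GC.@preserve` to preserve the static sizes):

```julia
n, nvars = 4, 5                                   # hypothetical sizes
u_prim = rand(n, n, n, nvars)
u_prim_reshaped = reshape(u_prim, n^2, n, nvars)  # no copy, shared memory

# Same entry under both index conventions
i, j, k, v = 2, 3, 1, 5
ij = i + n * (j - 1)
@assert u_prim_reshaped[ij, k, v] == u_prim[i, j, k, v]
```
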
- GC.@preserve u_prim begin - u_prim_reshaped = PtrArray(pointer(u_prim), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - du_reshaped = PtrArray(pointer(du), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - for k in eachnode(dg), kk in (k+1):nnodes(dg) - @turbo for ij in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_reshaped[ij, k, 1] - v1_ll = u_prim_reshaped[ij, k, 2] - v2_ll = u_prim_reshaped[ij, k, 3] - v3_ll = u_prim_reshaped[ij, k, 4] - p_ll = u_prim_reshaped[ij, k, 5] - - rho_rr = u_prim_reshaped[ij, kk, 1] - v1_rr = u_prim_reshaped[ij, kk, 2] - v2_rr = u_prim_reshaped[ij, kk, 3] - v3_rr = u_prim_reshaped[ij, kk, 4] - p_rr = u_prim_reshaped[ij, kk, 5] - - # Compute required mean values - rho_avg = 0.5 * (rho_ll + rho_rr) - v1_avg = 0.5 * ( v1_ll + v1_rr) - v2_avg = 0.5 * ( v2_ll + v2_rr) - v3_avg = 0.5 * ( v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - pv3_avg = 0.5 * (p_ll * v3_rr + p_rr * v3_ll) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_avg * v3_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg + p_avg - f5 = p_avg*v3_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv3_avg - - # Add scaled fluxes to RHS - factor_k = alpha * derivative_split[k, kk] - du_reshaped[ij, k, 1] += factor_k * f1 - du_reshaped[ij, k, 2] += factor_k * f2 - du_reshaped[ij, k, 3] += factor_k * f3 - du_reshaped[ij, k, 4] += factor_k * f4 - du_reshaped[ij, k, 5] += factor_k * f5 - - factor_kk = alpha * derivative_split[kk, k] - du_reshaped[ij, kk, 1] += factor_kk * f1 - du_reshaped[ij, kk, 2] += factor_kk * f2 - du_reshaped[ij, kk, 3] += factor_kk * f3 - du_reshaped[ij, kk, 4] += factor_kk * f4 - du_reshaped[ij, kk, 5] += factor_kk * f5 - end + fill!(du_permuted, zero(eltype(du_permuted))) + + # Next, we basically inline the volume flux. To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. 
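
The triangular loop that follows exploits two properties: the two-point volume flux is symmetric in its arguments, and the diagonal of the split derivative matrix does not contribute in these kernels, so each unordered node pair needs to be evaluated only once and scattered to both rows. A scalar toy sketch of this pattern (hypothetical names; any symmetric `fsym`):

```julia
n = 4
u = rand(n)
D = randn(n, n)              # stands in for `derivative_split`
fsym(a, b) = 0.5 * (a + b)   # some symmetric two-point flux
du = zeros(n)

for i in 1:n, ii in (i + 1):n   # strict upper triangle only
    f = fsym(u[i], u[ii])       # evaluated once per unordered pair
    du[i] += D[i, ii] * f       # scattered to row i ...
    du[ii] += D[ii, i] * f      # ... and, reusing f, to row ii
end
```
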
+ for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for jk in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_permuted[jk, i, 1] + v1_ll = u_prim_permuted[jk, i, 2] + v2_ll = u_prim_permuted[jk, i, 3] + v3_ll = u_prim_permuted[jk, i, 4] + p_ll = u_prim_permuted[jk, i, 5] + + rho_rr = u_prim_permuted[jk, ii, 1] + v1_rr = u_prim_permuted[jk, ii, 2] + v2_rr = u_prim_permuted[jk, ii, 3] + v3_rr = u_prim_permuted[jk, ii, 4] + p_rr = u_prim_permuted[jk, ii, 5] + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + pv1_avg = 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_avg * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + f5 = p_avg * v1_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv1_avg + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[jk, i, 1] += factor_i * f1 + du_permuted[jk, i, 2] += factor_i * f2 + du_permuted[jk, i, 3] += factor_i * f3 + du_permuted[jk, i, 4] += factor_i * f4 + du_permuted[jk, i, 5] += factor_i * f5 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[jk, ii, 1] += factor_ii * f1 + du_permuted[jk, ii, 2] += factor_ii * f2 + du_permuted[jk, ii, 3] += factor_ii * f3 + du_permuted[jk, ii, 4] += factor_ii * f4 + du_permuted[jk, ii, 5] += factor_ii * f5 + end end - end # GC.@preserve u_prim + @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, k, element] += du[i, j, k, v] - end -end + jk = j + nnodes(dg) * (k - 1) + du[i, j, k, v] = du_permuted[jk, i, v] + end + # y direction + # A possible permutation of array dimensions with improved opportunities for + # SIMD vectorization appeared to be slower than the direct version used here + # in preliminary numerical experiments on an AVX2 system. 
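
For reference, the two-point flux assembled from the mean values in the x-direction loop above, as a standalone function (the kinetic-energy-preserving flux of Shima et al.; `gamma` is an assumed heat-capacity ratio, so `inv_gamma_minus_one` in the kernel corresponds to `1 / (gamma - 1)`):

```julia
function flux_shima_x_sketch(prim_ll, prim_rr, gamma)
    rho_ll, v1_ll, v2_ll, v3_ll, p_ll = prim_ll
    rho_rr, v1_rr, v2_rr, v3_rr, p_rr = prim_rr

    # Mean values, exactly as in the kernel above
    rho_avg = 0.5 * (rho_ll + rho_rr)
    v1_avg = 0.5 * (v1_ll + v1_rr)
    v2_avg = 0.5 * (v2_ll + v2_rr)
    v3_avg = 0.5 * (v3_ll + v3_rr)
    p_avg = 0.5 * (p_ll + p_rr)
    kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr)
    pv1_avg = 0.5 * (p_ll * v1_rr + p_rr * v1_ll)

    # Fluxes for Cartesian orientation 1 (x-direction)
    f1 = rho_avg * v1_avg
    f2 = f1 * v1_avg + p_avg
    f3 = f1 * v2_avg
    f4 = f1 * v3_avg
    f5 = p_avg * v1_avg / (gamma - 1) + f1 * kin_avg + pv1_avg
    return (f1, f2, f3, f4, f5)
end

# Consistency: for identical left/right states this reduces to the
# physical x-direction flux.
flux_shima_x_sketch((1.0, 0.1, 0.2, 0.3, 1.0), (1.0, 0.1, 0.2, 0.3, 1.0), 1.4)
```
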
+ for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for k in eachnode(dg), i in eachnode(dg) + rho_ll = u_prim[i, j, k, 1] + v1_ll = u_prim[i, j, k, 2] + v2_ll = u_prim[i, j, k, 3] + v3_ll = u_prim[i, j, k, 4] + p_ll = u_prim[i, j, k, 5] + + rho_rr = u_prim[i, jj, k, 1] + v1_rr = u_prim[i, jj, k, 2] + v2_rr = u_prim[i, jj, k, 3] + v3_rr = u_prim[i, jj, k, 4] + p_rr = u_prim[i, jj, k, 5] + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + pv2_avg = 0.5 * (p_ll * v2_rr + p_rr * v2_ll) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_avg * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = f1 * v3_avg + f5 = p_avg * v2_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv2_avg + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, k, 1] += factor_j * f1 + du[i, j, k, 2] += factor_j * f2 + du[i, j, k, 3] += factor_j * f3 + du[i, j, k, 4] += factor_j * f4 + du[i, j, k, 5] += factor_j * f5 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, k, 1] += factor_jj * f1 + du[i, jj, k, 2] += factor_jj * f2 + du[i, jj, k, 3] += factor_jj * f3 + du[i, jj, k, 4] += factor_jj * f4 + du[i, jj, k, 5] += factor_jj * f5 + end + end + # z direction + # The memory layout is already optimal for SIMD vectorization in this loop. + # We just squeeze the first two dimensions to make the code slightly faster. + GC.@preserve u_prim begin + u_prim_reshaped = PtrArray(pointer(u_prim), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + du_reshaped = PtrArray(pointer(du), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + for k in eachnode(dg), kk in (k + 1):nnodes(dg) + @turbo for ij in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_reshaped[ij, k, 1] + v1_ll = u_prim_reshaped[ij, k, 2] + v2_ll = u_prim_reshaped[ij, k, 3] + v3_ll = u_prim_reshaped[ij, k, 4] + p_ll = u_prim_reshaped[ij, k, 5] + + rho_rr = u_prim_reshaped[ij, kk, 1] + v1_rr = u_prim_reshaped[ij, kk, 2] + v2_rr = u_prim_reshaped[ij, kk, 3] + v3_rr = u_prim_reshaped[ij, kk, 4] + p_rr = u_prim_reshaped[ij, kk, 5] + + # Compute required mean values + rho_avg = 0.5 * (rho_ll + rho_rr) + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + kin_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + pv3_avg = 0.5 * (p_ll * v3_rr + p_rr * v3_ll) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_avg * v3_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + p_avg + f5 = p_avg * v3_avg * equations.inv_gamma_minus_one + f1 * kin_avg + pv3_avg + + # Add scaled fluxes to RHS + factor_k = alpha * derivative_split[k, kk] + du_reshaped[ij, k, 1] += factor_k * f1 + du_reshaped[ij, k, 2] += factor_k * f2 + du_reshaped[ij, k, 3] += factor_k * f3 + du_reshaped[ij, k, 4] += factor_k * f4 + du_reshaped[ij, k, 5] += factor_k * f5 + + factor_kk = alpha * derivative_split[kk, k] + du_reshaped[ij, kk, 1] += factor_kk * f1 + du_reshaped[ij, kk, 2] += factor_kk * f2 + du_reshaped[ij, kk, 3] += factor_kk * f3 + du_reshaped[ij, kk, 4] += factor_kk * f4 + du_reshaped[ij, kk, 5] += factor_kk * f5 + end + end + end # GC.@preserve u_prim + + # Finally, we add the temporary RHS computed here to the global RHS in the + 
# given `element`. + @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + _du[v, i, j, k, element] += du[i, j, k, v] + end +end @inline function flux_differencing_kernel!(_du::PtrArray, u_cons::PtrArray, element, mesh::TreeMesh{3}, @@ -268,341 +268,350 @@ end equations::CompressibleEulerEquations3D, volume_flux::typeof(flux_ranocha_turbo), dg::DGSEM, cache, alpha) - @unpack derivative_split = dg.basis - - # Create a temporary array that will be used to store the RHS with permuted - # indices `[i, j, k, v]` to allow using SIMD instructions. - # `StrideArray`s with purely static dimensions do not allocate on the heap. - du = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations)))) - - # Convert conserved to primitive variables on the given `element`. In addition - # to the usual primitive variables, we also compute logarithms of the density - # and pressure to increase the performance of the required logarithmic mean - # values. - u_prim = StrideArray{eltype(u_cons)}(undef, - (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., - StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs - - @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - rho = u_cons[1, i, j, k, element] - rho_v1 = u_cons[2, i, j, k, element] - rho_v2 = u_cons[3, i, j, k, element] - rho_v3 = u_cons[4, i, j, k, element] - rho_e = u_cons[5, i, j, k, element] - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (equations.gamma - 1) * (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) - - u_prim[i, j, k, 1] = rho - u_prim[i, j, k, 2] = v1 - u_prim[i, j, k, 3] = v2 - u_prim[i, j, k, 4] = v3 - u_prim[i, j, k, 5] = p - u_prim[i, j, k, 6] = log(rho) - u_prim[i, j, k, 7] = log(p) - end - - - # x direction - # At first, we create new temporary arrays with permuted memory layout to - # allow using SIMD instructions along the first dimension (which is contiguous - # in memory). - du_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - u_prim_permuted = StrideArray{eltype(u_cons)}(undef, - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations) + 2))) - - @turbo for v in indices(u_prim, 4), # v in eachvariable(equations) misses +2 logs - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] - end - fill!(du_permuted, zero(eltype(du_permuted))) - - # Next, we basically inline the volume flux. To allow SIMD vectorization and - # still use the symmetry of the volume flux and the derivative matrix, we - # loop over the triangular part in an outer loop and use a plain inner loop. 
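# NOTE: a minimal sketch of the triangular loop pattern described above,
# reduced to 1D; `flux_differencing_1d!`, `D`, and `fsym` are hypothetical
# names, not Trixi.jl API. Assuming a symmetric two-point flux with
# fsym(a, b) == fsym(b, a) and a split-form derivative matrix `D`, each
# unordered node pair is visited once and its flux is scattered to both rows:
function flux_differencing_1d!(du, u, D, fsym)
    n = length(u)
    for i in 1:n, ii in (i + 1):n
        f = fsym(u[i], u[ii])   # evaluate the (expensive) flux once per pair
        du[i] += D[i, ii] * f   # contribution of the pair to node i
        du[ii] += D[ii, i] * f  # symmetric contribution to node ii
    end
    return du
end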
- for i in eachnode(dg), ii in (i+1):nnodes(dg) - @turbo for jk in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_permuted[jk, i, 1] - v1_ll = u_prim_permuted[jk, i, 2] - v2_ll = u_prim_permuted[jk, i, 3] - v3_ll = u_prim_permuted[jk, i, 4] - p_ll = u_prim_permuted[jk, i, 5] - log_rho_ll = u_prim_permuted[jk, i, 6] - log_p_ll = u_prim_permuted[jk, i, 7] - - rho_rr = u_prim_permuted[jk, ii, 1] - v1_rr = u_prim_permuted[jk, ii, 2] - v2_rr = u_prim_permuted[jk, ii, 3] - v3_rr = u_prim_permuted[jk, ii, 4] - p_rr = u_prim_permuted[jk, ii, 5] - log_rho_rr = u_prim_permuted[jk, ii, 6] - log_p_rr = u_prim_permuted[jk, ii, 7] - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_mean * v1_avg - f2 = f1 * v1_avg + p_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg - f5 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v1_rr + p_rr*v1_ll) - - # Add scaled fluxes to RHS - factor_i = alpha * derivative_split[i, ii] - du_permuted[jk, i, 1] += factor_i * f1 - du_permuted[jk, i, 2] += factor_i * f2 - du_permuted[jk, i, 3] += factor_i * f3 - du_permuted[jk, i, 4] += factor_i * f4 - du_permuted[jk, i, 5] += factor_i * f5 - - factor_ii = alpha * derivative_split[ii, i] - du_permuted[jk, ii, 1] += factor_ii * f1 - du_permuted[jk, ii, 2] += factor_ii * f2 - du_permuted[jk, ii, 3] += factor_ii * f3 - du_permuted[jk, ii, 4] += factor_ii * f4 - du_permuted[jk, ii, 5] += factor_ii * f5 + @unpack derivative_split = dg.basis + + # Create a temporary array that will be used to store the RHS with permuted + # indices `[i, j, k, v]` to allow using SIMD instructions. + # `StrideArray`s with purely static dimensions do not allocate on the heap. + du = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), ndims(mesh))..., + StaticInt(nvariables(equations)))) + + # Convert conserved to primitive variables on the given `element`. In addition + # to the usual primitive variables, we also compute logarithms of the density + # and pressure to increase the performance of the required logarithmic mean + # values. 
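# NOTE: a minimal sketch, not Trixi.jl API, of why the two extra log entries
# pay off: the logarithmic mean of a node pair only needs log *differences*,
# so O(n) calls to `log` during this conversion replace O(n^2) calls inside
# the pair loops. `ln_mean_sketch` is a hypothetical name; the branch-free
# construction and the 1.0e-4 cutoff mirror the code inlined in this kernel.
function ln_mean_sketch(x, log_x, y, log_y)
    z = (y - x)^2 / (x + y)^2
    # Truncated series expansion of (y - x) / (log(y) - log(x)) for y ≈ x;
    # it avoids the 0/0 limit without branching, so `@turbo` can vectorize it.
    special = (x + y) / (2 + z * (2 / 3 + z * (2 / 5 + 2 / 7 * z)))
    regular = (y - x) / (log_y - log_x)
    return ifelse(z < 1.0e-4, special, regular)
end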
+ u_prim = StrideArray{eltype(u_cons)}(undef, + (ntuple(_ -> StaticInt(nnodes(dg)), + ndims(mesh))..., + StaticInt(nvariables(equations) + 2))) # We also compute "+ 2" logs + + @turbo for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + rho = u_cons[1, i, j, k, element] + rho_v1 = u_cons[2, i, j, k, element] + rho_v2 = u_cons[3, i, j, k, element] + rho_v3 = u_cons[4, i, j, k, element] + rho_e = u_cons[5, i, j, k, element] + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (equations.gamma - 1) * + (rho_e - 0.5 * (rho_v1 * v1 + rho_v2 * v2 + rho_v3 * v3)) + + u_prim[i, j, k, 1] = rho + u_prim[i, j, k, 2] = v1 + u_prim[i, j, k, 3] = v2 + u_prim[i, j, k, 4] = v3 + u_prim[i, j, k, 5] = p + u_prim[i, j, k, 6] = log(rho) + u_prim[i, j, k, 7] = log(p) end - end - - @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - jk = j + nnodes(dg) * (k- 1) - du[i, j, k, v] = du_permuted[jk, i, v] - end - - - # y direction - # A possible permutation of array dimensions with improved opportunities for - # SIMD vectorization appeared to be slower than the direct version used here - # in preliminary numerical experiments on an AVX2 system. - for j in eachnode(dg), jj in (j+1):nnodes(dg) - @turbo for k in eachnode(dg), i in eachnode(dg) - rho_ll = u_prim[i, j, k, 1] - v1_ll = u_prim[i, j, k, 2] - v2_ll = u_prim[i, j, k, 3] - v3_ll = u_prim[i, j, k, 4] - p_ll = u_prim[i, j, k, 5] - log_rho_ll = u_prim[i, j, k, 6] - log_p_ll = u_prim[i, j, k, 7] - - rho_rr = u_prim[i, jj, k, 1] - v1_rr = u_prim[i, jj, k, 2] - v2_rr = u_prim[i, jj, k, 3] - v3_rr = u_prim[i, jj, k, 4] - p_rr = u_prim[i, jj, k, 5] - log_rho_rr = u_prim[i, jj, k, 6] - log_p_rr = u_prim[i, jj, k, 7] - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. 
This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_mean * v2_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg + p_avg - f4 = f1 * v3_avg - f5 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v2_rr + p_rr*v2_ll) - - # Add scaled fluxes to RHS - factor_j = alpha * derivative_split[j, jj] - du[i, j, k, 1] += factor_j * f1 - du[i, j, k, 2] += factor_j * f2 - du[i, j, k, 3] += factor_j * f3 - du[i, j, k, 4] += factor_j * f4 - du[i, j, k, 5] += factor_j * f5 - - factor_jj = alpha * derivative_split[jj, j] - du[i, jj, k, 1] += factor_jj * f1 - du[i, jj, k, 2] += factor_jj * f2 - du[i, jj, k, 3] += factor_jj * f3 - du[i, jj, k, 4] += factor_jj * f4 - du[i, jj, k, 5] += factor_jj * f5 + + # x direction + # At first, we create new temporary arrays with permuted memory layout to + # allow using SIMD instructions along the first dimension (which is contiguous + # in memory). + du_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + u_prim_permuted = StrideArray{eltype(u_cons)}(undef, + (StaticInt(nnodes(dg)^2), + StaticInt(nnodes(dg)), + StaticInt(nvariables(equations) + 2))) + + @turbo for v in indices(u_prim, 4), # v in eachvariable(equations) misses +2 logs + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + jk = j + nnodes(dg) * (k - 1) + u_prim_permuted[jk, i, v] = u_prim[i, j, k, v] end - end - - - # z direction - # The memory layout is already optimal for SIMD vectorization in this loop. - # We just squeeze the first two dimensions to make the code slightly faster. 
- GC.@preserve u_prim begin - u_prim_reshaped = PtrArray(pointer(u_prim), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations) + 2))) - - du_reshaped = PtrArray(pointer(du), - (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), - StaticInt(nvariables(equations)))) - - for k in eachnode(dg), kk in (k+1):nnodes(dg) - @turbo for ij in Base.OneTo(nnodes(dg)^2) - rho_ll = u_prim_reshaped[ij, k, 1] - v1_ll = u_prim_reshaped[ij, k, 2] - v2_ll = u_prim_reshaped[ij, k, 3] - v3_ll = u_prim_reshaped[ij, k, 4] - p_ll = u_prim_reshaped[ij, k, 5] - log_rho_ll = u_prim_reshaped[ij, k, 6] - log_p_ll = u_prim_reshaped[ij, k, 7] - - rho_rr = u_prim_reshaped[ij, kk, 1] - v1_rr = u_prim_reshaped[ij, kk, 2] - v2_rr = u_prim_reshaped[ij, kk, 3] - v3_rr = u_prim_reshaped[ij, kk, 4] - p_rr = u_prim_reshaped[ij, kk, 5] - log_rho_rr = u_prim_reshaped[ij, kk, 6] - log_p_rr = u_prim_reshaped[ij, kk, 7] - - # Compute required mean values - # We inline the logarithmic mean to allow LoopVectorization.jl to optimize - # it efficiently. This is equivalent to - # rho_mean = ln_mean(rho_ll, rho_rr) - x1 = rho_ll - log_x1 = log_rho_ll - y1 = rho_rr - log_y1 = log_rho_rr - x1_plus_y1 = x1 + y1 - y1_minus_x1 = y1 - x1 - z1 = y1_minus_x1^2 / x1_plus_y1^2 - special_path1 = x1_plus_y1 / (2 + z1*(2/3 + z1*(2/5 + 2/7*z1))) - regular_path1 = y1_minus_x1 / (log_y1 - log_x1) - rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) - - # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` - # in exact arithmetic since - # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) - # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) - # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) - x2 = rho_ll * p_rr - log_x2 = log_rho_ll + log_p_rr - y2 = rho_rr * p_ll - log_y2 = log_rho_rr + log_p_ll - x2_plus_y2 = x2 + y2 - y2_minus_x2 = y2 - x2 - z2 = y2_minus_x2^2 / x2_plus_y2^2 - special_path2 = (2 + z2*(2/3 + z2*(2/5 + 2/7*z2))) / x2_plus_y2 - regular_path2 = (log_y2 - log_x2) / y2_minus_x2 - inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) - - v1_avg = 0.5 * (v1_ll + v1_rr) - v2_avg = 0.5 * (v2_ll + v2_rr) - v3_avg = 0.5 * (v3_ll + v3_rr) - p_avg = 0.5 * ( p_ll + p_rr) - velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) - - # Calculate fluxes depending on Cartesian orientation - f1 = rho_mean * v3_avg - f2 = f1 * v1_avg - f3 = f1 * v2_avg - f4 = f1 * v3_avg + p_avg - f5 = f1 * ( velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one ) + 0.5 * (p_ll*v3_rr + p_rr*v3_ll) - - # Add scaled fluxes to RHS - factor_k = alpha * derivative_split[k, kk] - du_reshaped[ij, k, 1] += factor_k * f1 - du_reshaped[ij, k, 2] += factor_k * f2 - du_reshaped[ij, k, 3] += factor_k * f3 - du_reshaped[ij, k, 4] += factor_k * f4 - du_reshaped[ij, k, 5] += factor_k * f5 - - factor_kk = alpha * derivative_split[kk, k] - du_reshaped[ij, kk, 1] += factor_kk * f1 - du_reshaped[ij, kk, 2] += factor_kk * f2 - du_reshaped[ij, kk, 3] += factor_kk * f3 - du_reshaped[ij, kk, 4] += factor_kk * f4 - du_reshaped[ij, kk, 5] += factor_kk * f5 - end + fill!(du_permuted, zero(eltype(du_permuted))) + + # Next, we basically inline the volume flux. To allow SIMD vectorization and + # still use the symmetry of the volume flux and the derivative matrix, we + # loop over the triangular part in an outer loop and use a plain inner loop. 
+ for i in eachnode(dg), ii in (i + 1):nnodes(dg) + @turbo for jk in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_permuted[jk, i, 1] + v1_ll = u_prim_permuted[jk, i, 2] + v2_ll = u_prim_permuted[jk, i, 3] + v3_ll = u_prim_permuted[jk, i, 4] + p_ll = u_prim_permuted[jk, i, 5] + log_rho_ll = u_prim_permuted[jk, i, 6] + log_p_ll = u_prim_permuted[jk, i, 7] + + rho_rr = u_prim_permuted[jk, ii, 1] + v1_rr = u_prim_permuted[jk, ii, 2] + v2_rr = u_prim_permuted[jk, ii, 3] + v3_rr = u_prim_permuted[jk, ii, 4] + p_rr = u_prim_permuted[jk, ii, 5] + log_rho_rr = u_prim_permuted[jk, ii, 6] + log_p_rr = u_prim_permuted[jk, ii, 7] + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_mean * v1_avg + f2 = f1 * v1_avg + p_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + f5 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v1_rr + p_rr * v1_ll) + + # Add scaled fluxes to RHS + factor_i = alpha * derivative_split[i, ii] + du_permuted[jk, i, 1] += factor_i * f1 + du_permuted[jk, i, 2] += factor_i * f2 + du_permuted[jk, i, 3] += factor_i * f3 + du_permuted[jk, i, 4] += factor_i * f4 + du_permuted[jk, i, 5] += factor_i * f5 + + factor_ii = alpha * derivative_split[ii, i] + du_permuted[jk, ii, 1] += factor_ii * f1 + du_permuted[jk, ii, 2] += factor_ii * f2 + du_permuted[jk, ii, 3] += factor_ii * f3 + du_permuted[jk, ii, 4] += factor_ii * f4 + du_permuted[jk, ii, 5] += factor_ii * f5 + end end - end # GC.@preserve u_prim + @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) - # Finally, we add the temporary RHS computed here to the global RHS in the - # given `element`. 
- @turbo for v in eachvariable(equations), - k in eachnode(dg), - j in eachnode(dg), - i in eachnode(dg) - _du[v, i, j, k, element] += du[i, j, k, v] - end -end + jk = j + nnodes(dg) * (k - 1) + du[i, j, k, v] = du_permuted[jk, i, v] + end + # y direction + # A possible permutation of array dimensions with improved opportunities for + # SIMD vectorization appeared to be slower than the direct version used here + # in preliminary numerical experiments on an AVX2 system. + for j in eachnode(dg), jj in (j + 1):nnodes(dg) + @turbo for k in eachnode(dg), i in eachnode(dg) + rho_ll = u_prim[i, j, k, 1] + v1_ll = u_prim[i, j, k, 2] + v2_ll = u_prim[i, j, k, 3] + v3_ll = u_prim[i, j, k, 4] + p_ll = u_prim[i, j, k, 5] + log_rho_ll = u_prim[i, j, k, 6] + log_p_ll = u_prim[i, j, k, 7] + + rho_rr = u_prim[i, jj, k, 1] + v1_rr = u_prim[i, jj, k, 2] + v2_rr = u_prim[i, jj, k, 3] + v3_rr = u_prim[i, jj, k, 4] + p_rr = u_prim[i, jj, k, 5] + log_rho_rr = u_prim[i, jj, k, 6] + log_p_rr = u_prim[i, jj, k, 7] + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_mean * v2_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + p_avg + f4 = f1 * v3_avg + f5 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v2_rr + p_rr * v2_ll) + + # Add scaled fluxes to RHS + factor_j = alpha * derivative_split[j, jj] + du[i, j, k, 1] += factor_j * f1 + du[i, j, k, 2] += factor_j * f2 + du[i, j, k, 3] += factor_j * f3 + du[i, j, k, 4] += factor_j * f4 + du[i, j, k, 5] += factor_j * f5 + + factor_jj = alpha * derivative_split[jj, j] + du[i, jj, k, 1] += factor_jj * f1 + du[i, jj, k, 2] += factor_jj * f2 + du[i, jj, k, 3] += factor_jj * f3 + du[i, jj, k, 4] += factor_jj * f4 + du[i, jj, k, 5] += factor_jj * f5 + end + end + + # z direction + # The memory layout is already optimal for SIMD vectorization in this loop. + # We just squeeze the first two dimensions to make the code slightly faster. 
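# NOTE: a minimal Base-only sketch (not the patch code) of what the
# `PtrArray` construction below achieves: viewing an existing (n, n, n, v)
# buffer as (n^2, n, v) without copying, so `@turbo` vectorizes over one long
# contiguous axis. `GC.@preserve` is required below because `PtrArray` holds
# a raw pointer and must not outlive the array it points into.
squeeze_first_two(a::AbstractArray{T, 4}) where {T} =
    reshape(a, size(a, 1) * size(a, 2), size(a, 3), size(a, 4))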
+ GC.@preserve u_prim begin + u_prim_reshaped = PtrArray(pointer(u_prim), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations) + 2))) + + du_reshaped = PtrArray(pointer(du), + (StaticInt(nnodes(dg)^2), StaticInt(nnodes(dg)), + StaticInt(nvariables(equations)))) + + for k in eachnode(dg), kk in (k + 1):nnodes(dg) + @turbo for ij in Base.OneTo(nnodes(dg)^2) + rho_ll = u_prim_reshaped[ij, k, 1] + v1_ll = u_prim_reshaped[ij, k, 2] + v2_ll = u_prim_reshaped[ij, k, 3] + v3_ll = u_prim_reshaped[ij, k, 4] + p_ll = u_prim_reshaped[ij, k, 5] + log_rho_ll = u_prim_reshaped[ij, k, 6] + log_p_ll = u_prim_reshaped[ij, k, 7] + + rho_rr = u_prim_reshaped[ij, kk, 1] + v1_rr = u_prim_reshaped[ij, kk, 2] + v2_rr = u_prim_reshaped[ij, kk, 3] + v3_rr = u_prim_reshaped[ij, kk, 4] + p_rr = u_prim_reshaped[ij, kk, 5] + log_rho_rr = u_prim_reshaped[ij, kk, 6] + log_p_rr = u_prim_reshaped[ij, kk, 7] + + # Compute required mean values + # We inline the logarithmic mean to allow LoopVectorization.jl to optimize + # it efficiently. This is equivalent to + # rho_mean = ln_mean(rho_ll, rho_rr) + x1 = rho_ll + log_x1 = log_rho_ll + y1 = rho_rr + log_y1 = log_rho_rr + x1_plus_y1 = x1 + y1 + y1_minus_x1 = y1 - x1 + z1 = y1_minus_x1^2 / x1_plus_y1^2 + special_path1 = x1_plus_y1 / (2 + z1 * (2 / 3 + z1 * (2 / 5 + 2 / 7 * z1))) + regular_path1 = y1_minus_x1 / (log_y1 - log_x1) + rho_mean = ifelse(z1 < 1.0e-4, special_path1, regular_path1) + + # Algebraically equivalent to `inv_ln_mean(rho_ll / p_ll, rho_rr / p_rr)` + # in exact arithmetic since + # log((ϱₗ/pₗ) / (ϱᵣ/pᵣ)) / (ϱₗ/pₗ - ϱᵣ/pᵣ) + # = pₗ pᵣ log((ϱₗ pᵣ) / (ϱᵣ pₗ)) / (ϱₗ pᵣ - ϱᵣ pₗ) + # inv_rho_p_mean = p_ll * p_rr * inv_ln_mean(rho_ll * p_rr, rho_rr * p_ll) + x2 = rho_ll * p_rr + log_x2 = log_rho_ll + log_p_rr + y2 = rho_rr * p_ll + log_y2 = log_rho_rr + log_p_ll + x2_plus_y2 = x2 + y2 + y2_minus_x2 = y2 - x2 + z2 = y2_minus_x2^2 / x2_plus_y2^2 + special_path2 = (2 + z2 * (2 / 3 + z2 * (2 / 5 + 2 / 7 * z2))) / x2_plus_y2 + regular_path2 = (log_y2 - log_x2) / y2_minus_x2 + inv_rho_p_mean = p_ll * p_rr * + ifelse(z2 < 1.0e-4, special_path2, regular_path2) + + v1_avg = 0.5 * (v1_ll + v1_rr) + v2_avg = 0.5 * (v2_ll + v2_rr) + v3_avg = 0.5 * (v3_ll + v3_rr) + p_avg = 0.5 * (p_ll + p_rr) + velocity_square_avg = 0.5 * (v1_ll * v1_rr + v2_ll * v2_rr + v3_ll * v3_rr) + + # Calculate fluxes depending on Cartesian orientation + f1 = rho_mean * v3_avg + f2 = f1 * v1_avg + f3 = f1 * v2_avg + f4 = f1 * v3_avg + p_avg + f5 = f1 * + (velocity_square_avg + inv_rho_p_mean * equations.inv_gamma_minus_one) + + 0.5 * (p_ll * v3_rr + p_rr * v3_ll) + + # Add scaled fluxes to RHS + factor_k = alpha * derivative_split[k, kk] + du_reshaped[ij, k, 1] += factor_k * f1 + du_reshaped[ij, k, 2] += factor_k * f2 + du_reshaped[ij, k, 3] += factor_k * f3 + du_reshaped[ij, k, 4] += factor_k * f4 + du_reshaped[ij, k, 5] += factor_k * f5 + + factor_kk = alpha * derivative_split[kk, k] + du_reshaped[ij, kk, 1] += factor_kk * f1 + du_reshaped[ij, kk, 2] += factor_kk * f2 + du_reshaped[ij, kk, 3] += factor_kk * f3 + du_reshaped[ij, kk, 4] += factor_kk * f4 + du_reshaped[ij, kk, 5] += factor_kk * f5 + end + end + end # GC.@preserve u_prim + + # Finally, we add the temporary RHS computed here to the global RHS in the + # given `element`. 
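# NOTE: the loop below undoes the index permutation: the local RHS was
# accumulated as du[i, j, k, v] (variables last, for SIMD over nodes), while
# the global array stores _du[v, i, j, k, element] (variables first). In
# Base-only terms this is equivalent to
#     _du[:, :, :, :, element] .+= permutedims(du, (4, 1, 2, 3))
# but the explicit `@turbo` loop avoids the temporary that `permutedims`
# allocates.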
+ @turbo for v in eachvariable(equations), + k in eachnode(dg), + j in eachnode(dg), + i in eachnode(dg) + + _du[v, i, j, k, element] += du[i, j, k, v] + end +end diff --git a/src/solvers/dgsem_tree/dg_3d_parabolic.jl b/src/solvers/dgsem_tree/dg_3d_parabolic.jl index d4a197de172..d6d74637021 100644 --- a/src/solvers/dgsem_tree/dg_3d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_3d_parabolic.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # This file collects all methods that have been updated to work with parabolic systems of equations # @@ -12,74 +13,93 @@ # 2. compute f(u, grad(u)) # 3. compute div(f(u, grad(u))) (i.e., the "regular" rhs! call) # boundary conditions will be applied to both grad(u) and div(f(u, grad(u))). -function rhs_parabolic!(du, u, t, mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, +function rhs_parabolic!(du, u, t, mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, initial_condition, boundary_conditions_parabolic, source_terms, dg::DG, parabolic_scheme, cache, cache_parabolic) - @unpack u_transformed, gradients, flux_viscous = cache_parabolic - - # Convert conservative variables to a form more suitable for viscous flux calculations - @trixi_timeit timer() "transform variables" transform_variables!( - u_transformed, u, mesh, equations_parabolic, dg, parabolic_scheme, cache, cache_parabolic) - - # Compute the gradients of the transformed variables - @trixi_timeit timer() "calculate gradient" calc_gradient!( - gradients, u_transformed, t, mesh, equations_parabolic, boundary_conditions_parabolic, dg, - cache, cache_parabolic) - - # Compute and store the viscous fluxes - @trixi_timeit timer() "calculate viscous fluxes" calc_viscous_fluxes!( - flux_viscous, gradients, u_transformed, mesh, equations_parabolic, dg, cache, cache_parabolic) - - # The remainder of this function is essentially a regular rhs! for parabolic equations (i.e., it - # computes the divergence of the viscous fluxes) - # - # OBS! In `calc_viscous_fluxes!`, the viscous flux values at the volume nodes of each element have - # been computed and stored in `fluxes_viscous`. In the following, we *reuse* (abuse) the - # `interfaces` and `boundaries` containers in `cache_parabolic` to interpolate and store the - # *fluxes* at the element surfaces, as opposed to interpolating and storing the *solution* (as it - # is done in the hyperbolic operator). That is, `interfaces.u`/`boundaries.u` store *viscous flux values* - # and *not the solution*. The advantage is that a) we do not need to allocate more storage, b) we - # do not need to recreate the existing data structure only with a different name, and c) we do not - # need to interpolate solutions *and* gradients to the surfaces. 
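# NOTE: the three steps listed above as a minimal call sequence; all names
# here are hypothetical reading aids, not runnable Trixi.jl code:
#     w  = transform(u)          # switch to the gradient variables
#     g  = dg_gradient(w)        # step 1: weak-form gradient of w
#     fv = viscous_flux(w, g)    # step 2: evaluate f(u, grad(u))
#     du = dg_divergence(fv)     # step 3: a "regular" rhs!-style divergence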
- - # TODO: parabolic; reconsider current data structure reuse strategy - - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, flux_viscous, mesh, equations_parabolic, dg, cache) - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache_parabolic, flux_viscous, mesh, equations_parabolic, dg.surface_integral, dg, cache) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache_parabolic.elements.surface_flux_values, mesh, equations_parabolic, dg, cache_parabolic) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache_parabolic, flux_viscous, mesh, equations_parabolic, dg.surface_integral, dg, cache) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux_divergence!( - cache_parabolic, t, boundary_conditions_parabolic, mesh, equations_parabolic, - dg.surface_integral, dg) - - # TODO: parabolic; extend to mortars - @assert nmortars(dg, cache) == 0 - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations_parabolic, dg.surface_integral, dg, cache_parabolic) - - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" apply_jacobian_parabolic!( - du, mesh, equations_parabolic, dg, cache_parabolic) - - return nothing + @unpack u_transformed, gradients, flux_viscous = cache_parabolic + + # Convert conservative variables to a form more suitable for viscous flux calculations + @trixi_timeit timer() "transform variables" begin + transform_variables!(u_transformed, u, mesh, equations_parabolic, + dg, parabolic_scheme, cache, cache_parabolic) + end + + # Compute the gradients of the transformed variables + @trixi_timeit timer() "calculate gradient" begin + calc_gradient!(gradients, u_transformed, t, mesh, equations_parabolic, + boundary_conditions_parabolic, dg, cache, cache_parabolic) + end + + # Compute and store the viscous fluxes + @trixi_timeit timer() "calculate viscous fluxes" begin + calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, mesh, + equations_parabolic, dg, cache, cache_parabolic) + end + + # The remainder of this function is essentially a regular rhs! for parabolic + # equations (i.e., it computes the divergence of the viscous fluxes) + # + # OBS! In `calc_viscous_fluxes!`, the viscous flux values at the volume nodes of each element have + # been computed and stored in `fluxes_viscous`. In the following, we *reuse* (abuse) the + # `interfaces` and `boundaries` containers in `cache_parabolic` to interpolate and store the + # *fluxes* at the element surfaces, as opposed to interpolating and storing the *solution* (as it + # is done in the hyperbolic operator). That is, `interfaces.u`/`boundaries.u` store *viscous flux values* + # and *not the solution*. The advantage is that a) we do not need to allocate more storage, b) we + # do not need to recreate the existing data structure only with a different name, and c) we do not + # need to interpolate solutions *and* gradients to the surfaces. 
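# NOTE: the reformatted function above wraps each timed stage in
# `@trixi_timeit timer() "label" begin ... end` instead of one long call
# line. A hypothetical macro of the same shape (illustrative only, not the
# actual `@trixi_timeit` implementation):
macro timeit_sketch(label, expr)
    quote
        t0 = time_ns()
        result = $(esc(expr))
        @info "timing" label = $(esc(label)) seconds = (time_ns() - t0) / 1.0e9
        result
    end
end
# Usage: @timeit_sketch "volume integral" begin calc_volume_integral!(...) end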
+ + # TODO: parabolic; reconsider current data structure reuse strategy + + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, flux_viscous, mesh, equations_parabolic, dg, cache) + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache_parabolic.elements.surface_flux_values, mesh, + equations_parabolic, dg, cache_parabolic) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_divergence!(cache_parabolic, t, + boundary_conditions_parabolic, + mesh, equations_parabolic, + dg.surface_integral, dg) + end + + # TODO: parabolic; extend to mortars + @assert nmortars(dg, cache) == 0 + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations_parabolic, + dg.surface_integral, dg, cache_parabolic) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(du, mesh, equations_parabolic, dg, cache_parabolic) + end + + return nothing end # Transform solution variables prior to taking the gradient @@ -88,580 +108,716 @@ end function transform_variables!(u_transformed, u, mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, cache, cache_parabolic) - @threaded for element in eachelement(dg, cache) - # Calculate volume terms in one element - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations_parabolic, dg, i, j, k, element) - u_transformed_node = gradient_variable_transformation(equations_parabolic)(u_node, equations_parabolic) - set_node_vars!(u_transformed, u_transformed_node, equations_parabolic, dg, i, j, k, element) + @threaded for element in eachelement(dg, cache) + # Calculate volume terms in one element + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations_parabolic, dg, i, j, k, element) + u_transformed_node = gradient_variable_transformation(equations_parabolic)(u_node, + equations_parabolic) + set_node_vars!(u_transformed, u_transformed_node, equations_parabolic, dg, + i, j, k, element) + end end - end end # This is the version used when calculating the divergence of the viscous fluxes function calc_volume_integral!(du, flux_viscous, - mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, dg::DGSEM, cache) - @unpack derivative_dhat = dg.basis - flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous - - @threaded for element in eachelement(dg, cache) - # Calculate volume terms in one element - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - flux_1_node = get_node_vars(flux_viscous_x, equations_parabolic, dg, i, j, k, element) - flux_2_node = get_node_vars(flux_viscous_y, equations_parabolic, dg, i, j, k, element) - flux_3_node = get_node_vars(flux_viscous_z, 
equations_parabolic, dg, i, j, k, element) - - for ii in eachnode(dg) - multiply_add_to_node_vars!(du, derivative_dhat[ii, i], flux_1_node, equations_parabolic, dg, ii, j, k, element) - end - - for jj in eachnode(dg) - multiply_add_to_node_vars!(du, derivative_dhat[jj, j], flux_2_node, equations_parabolic, dg, i, jj, k, element) - end - - for kk in eachnode(dg) - multiply_add_to_node_vars!(du, derivative_dhat[kk, k], flux_3_node, equations_parabolic, dg, i, j, kk, element) - end + @unpack derivative_dhat = dg.basis + flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous + + @threaded for element in eachelement(dg, cache) + # Calculate volume terms in one element + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + flux_1_node = get_node_vars(flux_viscous_x, equations_parabolic, dg, i, j, + k, element) + flux_2_node = get_node_vars(flux_viscous_y, equations_parabolic, dg, i, j, + k, element) + flux_3_node = get_node_vars(flux_viscous_z, equations_parabolic, dg, i, j, + k, element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(du, derivative_dhat[ii, i], flux_1_node, + equations_parabolic, dg, ii, j, k, element) + end + + for jj in eachnode(dg) + multiply_add_to_node_vars!(du, derivative_dhat[jj, j], flux_2_node, + equations_parabolic, dg, i, jj, k, element) + end + + for kk in eachnode(dg) + multiply_add_to_node_vars!(du, derivative_dhat[kk, k], flux_3_node, + equations_parabolic, dg, i, j, kk, element) + end + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache_parabolic, flux_viscous, - mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) - @unpack interfaces = cache_parabolic - @unpack orientations = interfaces - - flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous - - @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] - - if orientations[interface] == 1 - # interface in x-direction - for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - interfaces.u[1, v, j, k, interface] = flux_viscous_x[v, nnodes(dg), j, k, left_element] - interfaces.u[2, v, j, k, interface] = flux_viscous_x[v, 1, j, k, right_element] - end - elseif orientations[interface] == 2 - # interface in y-direction - for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - interfaces.u[1, v, i, k, interface] = flux_viscous_y[v, i, nnodes(dg), k, left_element] - interfaces.u[2, v, i, k, interface] = flux_viscous_y[v, i, 1, k, right_element] - end - else # if orientations[interface] == 3 - # interface in z-direction - for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! 
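# NOTE: as in the x- and y-branches above, the two neighbors meet at opposite
# faces; for orientation 3 the left element contributes its k = nnodes(dg)
# (upper z) face and the right element its k = 1 (lower z) face.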
- interfaces.u[1, v, i, j, interface] = flux_viscous_z[v, i, j, nnodes(dg), left_element] - interfaces.u[2, v, i, j, interface] = flux_viscous_z[v, i, j, 1, right_element] - end + @unpack interfaces = cache_parabolic + @unpack orientations = interfaces + + flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous + + @threaded for interface in eachinterface(dg, cache) + left_element = interfaces.neighbor_ids[1, interface] + right_element = interfaces.neighbor_ids[2, interface] + + if orientations[interface] == 1 + # interface in x-direction + for k in eachnode(dg), j in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! + interfaces.u[1, v, j, k, interface] = flux_viscous_x[v, nnodes(dg), j, + k, left_element] + interfaces.u[2, v, j, k, interface] = flux_viscous_x[v, 1, j, k, + right_element] + end + elseif orientations[interface] == 2 + # interface in y-direction + for k in eachnode(dg), i in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! + interfaces.u[1, v, i, k, interface] = flux_viscous_y[v, i, nnodes(dg), + k, left_element] + interfaces.u[2, v, i, k, interface] = flux_viscous_y[v, i, 1, k, + right_element] + end + else # if orientations[interface] == 3 + # interface in z-direction + for j in eachnode(dg), i in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! + interfaces.u[1, v, i, j, interface] = flux_viscous_z[v, i, j, + nnodes(dg), + left_element] + interfaces.u[2, v, i, j, interface] = flux_viscous_z[v, i, j, 1, + right_element] + end + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes function calc_interface_flux!(surface_flux_values, mesh::TreeMesh{3}, equations_parabolic, dg::DG, cache_parabolic) - @unpack neighbor_ids, orientations = cache_parabolic.interfaces - - @threaded for interface in eachinterface(dg, cache_parabolic) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - # orientation = 3: left -> 6, right -> 5 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for j in eachnode(dg), i in eachnode(dg) - # Get precomputed fluxes at interfaces - flux_ll, flux_rr = get_surface_node_vars(cache_parabolic.interfaces.u, equations_parabolic, - dg, i, j, interface) - - # Compute interface flux as mean of left and right viscous fluxes - # TODO: parabolic; only BR1 at the moment - flux = 0.5 * (flux_ll + flux_rr) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, i, j, left_direction, left_id] = flux[v] - surface_flux_values[v, i, j, right_direction, right_id] = flux[v] - end + @unpack neighbor_ids, orientations = cache_parabolic.interfaces + + @threaded for interface in eachinterface(dg, cache_parabolic) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + # orientation = 3: left -> 6, right -> 5 + left_direction = 
2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for j in eachnode(dg), i in eachnode(dg) + # Get precomputed fluxes at interfaces + flux_ll, flux_rr = get_surface_node_vars(cache_parabolic.interfaces.u, + equations_parabolic, + dg, i, j, interface) + + # Compute interface flux as mean of left and right viscous fluxes + # TODO: parabolic; only BR1 at the moment + flux = 0.5 * (flux_ll + flux_rr) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, j, left_direction, left_id] = flux[v] + surface_flux_values[v, i, j, right_direction, right_id] = flux[v] + end + end end - end - return nothing + return nothing end - # This is the version used when calculating the divergence of the viscous fluxes function prolong2boundaries!(cache_parabolic, flux_viscous, - mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) - @unpack boundaries = cache_parabolic - @unpack orientations, neighbor_sides = boundaries - flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous - - @threaded for boundary in eachboundary(dg, cache_parabolic) - element = boundaries.neighbor_ids[boundary] - - if orientations[boundary] == 1 - # boundary in x-direction - if neighbor_sides[boundary] == 1 - # element in -x direction of boundary - for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[1, v, j, k, boundary] = flux_viscous_x[v, nnodes(dg), j, k, element] - end - else # Element in +x direction of boundary - for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[2, v, j, k, boundary] = flux_viscous_x[v, 1, j, k, element] - end - end - elseif orientations[boundary] == 2 - # boundary in y-direction - if neighbor_sides[boundary] == 1 - # element in -y direction of boundary - for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[1, v, i, k, boundary] = flux_viscous_y[v, i, nnodes(dg), k, element] - end - else - # element in +y direction of boundary - for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[2, v, i, k, boundary] = flux_viscous_y[v, i, 1, k, element] - end - end - else # if orientations[boundary] == 3 - # boundary in z-direction - if neighbor_sides[boundary] == 1 - # element in -z direction of boundary - for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! 
- boundaries.u[1, v, i, j, boundary] = flux_viscous_z[v, i, j, nnodes(dg), element] + @unpack boundaries = cache_parabolic + @unpack orientations, neighbor_sides = boundaries + flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous + + @threaded for boundary in eachboundary(dg, cache_parabolic) + element = boundaries.neighbor_ids[boundary] + + if orientations[boundary] == 1 + # boundary in x-direction + if neighbor_sides[boundary] == 1 + # element in -x direction of boundary + for k in eachnode(dg), j in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[1, v, j, k, boundary] = flux_viscous_x[v, nnodes(dg), + j, k, element] + end + else # Element in +x direction of boundary + for k in eachnode(dg), j in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[2, v, j, k, boundary] = flux_viscous_x[v, 1, j, k, + element] + end + end + elseif orientations[boundary] == 2 + # boundary in y-direction + if neighbor_sides[boundary] == 1 + # element in -y direction of boundary + for k in eachnode(dg), i in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[1, v, i, k, boundary] = flux_viscous_y[v, i, + nnodes(dg), k, + element] + end + else + # element in +y direction of boundary + for k in eachnode(dg), i in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[2, v, i, k, boundary] = flux_viscous_y[v, i, 1, k, + element] + end + end + else # if orientations[boundary] == 3 + # boundary in z-direction + if neighbor_sides[boundary] == 1 + # element in -z direction of boundary + for j in eachnode(dg), i in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[1, v, i, j, boundary] = flux_viscous_z[v, i, j, + nnodes(dg), + element] + end + else + # element in +z direction of boundary + for j in eachnode(dg), i in eachnode(dg), + v in eachvariable(equations_parabolic) + # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! + boundaries.u[2, v, i, j, boundary] = flux_viscous_z[v, i, j, 1, + element] + end + end end - else - # element in +z direction of boundary - for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! 
- boundaries.u[2, v, i, j, boundary] = flux_viscous_z[v, i, j, 1, element] - end - end end - end - return nothing + return nothing end - function calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache, cache_parabolic) - gradients_x, gradients_y, gradients_z = gradients - flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous # output arrays - - @threaded for element in eachelement(dg, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - # Get solution and gradients - u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, k, element) - gradients_1_node = get_node_vars(gradients_x, equations_parabolic, dg, i, j, k, element) - gradients_2_node = get_node_vars(gradients_y, equations_parabolic, dg, i, j, k, element) - gradients_3_node = get_node_vars(gradients_z, equations_parabolic, dg, i, j, k, element) - - # Calculate viscous flux and store each component for later use - flux_viscous_node_x = flux(u_node, (gradients_1_node, gradients_2_node, gradients_3_node), 1, equations_parabolic) - flux_viscous_node_y = flux(u_node, (gradients_1_node, gradients_2_node, gradients_3_node), 2, equations_parabolic) - flux_viscous_node_z = flux(u_node, (gradients_1_node, gradients_2_node, gradients_3_node), 3, equations_parabolic) - set_node_vars!(flux_viscous_x, flux_viscous_node_x, equations_parabolic, dg, i, j, k, element) - set_node_vars!(flux_viscous_y, flux_viscous_node_y, equations_parabolic, dg, i, j, k, element) - set_node_vars!(flux_viscous_z, flux_viscous_node_z, equations_parabolic, dg, i, j, k, element) + gradients_x, gradients_y, gradients_z = gradients + flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous # output arrays + + @threaded for element in eachelement(dg, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + # Get solution and gradients + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, k, + element) + gradients_1_node = get_node_vars(gradients_x, equations_parabolic, dg, i, j, + k, element) + gradients_2_node = get_node_vars(gradients_y, equations_parabolic, dg, i, j, + k, element) + gradients_3_node = get_node_vars(gradients_z, equations_parabolic, dg, i, j, + k, element) + + # Calculate viscous flux and store each component for later use + flux_viscous_node_x = flux(u_node, + (gradients_1_node, gradients_2_node, + gradients_3_node), 1, equations_parabolic) + flux_viscous_node_y = flux(u_node, + (gradients_1_node, gradients_2_node, + gradients_3_node), 2, equations_parabolic) + flux_viscous_node_z = flux(u_node, + (gradients_1_node, gradients_2_node, + gradients_3_node), 3, equations_parabolic) + set_node_vars!(flux_viscous_x, flux_viscous_node_x, equations_parabolic, dg, + i, j, k, element) + set_node_vars!(flux_viscous_y, flux_viscous_node_y, equations_parabolic, dg, + i, j, k, element) + set_node_vars!(flux_viscous_z, flux_viscous_node_z, equations_parabolic, dg, + i, j, k, element) + end end - end end - # TODO: parabolic; decide if we should keep this. 
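# NOTE: direction indices follow the convention already used in this file
# (left_direction = 2 * orientation, right_direction = 2 * orientation - 1):
# directions 1/2 are the -x/+x faces, 3/4 the -y/+y faces, and 5/6 the
# -z/+z faces. The function below only recovers the (unsigned) axis vector
# of each pair, hence its name.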
function get_unsigned_normal_vector_3d(direction) - if direction > 6 || direction < 1 - error("Direction = $direction; in 3D, direction should be 1, 2, 3, 4, 5, or 6.") - end - if direction == 1 || direction == 2 - return SVector(1.0, 0.0, 0.0) - elseif direction == 3 || direction == 4 - return SVector(0.0, 1.0, 0.0) - else - return SVector(0.0, 0.0, 1.0) - end + if direction > 6 || direction < 1 + error("Direction = $direction; in 3D, direction should be 1, 2, 3, 4, 5, or 6.") + end + if direction == 1 || direction == 2 + return SVector(1.0, 0.0, 0.0) + elseif direction == 3 || direction == 4 + return SVector(0.0, 1.0, 0.0) + else + return SVector(0.0, 0.0, 1.0) + end end -function calc_boundary_flux_gradients!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, - surface_integral, dg::DG) - return nothing +function calc_boundary_flux_gradients!(cache, t, + boundary_conditions_parabolic::BoundaryConditionPeriodic, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG) + return nothing end -function calc_boundary_flux_divergence!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, +function calc_boundary_flux_divergence!(cache, t, + boundary_conditions_parabolic::BoundaryConditionPeriodic, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) - return nothing + return nothing end -function calc_boundary_flux_gradients!(cache, t, boundary_conditions_parabolic::NamedTuple, - mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, - surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[1], - equations_parabolic, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[2], - equations_parabolic, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[3], - equations_parabolic, surface_integral, dg, cache, - 3, firsts[3], lasts[3]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[4], - equations_parabolic, surface_integral, dg, cache, - 4, firsts[4], lasts[4]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[5], - equations_parabolic, surface_integral, dg, cache, - 5, firsts[5], lasts[5]) - calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, boundary_conditions_parabolic[6], - equations_parabolic, surface_integral, dg, cache, - 6, firsts[6], lasts[6]) +function calc_boundary_flux_gradients!(cache, t, + boundary_conditions_parabolic::NamedTuple, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary 
fluxes in each direction + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[1], + equations_parabolic, surface_integral, dg, + cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[2], + equations_parabolic, surface_integral, dg, + cache, + 2, firsts[2], lasts[2]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[3], + equations_parabolic, surface_integral, dg, + cache, + 3, firsts[3], lasts[3]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[4], + equations_parabolic, surface_integral, dg, + cache, + 4, firsts[4], lasts[4]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[5], + equations_parabolic, surface_integral, dg, + cache, + 5, firsts[5], lasts[5]) + calc_boundary_flux_by_direction_gradient!(surface_flux_values, t, + boundary_conditions_parabolic[6], + equations_parabolic, surface_integral, dg, + cache, + 6, firsts[6], lasts[6]) end - -function calc_boundary_flux_by_direction_gradient!(surface_flux_values::AbstractArray{<:Any,5}, t, +function calc_boundary_flux_by_direction_gradient!(surface_flux_values::AbstractArray{ + <:Any, + 5 + }, + t, boundary_condition, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache, - direction, first_boundary, last_boundary) - @unpack surface_flux = surface_integral - @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries - - @threaded for boundary in first_boundary:last_boundary - # Get neighboring element - neighbor = neighbor_ids[boundary] - - for j in eachnode(dg), i in eachnode(dg) - # Get boundary flux - u_ll, u_rr = get_surface_node_vars(u, equations_parabolic, dg, i, j, boundary) - if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right - u_inner = u_ll - else # Element is on the right, boundary on the left - u_inner = u_rr - end - - # TODO: revisit if we want more general boundary treatments. - # This assumes the gradient numerical flux at the boundary is the gradient variable, - # which is consistent with BR1, LDG. - flux_inner = u_inner - - x = get_node_coords(node_coordinates, equations_parabolic, dg, i, j, boundary) - flux = boundary_condition(flux_inner, u_inner, get_unsigned_normal_vector_3d(direction), - x, t, Gradient(), equations_parabolic) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, i, j, direction, neighbor] = flux[v] - end + direction, first_boundary, + last_boundary) + @unpack surface_flux = surface_integral + @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries + + @threaded for boundary in first_boundary:last_boundary + # Get neighboring element + neighbor = neighbor_ids[boundary] + + for j in eachnode(dg), i in eachnode(dg) + # Get boundary flux + u_ll, u_rr = get_surface_node_vars(u, equations_parabolic, dg, i, j, + boundary) + if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right + u_inner = u_ll + else # Element is on the right, boundary on the left + u_inner = u_rr + end + + # TODO: revisit if we want more general boundary treatments. + # This assumes the gradient numerical flux at the boundary is the gradient variable, + # which is consistent with BR1, LDG. 
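# NOTE: concretely, in the gradient step BR1 and LDG take the boundary
# "numerical flux" to be the solution state itself, which is why the inner
# value is passed through unchanged below.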
+ flux_inner = u_inner + + x = get_node_coords(node_coordinates, equations_parabolic, dg, i, j, + boundary) + flux = boundary_condition(flux_inner, u_inner, + get_unsigned_normal_vector_3d(direction), + x, t, Gradient(), equations_parabolic) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, j, direction, neighbor] = flux[v] + end + end end - end - return nothing + return nothing end -function calc_boundary_flux_divergence!(cache, t, boundary_conditions_parabolic::NamedTuple, - mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, +function calc_boundary_flux_divergence!(cache, t, + boundary_conditions_parabolic::NamedTuple, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack n_boundaries_per_direction = cache.boundaries - - # Calculate indices - lasts = accumulate(+, n_boundaries_per_direction) - firsts = lasts - n_boundaries_per_direction .+ 1 - - # Calc boundary fluxes in each direction - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[1], - equations_parabolic, surface_integral, dg, cache, - 1, firsts[1], lasts[1]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[2], - equations_parabolic, surface_integral, dg, cache, - 2, firsts[2], lasts[2]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[3], - equations_parabolic, surface_integral, dg, cache, - 3, firsts[3], lasts[3]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[4], - equations_parabolic, surface_integral, dg, cache, - 4, firsts[4], lasts[4]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[5], - equations_parabolic, surface_integral, dg, cache, - 5, firsts[5], lasts[5]) - calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, boundary_conditions_parabolic[6], - equations_parabolic, surface_integral, dg, cache, - 6, firsts[6], lasts[6]) + @unpack surface_flux_values = cache.elements + @unpack n_boundaries_per_direction = cache.boundaries + + # Calculate indices + lasts = accumulate(+, n_boundaries_per_direction) + firsts = lasts - n_boundaries_per_direction .+ 1 + + # Calc boundary fluxes in each direction + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[1], + equations_parabolic, surface_integral, + dg, cache, + 1, firsts[1], lasts[1]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[2], + equations_parabolic, surface_integral, + dg, cache, + 2, firsts[2], lasts[2]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[3], + equations_parabolic, surface_integral, + dg, cache, + 3, firsts[3], lasts[3]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[4], + equations_parabolic, surface_integral, + dg, cache, + 4, firsts[4], lasts[4]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[5], + equations_parabolic, surface_integral, + dg, cache, + 5, firsts[5], lasts[5]) + calc_boundary_flux_by_direction_divergence!(surface_flux_values, t, + boundary_conditions_parabolic[6], + equations_parabolic, surface_integral, + dg, cache, + 6, 
firsts[6], lasts[6])
 end

-function calc_boundary_flux_by_direction_divergence!(surface_flux_values::AbstractArray{<:Any,5}, t,
+function calc_boundary_flux_by_direction_divergence!(surface_flux_values::AbstractArray{
+                                                                                         <:Any,
+                                                                                         5
+                                                                                         },
+                                                     t,
                                                      boundary_condition,
                                                      equations_parabolic::AbstractEquationsParabolic,
                                                      surface_integral, dg::DG, cache,
-                                                     direction, first_boundary, last_boundary)
-  @unpack surface_flux = surface_integral
-
-  # Note: cache.boundaries.u contains the unsigned normal component (using "orientation", not "direction")
-  # of the viscous flux, as computed in `prolong2boundaries!`
-  @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries
-
-  @threaded for boundary in first_boundary:last_boundary
-    # Get neighboring element
-    neighbor = neighbor_ids[boundary]
-
-    for j in eachnode(dg), i in eachnode(dg)
-      # Get viscous boundary fluxes
-      flux_ll, flux_rr = get_surface_node_vars(u, equations_parabolic, dg, i, j, boundary)
-      if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right
-        flux_inner = flux_ll
-      else # Element is on the right, boundary on the left
-        flux_inner = flux_rr
-      end
-
-      x = get_node_coords(node_coordinates, equations_parabolic, dg, i, j, boundary)
-
-      # TODO: add a field in `cache.boundaries` for gradient information. UPDATE THIS COMMENT
-      # This currently works with Dirichlet/Neuman boundary conditions for LaplaceDiffusion3D and
-      # NoSlipWall/Adiabatic boundary conditions for CompressibleNavierStokesDiffusion3D as of 2022-6-27.
-      # It will not work with implementations which utilize `u_inner` to impose boundary conditions.
-      flux = boundary_condition(flux_inner, nothing, get_unsigned_normal_vector_3d(direction),
-                                x, t, Divergence(), equations_parabolic)
-
-      # Copy flux to left and right element storage
-      for v in eachvariable(equations_parabolic)
-        surface_flux_values[v, i, j, direction, neighbor] = flux[v]
-      end
+                                                     direction, first_boundary,
+                                                     last_boundary)
+    @unpack surface_flux = surface_integral
+
+    # Note: cache.boundaries.u contains the unsigned normal component (using "orientation", not "direction")
+    # of the viscous flux, as computed in `prolong2boundaries!`
+    @unpack u, neighbor_ids, neighbor_sides, node_coordinates, orientations = cache.boundaries
+
+    @threaded for boundary in first_boundary:last_boundary
+        # Get neighboring element
+        neighbor = neighbor_ids[boundary]
+
+        for j in eachnode(dg), i in eachnode(dg)
+            # Get viscous boundary fluxes
+            flux_ll, flux_rr = get_surface_node_vars(u, equations_parabolic, dg, i, j,
+                                                     boundary)
+            if neighbor_sides[boundary] == 1 # Element is on the left, boundary on the right
+                flux_inner = flux_ll
+            else # Element is on the right, boundary on the left
+                flux_inner = flux_rr
+            end
+
+            x = get_node_coords(node_coordinates, equations_parabolic, dg, i, j,
+                                boundary)
+
+            # TODO: add a field in `cache.boundaries` for gradient information. UPDATE THIS COMMENT
+            # Here, we pass in `u_inner = nothing` since we overwrite cache.boundaries.u with gradient information.
+            # This currently works with Dirichlet/Neumann boundary conditions for LaplaceDiffusion3D and
+            # NoSlipWall/Adiabatic boundary conditions for CompressibleNavierStokesDiffusion3D as of 2022-6-27.
+            # It will not work with implementations which utilize `u_inner` to impose boundary conditions.
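
The `Gradient()`/`Divergence()` dispatch described in the comments above can be
illustrated with a minimal sketch. The type `SketchDirichletBC` and its field are
hypothetical and assume Trixi.jl's `Gradient` and `Divergence` singleton types are
in scope; this is not the actual `BoundaryConditionDirichlet` implementation.

```julia
# Hypothetical boundary condition dispatching on the operator type.
struct SketchDirichletBC{F}
    boundary_value_function::F # assumed signature: (x, t, equations) -> boundary state
end

# Gradient pass: impose the prescribed boundary state directly.
@inline function (bc::SketchDirichletBC)(flux_inner, u_inner, normal, x, t,
                                         operator_type::Gradient, equations_parabolic)
    return bc.boundary_value_function(x, t, equations_parabolic)
end

# Divergence pass: `u_inner` is `nothing` here (see the comment above), so only
# the viscous flux `flux_inner` may be used.
@inline function (bc::SketchDirichletBC)(flux_inner, u_inner, normal, x, t,
                                         operator_type::Divergence, equations_parabolic)
    return flux_inner
end
```

Dispatching on a singleton operator type lets a single callable serve both passes
without any runtime branching.
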
+ flux = boundary_condition(flux_inner, nothing, + get_unsigned_normal_vector_3d(direction), + x, t, Divergence(), equations_parabolic) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, j, direction, neighbor] = flux[v] + end + end end - end - return nothing + return nothing end - # Calculate the gradient of the transformed variables function calc_gradient!(gradients, u_transformed, t, mesh::TreeMesh{3}, equations_parabolic, boundary_conditions_parabolic, dg::DG, cache, cache_parabolic) + gradients_x, gradients_y, gradients_z = gradients - gradients_x, gradients_y, gradients_z = gradients - - # Reset du - @trixi_timeit timer() "reset gradients" begin - reset_du!(gradients_x, dg, cache) - reset_du!(gradients_y, dg, cache) - reset_du!(gradients_z, dg, cache) - end - - # Calculate volume integral - @trixi_timeit timer() "volume integral" begin - @unpack derivative_dhat = dg.basis - @threaded for element in eachelement(dg, cache) - - # Calculate volume terms in one element - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, k, element) + # Reset du + @trixi_timeit timer() "reset gradients" begin + reset_du!(gradients_x, dg, cache) + reset_du!(gradients_y, dg, cache) + reset_du!(gradients_z, dg, cache) + end - for ii in eachnode(dg) - multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i], u_node, equations_parabolic, dg, ii, j, k, element) + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + @unpack derivative_dhat = dg.basis + @threaded for element in eachelement(dg, cache) + + # Calculate volume terms in one element + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, k, + element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i], + u_node, equations_parabolic, dg, ii, j, + k, element) + end + + for jj in eachnode(dg) + multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j], + u_node, equations_parabolic, dg, i, jj, + k, element) + end + + for kk in eachnode(dg) + multiply_add_to_node_vars!(gradients_z, derivative_dhat[kk, k], + u_node, equations_parabolic, dg, i, j, + kk, element) + end + end end + end - for jj in eachnode(dg) - multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j], u_node, equations_parabolic, dg, i, jj, k, element) - end + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache_parabolic, u_transformed, mesh, equations_parabolic, + dg.surface_integral, dg) + end - for kk in eachnode(dg) - multiply_add_to_node_vars!(gradients_z, derivative_dhat[kk, k], u_node, equations_parabolic, dg, i, j, kk, element) + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + @unpack surface_flux_values = cache_parabolic.elements + @unpack neighbor_ids, orientations = cache_parabolic.interfaces + + @threaded for interface in eachinterface(dg, cache_parabolic) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + # orientation = 3: left -> 6, right -> 5 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for j in eachnode(dg), i in 
eachnode(dg) + # Call pointwise Riemann solver + u_ll, u_rr = get_surface_node_vars(cache_parabolic.interfaces.u, + equations_parabolic, dg, i, j, + interface) + flux = 0.5 * (u_ll + u_rr) + + # Copy flux to left and right element storage + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, j, left_direction, left_id] = flux[v] + surface_flux_values[v, i, j, right_direction, right_id] = flux[v] + end + end end - end end - end - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" begin - @unpack surface_flux_values = cache_parabolic.elements - @unpack neighbor_ids, orientations = cache_parabolic.interfaces + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache_parabolic, u_transformed, mesh, equations_parabolic, + dg.surface_integral, dg) + end - @threaded for interface in eachinterface(dg, cache_parabolic) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - # orientation = 3: left -> 6, right -> 5 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for j in eachnode(dg), i in eachnode(dg) - # Call pointwise Riemann solver - u_ll, u_rr = get_surface_node_vars(cache_parabolic.interfaces.u, - equations_parabolic, dg, i, j, interface) - flux = 0.5 * (u_ll + u_rr) - - # Copy flux to left and right element storage - for v in eachvariable(equations_parabolic) - surface_flux_values[v, i, j, left_direction, left_id] = flux[v] - surface_flux_values[v, i, j, right_direction, right_id] = flux[v] - end - end + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_gradients!(cache_parabolic, t, boundary_conditions_parabolic, + mesh, equations_parabolic, + dg.surface_integral, dg) end - end - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache_parabolic, u_transformed, mesh, equations_parabolic, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux_gradients!( - cache_parabolic, t, boundary_conditions_parabolic, mesh, equations_parabolic, - dg.surface_integral, dg) - - # TODO: parabolic; mortars - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" begin - @unpack boundary_interpolation = dg.basis - @unpack surface_flux_values = cache_parabolic.elements - - # Note that all fluxes have been computed with outward-pointing normal vectors. - # Access the factors only once before beginning the loop to increase performance. - # We also use explicit assignments instead of `+=` to let `@muladd` turn these - # into FMAs (see comment at the top of the file). 
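
A minimal sketch of the FMA remark in the comment above, using MuladdMacro.jl; the
function name and arguments are made up for illustration:

```julia
using MuladdMacro

# Inside `@muladd`, the explicit assignment below can be rewritten to
# muladd(b, c[i], a[i]), i.e., a fused multiply-add; a compound
# `a[i] += b * c[i]` would not be transformed, which is why the code above
# avoids `+=`.
@muladd function axpy_like!(a, b, c)
    for i in eachindex(a)
        a[i] = a[i] + b * c[i]
    end
    return a
end
```
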
- factor_1 = boundary_interpolation[1, 1] - factor_2 = boundary_interpolation[nnodes(dg), 2] - @threaded for element in eachelement(dg, cache) - for m in eachnode(dg), l in eachnode(dg) - for v in eachvariable(equations_parabolic) - # surface at -x - gradients_x[v, 1, l, m, element] = ( - gradients_x[v, 1, l, m, element] - surface_flux_values[v, l, m, 1, element] * factor_1) - - # surface at +x - gradients_x[v, nnodes(dg), l, m, element] = ( - gradients_x[v, nnodes(dg), l, m, element] + surface_flux_values[v, l, m, 2, element] * factor_2) - - # surface at -y - gradients_y[v, l, 1, m, element] = ( - gradients_y[v, l, 1, m, element] - surface_flux_values[v, l, m, 3, element] * factor_1) - - # surface at +y - gradients_y[v, l, nnodes(dg), m, element] = ( - gradients_y[v, l, nnodes(dg), m, element] + surface_flux_values[v, l, m, 4, element] * factor_2) - - # surface at -z - gradients_z[v, l, m, 1, element] = ( - gradients_z[v, l, m, 1, element] - surface_flux_values[v, l, m, 5, element] * factor_1) - - # surface at +z - gradients_z[v, l, m, nnodes(dg), element] = ( - gradients_z[v, l, m, nnodes(dg), element] + surface_flux_values[v, l, m, 6, element] * factor_2) + + # TODO: parabolic; mortars + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + @unpack boundary_interpolation = dg.basis + @unpack surface_flux_values = cache_parabolic.elements + + # Note that all fluxes have been computed with outward-pointing normal vectors. + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). + factor_1 = boundary_interpolation[1, 1] + factor_2 = boundary_interpolation[nnodes(dg), 2] + @threaded for element in eachelement(dg, cache) + for m in eachnode(dg), l in eachnode(dg) + for v in eachvariable(equations_parabolic) + # surface at -x + gradients_x[v, 1, l, m, element] = (gradients_x[v, 1, l, m, + element] - + surface_flux_values[v, l, m, 1, + element] * + factor_1) + + # surface at +x + gradients_x[v, nnodes(dg), l, m, element] = (gradients_x[v, + nnodes(dg), + l, m, + element] + + surface_flux_values[v, + l, + m, + 2, + element] * + factor_2) + + # surface at -y + gradients_y[v, l, 1, m, element] = (gradients_y[v, l, 1, m, + element] - + surface_flux_values[v, l, m, 3, + element] * + factor_1) + + # surface at +y + gradients_y[v, l, nnodes(dg), m, element] = (gradients_y[v, l, + nnodes(dg), + m, + element] + + surface_flux_values[v, + l, + m, + 4, + element] * + factor_2) + + # surface at -z + gradients_z[v, l, m, 1, element] = (gradients_z[v, l, m, 1, + element] - + surface_flux_values[v, l, m, 5, + element] * + factor_1) + + # surface at +z + gradients_z[v, l, m, nnodes(dg), element] = (gradients_z[v, l, m, + nnodes(dg), + element] + + surface_flux_values[v, + l, + m, + 6, + element] * + factor_2) + end + end end - end end - end - # Apply Jacobian from mapping to reference element - @trixi_timeit timer() "Jacobian" begin - apply_jacobian_parabolic!(gradients_x, mesh, equations_parabolic, dg, cache_parabolic) - apply_jacobian_parabolic!(gradients_y, mesh, equations_parabolic, dg, cache_parabolic) - apply_jacobian_parabolic!(gradients_z, mesh, equations_parabolic, dg, cache_parabolic) - end + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(gradients_x, mesh, equations_parabolic, dg, + cache_parabolic) + apply_jacobian_parabolic!(gradients_y, 
mesh, equations_parabolic, dg, + cache_parabolic) + apply_jacobian_parabolic!(gradients_z, mesh, equations_parabolic, dg, + cache_parabolic) + end - return nothing + return nothing end - # This method is called when a SemidiscretizationHyperbolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. -function create_cache_parabolic(mesh::TreeMesh{3}, equations_hyperbolic::AbstractEquations, +function create_cache_parabolic(mesh::TreeMesh{3}, + equations_hyperbolic::AbstractEquations, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, RealT, uEltype) - # Get cells for which an element needs to be created (i.e. all leaf cells) - leaf_cell_ids = local_leaf_cells(mesh.tree) + # Get cells for which an element needs to be created (i.e. all leaf cells) + leaf_cell_ids = local_leaf_cells(mesh.tree) - elements = init_elements(leaf_cell_ids, mesh, equations_hyperbolic, dg.basis, RealT, uEltype) + elements = init_elements(leaf_cell_ids, mesh, equations_hyperbolic, dg.basis, RealT, + uEltype) - n_vars = nvariables(equations_hyperbolic) - n_nodes = nnodes(elements) - n_elements = nelements(elements) - u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_nodes, n_nodes, n_elements) - gradients = ntuple(_ -> similar(u_transformed), ndims(mesh)) - flux_viscous = ntuple(_ -> similar(u_transformed), ndims(mesh)) + n_vars = nvariables(equations_hyperbolic) + n_nodes = nnodes(elements) + n_elements = nelements(elements) + u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_nodes, n_nodes, n_elements) + gradients = ntuple(_ -> similar(u_transformed), ndims(mesh)) + flux_viscous = ntuple(_ -> similar(u_transformed), ndims(mesh)) - interfaces = init_interfaces(leaf_cell_ids, mesh, elements) + interfaces = init_interfaces(leaf_cell_ids, mesh, elements) - boundaries = init_boundaries(leaf_cell_ids, mesh, elements) + boundaries = init_boundaries(leaf_cell_ids, mesh, elements) - # mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) + # mortars = init_mortars(leaf_cell_ids, mesh, elements, dg.mortar) - # cache = (; elements, interfaces, boundaries, mortars) - cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) + # cache = (; elements, interfaces, boundaries, mortars) + cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) - # Add specialized parts of the cache required to compute the mortars etc. - # cache = (;cache..., create_cache(mesh, equations_parabolic, dg.mortar, uEltype)...) + # Add specialized parts of the cache required to compute the mortars etc. + # cache = (;cache..., create_cache(mesh, equations_parabolic, dg.mortar, uEltype)...) - return cache + return cache end - # Needed to *not* flip the sign of the inverse Jacobian. # This is because the parabolic fluxes are assumed to be of the form # `du/dt + df/dx = dg/dx + source(x,t)`, # where f(u) is the inviscid flux and g(u) is the viscous flux. 
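
To spell out the sign convention from the comment above: for a purely hyperbolic
right-hand side written as `du/dt + df/dx = 0`, applying the inverse Jacobian comes
with a sign flip, whereas the viscous contribution `dg/dx` enters with a positive
sign. A minimal numerical sketch with assumed values:

```julia
# Assumed values for one element; only the sign convention matters here.
inverse_jacobian = 2.0 # J⁻¹ of some element
rhs_contribution = 0.5 # accumulated volume and surface terms

du_hyperbolic = -inverse_jacobian * rhs_contribution # sign flip for du/dt + df/dx = 0
du_parabolic = inverse_jacobian * rhs_contribution   # no flip, matching the function below
```
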
function apply_jacobian_parabolic!(du, mesh::TreeMesh{3}, equations::AbstractEquationsParabolic, dg::DG, cache) + @threaded for element in eachelement(dg, cache) + factor = cache.elements.inverse_jacobian[element] - @threaded for element in eachelement(dg, cache) - factor = cache.elements.inverse_jacobian[element] - - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - for v in eachvariable(equations) - du[v, i, j, k, element] *= factor - end + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, j, k, element] *= factor + end + end end - end - return nothing + return nothing end - end # @muladd diff --git a/src/solvers/dgsem_tree/dg_parallel.jl b/src/solvers/dgsem_tree/dg_parallel.jl index 7ca4bc159ee..c614fe0d0e6 100644 --- a/src/solvers/dgsem_tree/dg_parallel.jl +++ b/src/solvers/dgsem_tree/dg_parallel.jl @@ -3,27 +3,32 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Initialize MPI data structures. This works for both the # `TreeMesh` and the `P4estMesh` and is dimension-agnostic. -function init_mpi_data_structures(mpi_neighbor_interfaces, mpi_neighbor_mortars, n_dims, nvars, n_nodes, uEltype) - data_size = nvars * n_nodes^(n_dims - 1) - n_small_elements = 2^(n_dims-1) - mpi_send_buffers = Vector{Vector{uEltype}}(undef, length(mpi_neighbor_interfaces)) - mpi_recv_buffers = Vector{Vector{uEltype}}(undef, length(mpi_neighbor_interfaces)) - for index in 1:length(mpi_neighbor_interfaces) - mpi_send_buffers[index] = Vector{uEltype}(undef, length(mpi_neighbor_interfaces[index]) * data_size + - length(mpi_neighbor_mortars[index]) * n_small_elements * 2 * data_size) - mpi_recv_buffers[index] = Vector{uEltype}(undef, length(mpi_neighbor_interfaces[index]) * data_size + - length(mpi_neighbor_mortars[index]) * n_small_elements * 2 * data_size) - end +function init_mpi_data_structures(mpi_neighbor_interfaces, mpi_neighbor_mortars, n_dims, + nvars, n_nodes, uEltype) + data_size = nvars * n_nodes^(n_dims - 1) + n_small_elements = 2^(n_dims - 1) + mpi_send_buffers = Vector{Vector{uEltype}}(undef, length(mpi_neighbor_interfaces)) + mpi_recv_buffers = Vector{Vector{uEltype}}(undef, length(mpi_neighbor_interfaces)) + for index in 1:length(mpi_neighbor_interfaces) + mpi_send_buffers[index] = Vector{uEltype}(undef, + length(mpi_neighbor_interfaces[index]) * + data_size + + length(mpi_neighbor_mortars[index]) * + n_small_elements * 2 * data_size) + mpi_recv_buffers[index] = Vector{uEltype}(undef, + length(mpi_neighbor_interfaces[index]) * + data_size + + length(mpi_neighbor_mortars[index]) * + n_small_elements * 2 * data_size) + end - mpi_send_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) - mpi_recv_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) + mpi_send_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) + mpi_recv_requests = Vector{MPI.Request}(undef, length(mpi_neighbor_interfaces)) - return mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests + return mpi_send_buffers, mpi_recv_buffers, mpi_send_requests, mpi_recv_requests end - - end # muladd diff --git a/src/solvers/dgsem_tree/indicators.jl b/src/solvers/dgsem_tree/indicators.jl index 30d3b2c0448..2eb0af87148 100644 --- a/src/solvers/dgsem_tree/indicators.jl +++ b/src/solvers/dgsem_tree/indicators.jl @@ -3,21 +3,21 @@ # we need to opt-in explicitly. 
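
The buffer sizing in `init_mpi_data_structures` above can be made concrete with a
small worked example; all counts here are assumed values, not taken from any
particular simulation:

```julia
# Assumed setup: 3D, 5 conserved variables, 4 nodes per direction, and one
# neighbor rank with 10 interfaces and 3 mortars.
n_dims, nvars, n_nodes = 3, 5, 4
data_size = nvars * n_nodes^(n_dims - 1) # 5 * 4^2 = 80 values per interface
n_small_elements = 2^(n_dims - 1)        # 4 small elements per mortar in 3D
n_interfaces, n_mortars = 10, 3

# Each mortar contributes `n_small_elements * 2 * data_size` entries.
buffer_length = n_interfaces * data_size +
                n_mortars * n_small_elements * 2 * data_size # 800 + 1920 = 2720
```
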
# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent abstract type AbstractIndicator end -function create_cache(typ::Type{IndicatorType}, semi) where {IndicatorType<:AbstractIndicator} - create_cache(typ, mesh_equations_solver_cache(semi)...) +function create_cache(typ::Type{IndicatorType}, + semi) where {IndicatorType <: AbstractIndicator} + create_cache(typ, mesh_equations_solver_cache(semi)...) end -function get_element_variables!(element_variables, indicator::AbstractIndicator, ::VolumeIntegralShockCapturingHG) - element_variables[:indicator_shock_capturing] = indicator.cache.alpha - return nothing +function get_element_variables!(element_variables, indicator::AbstractIndicator, + ::VolumeIntegralShockCapturingHG) + element_variables[:indicator_shock_capturing] = indicator.cache.alpha + return nothing end - - """ IndicatorHennemannGassner(equations::AbstractEquations, basis; alpha_max=0.5, @@ -41,101 +41,103 @@ See also [`VolumeIntegralShockCapturingHG`](@ref). "A provably entropy stable subcell shock capturing approach for high order split form DG" [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) """ -struct IndicatorHennemannGassner{RealT<:Real, Variable, Cache} <: AbstractIndicator - alpha_max::RealT - alpha_min::RealT - alpha_smooth::Bool - variable::Variable - cache::Cache +struct IndicatorHennemannGassner{RealT <: Real, Variable, Cache} <: AbstractIndicator + alpha_max::RealT + alpha_min::RealT + alpha_smooth::Bool + variable::Variable + cache::Cache end # this method is used when the indicator is constructed as for shock-capturing volume integrals function IndicatorHennemannGassner(equations::AbstractEquations, basis; - alpha_max=0.5, - alpha_min=0.001, - alpha_smooth=true, + alpha_max = 0.5, + alpha_min = 0.001, + alpha_smooth = true, variable) - alpha_max, alpha_min = promote(alpha_max, alpha_min) - cache = create_cache(IndicatorHennemannGassner, equations, basis) - IndicatorHennemannGassner{typeof(alpha_max), typeof(variable), typeof(cache)}( - alpha_max, alpha_min, alpha_smooth, variable, cache) + alpha_max, alpha_min = promote(alpha_max, alpha_min) + cache = create_cache(IndicatorHennemannGassner, equations, basis) + IndicatorHennemannGassner{typeof(alpha_max), typeof(variable), typeof(cache)}(alpha_max, + alpha_min, + alpha_smooth, + variable, + cache) end # this method is used when the indicator is constructed as for AMR function IndicatorHennemannGassner(semi::AbstractSemidiscretization; - alpha_max=0.5, - alpha_min=0.001, - alpha_smooth=true, + alpha_max = 0.5, + alpha_min = 0.001, + alpha_smooth = true, variable) - alpha_max, alpha_min = promote(alpha_max, alpha_min) - cache = create_cache(IndicatorHennemannGassner, semi) - IndicatorHennemannGassner{typeof(alpha_max), typeof(variable), typeof(cache)}( - alpha_max, alpha_min, alpha_smooth, variable, cache) + alpha_max, alpha_min = promote(alpha_max, alpha_min) + cache = create_cache(IndicatorHennemannGassner, semi) + IndicatorHennemannGassner{typeof(alpha_max), typeof(variable), typeof(cache)}(alpha_max, + alpha_min, + alpha_smooth, + variable, + cache) end - function Base.show(io::IO, indicator::IndicatorHennemannGassner) - @nospecialize indicator # reduce precompilation time - - print(io, "IndicatorHennemannGassner(") - print(io, indicator.variable) - print(io, ", alpha_max=", indicator.alpha_max) - print(io, ", alpha_min=", indicator.alpha_min) - print(io, ", alpha_smooth=", indicator.alpha_smooth) - print(io, ")") + @nospecialize indicator # reduce 
precompilation time + + print(io, "IndicatorHennemannGassner(") + print(io, indicator.variable) + print(io, ", alpha_max=", indicator.alpha_max) + print(io, ", alpha_min=", indicator.alpha_min) + print(io, ", alpha_smooth=", indicator.alpha_smooth) + print(io, ")") end function Base.show(io::IO, ::MIME"text/plain", indicator::IndicatorHennemannGassner) - @nospecialize indicator # reduce precompilation time - - if get(io, :compact, false) - show(io, indicator) - else - setup = [ - "indicator variable" => indicator.variable, - "max. α" => indicator.alpha_max, - "min. α" => indicator.alpha_min, - "smooth α" => (indicator.alpha_smooth ? "yes" : "no"), - ] - summary_box(io, "IndicatorHennemannGassner", setup) - end -end + @nospecialize indicator # reduce precompilation time + if get(io, :compact, false) + show(io, indicator) + else + setup = [ + "indicator variable" => indicator.variable, + "max. α" => indicator.alpha_max, + "min. α" => indicator.alpha_min, + "smooth α" => (indicator.alpha_smooth ? "yes" : "no"), + ] + summary_box(io, "IndicatorHennemannGassner", setup) + end +end function (indicator_hg::IndicatorHennemannGassner)(u, mesh, equations, dg::DGSEM, cache; kwargs...) - @unpack alpha_smooth = indicator_hg - @unpack alpha, alpha_tmp = indicator_hg.cache - # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? - # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` - # or just `resize!` whenever we call the relevant methods as we do now? - resize!(alpha, nelements(dg, cache)) - if alpha_smooth - resize!(alpha_tmp, nelements(dg, cache)) - end - - # magic parameters - threshold = 0.5 * 10^(-1.8 * (nnodes(dg))^0.25) - parameter_s = log((1 - 0.0001) / 0.0001) - - @threaded for element in eachelement(dg, cache) - # This is dispatched by mesh dimension. - # Use this function barrier and unpack inside to avoid passing closures to - # Polyester.jl with `@batch` (`@threaded`). - # Otherwise, `@threaded` does not work here with Julia ARM on macOS. - # See https://github.com/JuliaSIMD/Polyester.jl/issues/88. - calc_indicator_hennemann_gassner!( - indicator_hg, threshold, parameter_s, u, - element, mesh, equations, dg, cache) - end - - if alpha_smooth - apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) - end - - return alpha -end + @unpack alpha_smooth = indicator_hg + @unpack alpha, alpha_tmp = indicator_hg.cache + # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? + # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` + # or just `resize!` whenever we call the relevant methods as we do now? + resize!(alpha, nelements(dg, cache)) + if alpha_smooth + resize!(alpha_tmp, nelements(dg, cache)) + end + # magic parameters + threshold = 0.5 * 10^(-1.8 * (nnodes(dg))^0.25) + parameter_s = log((1 - 0.0001) / 0.0001) + + @threaded for element in eachelement(dg, cache) + # This is dispatched by mesh dimension. + # Use this function barrier and unpack inside to avoid passing closures to + # Polyester.jl with `@batch` (`@threaded`). + # Otherwise, `@threaded` does not work here with Julia ARM on macOS. + # See https://github.com/JuliaSIMD/Polyester.jl/issues/88. + calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, u, + element, mesh, equations, dg, cache) + end + + if alpha_smooth + apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) + end + + return alpha +end """ IndicatorLöhner (equivalent to IndicatorLoehner) @@ -159,59 +161,60 @@ and `basis` if this indicator should be used for shock capturing. 
[doi: 10.1016/0045-7825(87)90098-3](https://doi.org/10.1016/0045-7825(87)90098-3) - http://flash.uchicago.edu/site/flashcode/user_support/flash4_ug_4p62/node59.html#SECTION05163100000000000000 """ -struct IndicatorLöhner{RealT<:Real, Variable, Cache} <: AbstractIndicator - f_wave::RealT # TODO: Taal documentation - variable::Variable - cache::Cache +struct IndicatorLöhner{RealT <: Real, Variable, Cache} <: AbstractIndicator + f_wave::RealT # TODO: Taal documentation + variable::Variable + cache::Cache end # this method is used when the indicator is constructed as for shock-capturing volume integrals function IndicatorLöhner(equations::AbstractEquations, basis; - f_wave=0.2, variable) - cache = create_cache(IndicatorLöhner, equations, basis) - IndicatorLöhner{typeof(f_wave), typeof(variable), typeof(cache)}(f_wave, variable, cache) + f_wave = 0.2, variable) + cache = create_cache(IndicatorLöhner, equations, basis) + IndicatorLöhner{typeof(f_wave), typeof(variable), typeof(cache)}(f_wave, variable, + cache) end # this method is used when the indicator is constructed as for AMR function IndicatorLöhner(semi::AbstractSemidiscretization; - f_wave=0.2, variable) - cache = create_cache(IndicatorLöhner, semi) - IndicatorLöhner{typeof(f_wave), typeof(variable), typeof(cache)}(f_wave, variable, cache) + f_wave = 0.2, variable) + cache = create_cache(IndicatorLöhner, semi) + IndicatorLöhner{typeof(f_wave), typeof(variable), typeof(cache)}(f_wave, variable, + cache) end - function Base.show(io::IO, indicator::IndicatorLöhner) - @nospecialize indicator # reduce precompilation time + @nospecialize indicator # reduce precompilation time - print(io, "IndicatorLöhner(") - print(io, "f_wave=", indicator.f_wave, ", variable=", indicator.variable, ")") + print(io, "IndicatorLöhner(") + print(io, "f_wave=", indicator.f_wave, ", variable=", indicator.variable, ")") end function Base.show(io::IO, ::MIME"text/plain", indicator::IndicatorLöhner) - @nospecialize indicator # reduce precompilation time - - if get(io, :compact, false) - show(io, indicator) - else - setup = [ - "indicator variable" => indicator.variable, - "f_wave" => indicator.f_wave, - ] - summary_box(io, "IndicatorLöhner", setup) - end + @nospecialize indicator # reduce precompilation time + + if get(io, :compact, false) + show(io, indicator) + else + setup = [ + "indicator variable" => indicator.variable, + "f_wave" => indicator.f_wave, + ] + summary_box(io, "IndicatorLöhner", setup) + end end const IndicatorLoehner = IndicatorLöhner # dirty Löhner estimate, direction by direction, assuming constant nodes -@inline function local_löhner_estimate(um::Real, u0::Real, up::Real, löhner::IndicatorLöhner) - num = abs(up - 2 * u0 + um) - den = abs(up - u0) + abs(u0-um) + löhner.f_wave * (abs(up) + 2 * abs(u0) + abs(um)) - return num / den +@inline function local_löhner_estimate(um::Real, u0::Real, up::Real, + löhner::IndicatorLöhner) + num = abs(up - 2 * u0 + um) + den = abs(up - u0) + abs(u0 - um) + + löhner.f_wave * (abs(up) + 2 * abs(u0) + abs(um)) + return num / den end - - """ IndicatorMax(equations::AbstractEquations, basis; variable) IndicatorMax(semi::AbstractSemidiscretization; variable) @@ -220,44 +223,43 @@ A simple indicator returning the maximum of `variable` in an element. When constructed to be used for AMR, pass the `semi`. Pass the `equations`, and `basis` if this indicator should be used for shock capturing. 
""" -struct IndicatorMax{Variable, Cache<:NamedTuple} <: AbstractIndicator - variable::Variable - cache::Cache +struct IndicatorMax{Variable, Cache <: NamedTuple} <: AbstractIndicator + variable::Variable + cache::Cache end # this method is used when the indicator is constructed as for shock-capturing volume integrals function IndicatorMax(equations::AbstractEquations, basis; variable) - cache = create_cache(IndicatorMax, equations, basis) - IndicatorMax{typeof(variable), typeof(cache)}(variable, cache) + cache = create_cache(IndicatorMax, equations, basis) + IndicatorMax{typeof(variable), typeof(cache)}(variable, cache) end # this method is used when the indicator is constructed as for AMR function IndicatorMax(semi::AbstractSemidiscretization; variable) - cache = create_cache(IndicatorMax, semi) - return IndicatorMax{typeof(variable), typeof(cache)}(variable, cache) + cache = create_cache(IndicatorMax, semi) + return IndicatorMax{typeof(variable), typeof(cache)}(variable, cache) end - function Base.show(io::IO, indicator::IndicatorMax) - @nospecialize indicator # reduce precompilation time + @nospecialize indicator # reduce precompilation time - print(io, "IndicatorMax(") - print(io, "variable=", indicator.variable, ")") + print(io, "IndicatorMax(") + print(io, "variable=", indicator.variable, ")") end function Base.show(io::IO, ::MIME"text/plain", indicator::IndicatorMax) - @nospecialize indicator # reduce precompilation time - - if get(io, :compact, false) - show(io, indicator) - else - setup = [ - "indicator variable" => indicator.variable, - ] - summary_box(io, "IndicatorMax", setup) - end + @nospecialize indicator # reduce precompilation time + + if get(io, :compact, false) + show(io, indicator) + else + setup = [ + "indicator variable" => indicator.variable, + ] + summary_box(io, "IndicatorMax", setup) + end end """ @@ -293,127 +295,132 @@ If `alpha_continuous == false`, the blending factor is set to `alpha = 0` for go This is an experimental feature and may change in future releases. 
""" -struct IndicatorNeuralNetwork{IndicatorType, RealT<:Real, Variable, Chain, Cache} <: AbstractIndicator - indicator_type::IndicatorType - alpha_max::RealT - alpha_min::RealT - alpha_smooth::Bool - alpha_continuous::Bool - alpha_amr::Bool - variable::Variable - network::Chain - cache::Cache +struct IndicatorNeuralNetwork{IndicatorType, RealT <: Real, Variable, Chain, Cache} <: + AbstractIndicator + indicator_type::IndicatorType + alpha_max::RealT + alpha_min::RealT + alpha_smooth::Bool + alpha_continuous::Bool + alpha_amr::Bool + variable::Variable + network::Chain + cache::Cache end # this method is used when the indicator is constructed as for shock-capturing volume integrals function IndicatorNeuralNetwork(equations::AbstractEquations, basis; indicator_type, - alpha_max=0.5, - alpha_min=0.001, - alpha_smooth=true, - alpha_continuous=true, - alpha_amr=false, + alpha_max = 0.5, + alpha_min = 0.001, + alpha_smooth = true, + alpha_continuous = true, + alpha_amr = false, variable, network) - alpha_max, alpha_min = promote(alpha_max, alpha_min) - IndicatorType = typeof(indicator_type) - cache = create_cache(IndicatorNeuralNetwork{IndicatorType}, equations, basis) - IndicatorNeuralNetwork{IndicatorType, typeof(alpha_max), typeof(variable), typeof(network), typeof(cache)}( - indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, - network, cache) + alpha_max, alpha_min = promote(alpha_max, alpha_min) + IndicatorType = typeof(indicator_type) + cache = create_cache(IndicatorNeuralNetwork{IndicatorType}, equations, basis) + IndicatorNeuralNetwork{IndicatorType, typeof(alpha_max), typeof(variable), + typeof(network), typeof(cache)}(indicator_type, alpha_max, + alpha_min, alpha_smooth, + alpha_continuous, alpha_amr, + variable, + network, cache) end # this method is used when the indicator is constructed as for AMR function IndicatorNeuralNetwork(semi::AbstractSemidiscretization; indicator_type, - alpha_max=0.5, - alpha_min=0.001, - alpha_smooth=true, - alpha_continuous=true, - alpha_amr=true, + alpha_max = 0.5, + alpha_min = 0.001, + alpha_smooth = true, + alpha_continuous = true, + alpha_amr = true, variable, network) - alpha_max, alpha_min = promote(alpha_max, alpha_min) - IndicatorType = typeof(indicator_type) - cache = create_cache(IndicatorNeuralNetwork{IndicatorType}, semi) - IndicatorNeuralNetwork{IndicatorType, typeof(alpha_max), typeof(variable), typeof(network), typeof(cache)}( - indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, - network, cache) + alpha_max, alpha_min = promote(alpha_max, alpha_min) + IndicatorType = typeof(indicator_type) + cache = create_cache(IndicatorNeuralNetwork{IndicatorType}, semi) + IndicatorNeuralNetwork{IndicatorType, typeof(alpha_max), typeof(variable), + typeof(network), typeof(cache)}(indicator_type, alpha_max, + alpha_min, alpha_smooth, + alpha_continuous, alpha_amr, + variable, + network, cache) end - function Base.show(io::IO, indicator::IndicatorNeuralNetwork) - @nospecialize indicator # reduce precompilation time - - print(io, "IndicatorNeuralNetwork(") - print(io, indicator.indicator_type) - print(io, ", alpha_max=", indicator.alpha_max) - print(io, ", alpha_min=", indicator.alpha_min) - print(io, ", alpha_smooth=", indicator.alpha_smooth) - print(io, ", alpha_continuous=", indicator.alpha_continuous) - print(io, indicator.variable) - print(io, ")") + @nospecialize indicator # reduce precompilation time + + print(io, "IndicatorNeuralNetwork(") + print(io, 
indicator.indicator_type) + print(io, ", alpha_max=", indicator.alpha_max) + print(io, ", alpha_min=", indicator.alpha_min) + print(io, ", alpha_smooth=", indicator.alpha_smooth) + print(io, ", alpha_continuous=", indicator.alpha_continuous) + print(io, indicator.variable) + print(io, ")") end function Base.show(io::IO, ::MIME"text/plain", indicator::IndicatorNeuralNetwork) - @nospecialize indicator # reduce precompilation time - - if get(io, :compact, false) - show(io, indicator) - else - setup = [ - "indicator type" => indicator.indicator_type, - "max. α" => indicator.alpha_max, - "min. α" => indicator.alpha_min, - "smooth α" => (indicator.alpha_smooth ? "yes" : "no"), - "continuous α" => (indicator.alpha_continuous ? "yes" : "no"), - "indicator variable" => indicator.variable, - ] - summary_box(io, "IndicatorNeuralNetwork", setup) - end -end - - -# Convert probability for troubled cell to indicator value for shockcapturing/AMR -@inline function probability_to_indicator(probability_troubled_cell, alpha_continuous, alpha_amr, - alpha_min, alpha_max) - # Initialize indicator to zero - alpha_element = zero(probability_troubled_cell) + @nospecialize indicator # reduce precompilation time - if alpha_continuous && !alpha_amr - # Set good cells to 0 and troubled cells to continuous value of the network prediction - if probability_troubled_cell > 0.5 - alpha_element = probability_troubled_cell + if get(io, :compact, false) + show(io, indicator) else - alpha_element = zero(probability_troubled_cell) - end - - # Take care of the case close to pure FV - if alpha_element > 1 - alpha_min - alpha_element = one(alpha_element) + setup = [ + "indicator type" => indicator.indicator_type, + "max. α" => indicator.alpha_max, + "min. α" => indicator.alpha_min, + "smooth α" => (indicator.alpha_smooth ? "yes" : "no"), + "continuous α" => (indicator.alpha_continuous ? 
"yes" : "no"), + "indicator variable" => indicator.variable, + ] + summary_box(io, "IndicatorNeuralNetwork", setup) end +end - # Scale the probability for a troubled cell (in [0,1]) to the maximum allowed alpha - alpha_element *= alpha_max - elseif !alpha_continuous && !alpha_amr - # Set good cells to 0 and troubled cells to 1 - if probability_troubled_cell > 0.5 - alpha_element = alpha_max - else - alpha_element = zero(alpha_max) +# Convert probability for troubled cell to indicator value for shockcapturing/AMR +@inline function probability_to_indicator(probability_troubled_cell, alpha_continuous, + alpha_amr, + alpha_min, alpha_max) + # Initialize indicator to zero + alpha_element = zero(probability_troubled_cell) + + if alpha_continuous && !alpha_amr + # Set good cells to 0 and troubled cells to continuous value of the network prediction + if probability_troubled_cell > 0.5 + alpha_element = probability_troubled_cell + else + alpha_element = zero(probability_troubled_cell) + end + + # Take care of the case close to pure FV + if alpha_element > 1 - alpha_min + alpha_element = one(alpha_element) + end + + # Scale the probability for a troubled cell (in [0,1]) to the maximum allowed alpha + alpha_element *= alpha_max + elseif !alpha_continuous && !alpha_amr + # Set good cells to 0 and troubled cells to 1 + if probability_troubled_cell > 0.5 + alpha_element = alpha_max + else + alpha_element = zero(alpha_max) + end + elseif alpha_amr + # The entire continuous output of the neural network is used for AMR + alpha_element = probability_troubled_cell + + # Scale the probability for a troubled cell (in [0,1]) to the maximum allowed alpha + alpha_element *= alpha_max end - elseif alpha_amr - # The entire continuous output of the neural network is used for AMR - alpha_element = probability_troubled_cell - - # Scale the probability for a troubled cell (in [0,1]) to the maximum allowed alpha - alpha_element *= alpha_max - end - return alpha_element + return alpha_element end - """ NeuralNetworkPerssonPeraire @@ -449,5 +456,4 @@ Indicator type for creating an `IndicatorNeuralNetwork` indicator. See also: [`IndicatorNeuralNetwork`](@ref) """ struct NeuralNetworkCNN end - end # @muladd diff --git a/src/solvers/dgsem_tree/indicators_1d.jl b/src/solvers/dgsem_tree/indicators_1d.jl index 7086d77a1a3..e722584bb2e 100644 --- a/src/solvers/dgsem_tree/indicators_1d.jl +++ b/src/solvers/dgsem_tree/indicators_1d.jl @@ -3,397 +3,414 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # this method is used when the indicator is constructed as for shock-capturing volume integrals -function create_cache(::Type{IndicatorHennemannGassner}, equations::AbstractEquations{1}, basis::LobattoLegendreBasis) - - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) +function create_cache(::Type{IndicatorHennemannGassner}, + equations::AbstractEquations{1}, basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] - modal_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] + modal_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] - return (; alpha, alpha_tmp, indicator_threaded, modal_threaded) + return (; alpha, alpha_tmp, indicator_threaded, modal_threaded) end # this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, equations::AbstractEquations{1}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) +function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, + equations::AbstractEquations{1}, dg::DGSEM, cache) + create_cache(typ, equations, dg.basis) end - # Use this function barrier and unpack inside to avoid passing closures to Polyester.jl # with @batch (@threaded). # Otherwise, @threaded does not work here with Julia ARM on macOS. # See https://github.com/JuliaSIMD/Polyester.jl/issues/88. -@inline function calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, u, +@inline function calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, + u, element, mesh::AbstractMesh{1}, equations, dg, cache) - @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg - @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded = indicator_hg.cache - - indicator = indicator_threaded[Threads.threadid()] - modal = modal_threaded[Threads.threadid()] - - # Calculate indicator variables at Gauss-Lobatto nodes - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, element) - indicator[i] = indicator_hg.variable(u_local, equations) - end - - # Convert to modal representation - multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator) - - # Calculate total energies for all modes, without highest, without two highest - total_energy = zero(eltype(modal)) - for i in 1:nnodes(dg) - total_energy += modal[i]^2 - end - total_energy_clip1 = zero(eltype(modal)) - for i in 1:(nnodes(dg)-1) - total_energy_clip1 += modal[i]^2 - end - total_energy_clip2 = zero(eltype(modal)) - for i in 1:(nnodes(dg)-2) - total_energy_clip2 += modal[i]^2 - end - - # Calculate energy in higher modes - if !(iszero(total_energy)) - energy_frac_1 = (total_energy - total_energy_clip1) / total_energy - else - energy_frac_1 = zero(total_energy) - end - if !(iszero(total_energy_clip1)) - energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 - else - energy_frac_2 = zero(total_energy_clip1) - end - energy = max(energy_frac_1, energy_frac_2) - - alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) - - # Take care of the case close to pure DG - if alpha_element < alpha_min - alpha_element = zero(alpha_element) - end - - # Take care of the case close to pure FV - if alpha_element > 1 - 
alpha_min
-    alpha_element = one(alpha_element)
-  end
-
-  # Clip the maximum amount of FV allowed
-  alpha[element] = min(alpha_max, alpha_element)
+    @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg
+    @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded = indicator_hg.cache
+
+    indicator = indicator_threaded[Threads.threadid()]
+    modal = modal_threaded[Threads.threadid()]
+
+    # Calculate indicator variables at Gauss-Lobatto nodes
+    for i in eachnode(dg)
+        u_local = get_node_vars(u, equations, dg, i, element)
+        indicator[i] = indicator_hg.variable(u_local, equations)
+    end
+
+    # Convert to modal representation
+    multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre,
+                                   indicator)
+
+    # Calculate total energies for all modes, without highest, without two highest
+    total_energy = zero(eltype(modal))
+    for i in 1:nnodes(dg)
+        total_energy += modal[i]^2
+    end
+    total_energy_clip1 = zero(eltype(modal))
+    for i in 1:(nnodes(dg) - 1)
+        total_energy_clip1 += modal[i]^2
+    end
+    total_energy_clip2 = zero(eltype(modal))
+    for i in 1:(nnodes(dg) - 2)
+        total_energy_clip2 += modal[i]^2
+    end
+
+    # Calculate energy in higher modes
+    if !(iszero(total_energy))
+        energy_frac_1 = (total_energy - total_energy_clip1) / total_energy
+    else
+        energy_frac_1 = zero(total_energy)
+    end
+    if !(iszero(total_energy_clip1))
+        energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1
+    else
+        energy_frac_2 = zero(total_energy_clip1)
+    end
+    energy = max(energy_frac_1, energy_frac_2)
+
+    alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold)))
+
+    # Take care of the case close to pure DG
+    if alpha_element < alpha_min
+        alpha_element = zero(alpha_element)
+    end
+
+    # Take care of the case close to pure FV
+    if alpha_element > 1 - alpha_min
+        alpha_element = one(alpha_element)
+    end
+
+    # Clip the maximum amount of FV allowed
+    alpha[element] = min(alpha_max, alpha_element)
 end

 # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
-function apply_smoothing!(mesh::Union{TreeMesh{1}, P4estMesh{1}}, alpha, alpha_tmp, dg, cache)
-  # Copy alpha values such that smoothing is indpedenent of the element access order
-  alpha_tmp .= alpha
-
-  # Loop over interfaces
-  for interface in eachinterface(dg, cache)
-    # Get neighboring element ids
-    left = cache.interfaces.neighbor_ids[1, interface]
-    right = cache.interfaces.neighbor_ids[2, interface]
-
-    # Apply smoothing
-    alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left])
-    alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right])
-  end
+function apply_smoothing!(mesh::Union{TreeMesh{1}, P4estMesh{1}}, alpha, alpha_tmp, dg,
+                          cache)
+    # Copy alpha values such that smoothing is independent of the element access order
+    alpha_tmp .= alpha
+
+    # Loop over interfaces
+    for interface in eachinterface(dg, cache)
+        # Get neighboring element ids
+        left = cache.interfaces.neighbor_ids[1, interface]
+        right = cache.interfaces.neighbor_ids[2, interface]
+
+        # Apply smoothing
+        alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left])
+        alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right])
+    end
 end
-
 # this method is used when the indicator is constructed as for shock-capturing volume integrals
-function create_cache(::Type{IndicatorLöhner}, equations::AbstractEquations{1}, basis::LobattoLegendreBasis)
-
-  alpha = Vector{real(basis)}()
+function create_cache(::Type{IndicatorLöhner}, 
equations::AbstractEquations{1}, + basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] - return (; alpha, indicator_threaded) + return (; alpha, indicator_threaded) end # this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorLöhner}, mesh, equations::AbstractEquations{1}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) +function create_cache(typ::Type{IndicatorLöhner}, mesh, equations::AbstractEquations{1}, + dg::DGSEM, cache) + create_cache(typ, equations, dg.basis) end - -function (löhner::IndicatorLöhner)(u::AbstractArray{<:Any,3}, +function (löhner::IndicatorLöhner)(u::AbstractArray{<:Any, 3}, mesh, equations, dg::DGSEM, cache; kwargs...) - @assert nnodes(dg) >= 3 "IndicatorLöhner only works for nnodes >= 3 (polydeg > 1)" - @unpack alpha, indicator_threaded = löhner.cache - resize!(alpha, nelements(dg, cache)) + @assert nnodes(dg)>=3 "IndicatorLöhner only works for nnodes >= 3 (polydeg > 1)" + @unpack alpha, indicator_threaded = löhner.cache + resize!(alpha, nelements(dg, cache)) - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] - # Calculate indicator variables at Gauss-Lobatto nodes - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, element) - indicator[i] = löhner.variable(u_local, equations) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, element) + indicator[i] = löhner.variable(u_local, equations) + end - estimate = zero(real(dg)) - for i in 2:nnodes(dg)-1 - # x direction - u0 = indicator[i ] - up = indicator[i+1] - um = indicator[i-1] - estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) - end + estimate = zero(real(dg)) + for i in 2:(nnodes(dg) - 1) + # x direction + u0 = indicator[i] + up = indicator[i + 1] + um = indicator[i - 1] + estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) + end - # use the maximum as DG element indicator - alpha[element] = estimate - end + # use the maximum as DG element indicator + alpha[element] = estimate + end - return alpha + return alpha end - # this method is used when the indicator is constructed as for shock-capturing volume integrals -function create_cache(::Type{IndicatorMax}, equations::AbstractEquations{1}, basis::LobattoLegendreBasis) - - alpha = Vector{real(basis)}() +function create_cache(::Type{IndicatorMax}, equations::AbstractEquations{1}, + basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis)) for _ in 1:Threads.nthreads()] - return (; alpha, indicator_threaded) + return (; alpha, indicator_threaded) end # this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorMax}, mesh, equations::AbstractEquations{1}, dg::DGSEM, cache) - cache = create_cache(typ, equations, dg.basis) +function create_cache(typ::Type{IndicatorMax}, mesh, equations::AbstractEquations{1}, + 
dg::DGSEM, cache) + cache = create_cache(typ, equations, dg.basis) end - -function (indicator_max::IndicatorMax)(u::AbstractArray{<:Any,3}, +function (indicator_max::IndicatorMax)(u::AbstractArray{<:Any, 3}, mesh, equations, dg::DGSEM, cache; kwargs...) - @unpack alpha, indicator_threaded = indicator_max.cache - resize!(alpha, nelements(dg, cache)) + @unpack alpha, indicator_threaded = indicator_max.cache + resize!(alpha, nelements(dg, cache)) - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] - # Calculate indicator variables at Gauss-Lobatto nodes - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, element) - indicator[i] = indicator_max.variable(u_local, equations) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, element) + indicator[i] = indicator_max.variable(u_local, equations) + end - alpha[element] = maximum(indicator) - end + alpha[element] = maximum(indicator) + end - return alpha + return alpha end # this method is used when the indicator is constructed as for shock-capturing volume integrals # empty cache is default function create_cache(::Type{<:IndicatorNeuralNetwork}, equations::AbstractEquations{1}, basis::LobattoLegendreBasis) - return NamedTuple() + return NamedTuple() end # cache for NeuralNetworkPerssonPeraire-type indicator function create_cache(::Type{IndicatorNeuralNetwork{NeuralNetworkPerssonPeraire}}, equations::AbstractEquations{1}, basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) + A = Array{real(basis), ndims(equations)} - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) - A = Array{real(basis), ndims(equations)} - - prototype = A(undef, nnodes(basis)) - indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - modal_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + prototype = A(undef, nnodes(basis)) + indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + modal_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - return (; alpha, alpha_tmp, indicator_threaded, modal_threaded) + return (; alpha, alpha_tmp, indicator_threaded, modal_threaded) end # cache for NeuralNetworkRayHesthaven-type indicator function create_cache(::Type{IndicatorNeuralNetwork{NeuralNetworkRayHesthaven}}, equations::AbstractEquations{1}, basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) + A = Array{real(basis), ndims(equations)} - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) - A = Array{real(basis), ndims(equations)} - - prototype = A(undef, nnodes(basis)) - indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - neighbor_ids = Vector{Int}(undef, 2) + prototype = A(undef, nnodes(basis)) + indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + neighbor_ids = Vector{Int}(undef, 2) - return (; alpha, alpha_tmp, indicator_threaded, neighbor_ids) + return (; alpha, alpha_tmp, indicator_threaded, neighbor_ids) end # this method is used when the indicator is constructed as for AMR function create_cache(typ::Type{<:IndicatorNeuralNetwork}, mesh, equations::AbstractEquations{1}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) + create_cache(typ, equations, dg.basis) end -function 
(indicator_ann::IndicatorNeuralNetwork{NeuralNetworkPerssonPeraire})( - u::AbstractArray{<:Any,3}, mesh, equations, dg::DGSEM, cache; kwargs...) - @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann - - @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded = indicator_ann.cache - # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? - # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` - # or just `resize!` whenever we call the relevant methods as we do now? - resize!(alpha, nelements(dg, cache)) - if alpha_smooth - resize!(alpha_tmp, nelements(dg, cache)) - end - - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] - modal = modal_threaded[Threads.threadid()] - - # Calculate indicator variables at Gauss-Lobatto nodes - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, element) - indicator[i] = indicator_ann.variable(u_local, equations) +function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkPerssonPeraire})(u::AbstractArray{ + <:Any, + 3 + }, + mesh, + equations, + dg::DGSEM, + cache; + kwargs...) + @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + + @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded = indicator_ann.cache + # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? + # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` + # or just `resize!` whenever we call the relevant methods as we do now? + resize!(alpha, nelements(dg, cache)) + if alpha_smooth + resize!(alpha_tmp, nelements(dg, cache)) end - # Convert to modal representation - multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator) + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] - # Calculate total energies for all modes, without highest, without two highest - total_energy = zero(eltype(modal)) - for i in 1:nnodes(dg) - total_energy += modal[i]^2 - end - total_energy_clip1 = zero(eltype(modal)) - for i in 1:(nnodes(dg)-1) - total_energy_clip1 += modal[i]^2 - end - total_energy_clip2 = zero(eltype(modal)) - for i in 1:(nnodes(dg)-2) - total_energy_clip2 += modal[i]^2 - end + # Calculate indicator variables at Gauss-Lobatto nodes + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, element) + indicator[i] = indicator_ann.variable(u_local, equations) + end - # Calculate energy in highest modes - X1 = (total_energy - total_energy_clip1)/total_energy - X2 = (total_energy_clip1 - total_energy_clip2)/total_energy_clip1 - - # There are two versions of the network: - # The first one only takes the highest energy modes as input, the second one also the number of - # nodes. Automatically use the correct input by checking the number of inputs of the network. 
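
The input-size check mentioned in the comment above exploits that the first
parameter array of a Flux.jl `Chain` is the weight matrix of its first `Dense`
layer, which has size `(out, in)`. A small sketch with made-up layer sizes:

```julia
using Flux

# Hypothetical two-input variant of the network; the layer sizes are invented.
network = Chain(Dense(2 => 8, relu), Dense(8 => 1, σ))

# Second dimension of the first weight matrix equals the number of inputs.
size(Flux.params(network)[1], 2) # == 2
```
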
- if size(params(network)[1],2) == 2 - network_input = SVector(X1, X2) - elseif size(params(network)[1],2) == 3 - network_input = SVector(X1, X2, nnodes(dg)) - end + # Convert to modal representation + multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, + indicator) - # Scale input data - network_input = network_input / max(maximum(abs, network_input), one(eltype(network_input))) - probability_troubled_cell = network(network_input)[1] + # Calculate total energies for all modes, without highest, without two highest + total_energy = zero(eltype(modal)) + for i in 1:nnodes(dg) + total_energy += modal[i]^2 + end + total_energy_clip1 = zero(eltype(modal)) + for i in 1:(nnodes(dg) - 1) + total_energy_clip1 += modal[i]^2 + end + total_energy_clip2 = zero(eltype(modal)) + for i in 1:(nnodes(dg) - 2) + total_energy_clip2 += modal[i]^2 + end - # Compute indicator value - alpha[element] = probability_to_indicator(probability_troubled_cell, alpha_continuous, - alpha_amr, alpha_min, alpha_max) - end + # Calculate energy in highest modes + X1 = (total_energy - total_energy_clip1) / total_energy + X2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 + + # There are two versions of the network: + # The first one only takes the highest energy modes as input, the second one also the number of + # nodes. Automatically use the correct input by checking the number of inputs of the network. + if size(params(network)[1], 2) == 2 + network_input = SVector(X1, X2) + elseif size(params(network)[1], 2) == 3 + network_input = SVector(X1, X2, nnodes(dg)) + end - if alpha_smooth - apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) - end + # Scale input data + network_input = network_input / + max(maximum(abs, network_input), one(eltype(network_input))) + probability_troubled_cell = network(network_input)[1] - return alpha -end + # Compute indicator value + alpha[element] = probability_to_indicator(probability_troubled_cell, + alpha_continuous, + alpha_amr, alpha_min, alpha_max) + end -function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkRayHesthaven})( - u::AbstractArray{<:Any,3}, mesh, equations, dg::DGSEM, cache; kwargs...) - @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + if alpha_smooth + apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) + end - @unpack alpha, alpha_tmp, indicator_threaded, neighbor_ids = indicator_ann.cache - # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? - # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` - # or just `resize!` whenever we call the relevant methods as we do now? - resize!(alpha, nelements(dg, cache)) - if alpha_smooth - resize!(alpha_tmp, nelements(dg, cache)) - end + return alpha +end - c2e = zeros(Int, length(mesh.tree)) - for element in eachelement(dg, cache) - c2e[cache.elements.cell_ids[element]] = element - end +function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkRayHesthaven})(u::AbstractArray{ + <:Any, + 3 + }, + mesh, + equations, + dg::DGSEM, + cache; + kwargs...) + @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + + @unpack alpha, alpha_tmp, indicator_threaded, neighbor_ids = indicator_ann.cache + # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? 
+ # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` + # or just `resize!` whenever we call the relevant methods as we do now? + resize!(alpha, nelements(dg, cache)) + if alpha_smooth + resize!(alpha_tmp, nelements(dg, cache)) + end + c2e = zeros(Int, length(mesh.tree)) + for element in eachelement(dg, cache) + c2e[cache.elements.cell_ids[element]] = element + end - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] - cell_id = cache.elements.cell_ids[element] - - for direction in eachdirection(mesh.tree) - if !has_any_neighbor(mesh.tree, cell_id, direction) - neighbor_ids[direction] = element - continue - end - if has_neighbor(mesh.tree, cell_id, direction) - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) # Cell has small neighbor - if direction == 1 - neighbor_ids[direction] = c2e[mesh.tree.child_ids[2, neighbor_cell_id]] - else - neighbor_ids[direction] = c2e[mesh.tree.child_ids[1, neighbor_cell_id]] - end - else # Cell has same refinement level neighbor - neighbor_ids[direction] = c2e[neighbor_cell_id] + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] + cell_id = cache.elements.cell_ids[element] + + for direction in eachdirection(mesh.tree) + if !has_any_neighbor(mesh.tree, cell_id, direction) + neighbor_ids[direction] = element + continue + end + if has_neighbor(mesh.tree, cell_id, direction) + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) # Cell has small neighbor + if direction == 1 + neighbor_ids[direction] = c2e[mesh.tree.child_ids[2, + neighbor_cell_id]] + else + neighbor_ids[direction] = c2e[mesh.tree.child_ids[1, + neighbor_cell_id]] + end + else # Cell has same refinement level neighbor + neighbor_ids[direction] = c2e[neighbor_cell_id] + end + else # Cell is small and has large neighbor + parent_id = mesh.tree.parent_ids[cell_id] + neighbor_cell_id = mesh.tree.neighbor_ids[direction, parent_id] + neighbor_ids[direction] = c2e[neighbor_cell_id] + end end - else # Cell is small and has large neighbor - parent_id = mesh.tree.parent_ids[cell_id] - neighbor_cell_id = mesh.tree.neighbor_ids[direction, parent_id] - neighbor_ids[direction] = c2e[neighbor_cell_id] - end - end - # Calculate indicator variables at Gauss-Lobatto nodes - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, element) - indicator[i] = indicator_ann.variable(u_local, equations) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, element) + indicator[i] = indicator_ann.variable(u_local, equations) + end + # Cell average and interface values of the cell + X2 = sum(indicator) / nnodes(dg) + X4 = indicator[1] + X5 = indicator[end] - # Cell average and interface values of the cell - X2 = sum(indicator)/nnodes(dg) - X4 = indicator[1] - X5 = indicator[end] + # Calculate indicator variables from left neighboring cell at Gauss-Lobatto nodes + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, neighbor_ids[1]) + indicator[i] = indicator_ann.variable(u_local, equations) + end + X1 = sum(indicator) / nnodes(dg) - # Calculate indicator variables from left neighboring cell at Gauss-Lobatto nodes - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, neighbor_ids[1]) - indicator[i] = indicator_ann.variable(u_local, equations) + # Calculate 
indicator variables from right neighboring cell at Gauss-Lobatto nodes + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, neighbor_ids[2]) + indicator[i] = indicator_ann.variable(u_local, equations) + end + X3 = sum(indicator) / nnodes(dg) + network_input = SVector(X1, X2, X3, X4, X5) + + # Scale input data + network_input = network_input / + max(maximum(abs, network_input), one(eltype(network_input))) + probability_troubled_cell = network(network_input)[1] + + # Compute indicator value + alpha[element] = probability_to_indicator(probability_troubled_cell, + alpha_continuous, + alpha_amr, alpha_min, alpha_max) end - X1 = sum(indicator)/nnodes(dg) - # Calculate indicator variables from right neighboring cell at Gauss-Lobatto nodes - for i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, neighbor_ids[2]) - indicator[i] = indicator_ann.variable(u_local, equations) + if alpha_smooth + apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) end - X3 = sum(indicator)/nnodes(dg) - network_input = SVector(X1, X2, X3, X4, X5) - - # Scale input data - network_input = network_input / max(maximum(abs, network_input), one(eltype(network_input))) - probability_troubled_cell = network(network_input)[1] - - # Compute indicator value - alpha[element] = probability_to_indicator(probability_troubled_cell, alpha_continuous, - alpha_amr, alpha_min, alpha_max) - end - if alpha_smooth - apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) - end - - return alpha + return alpha end - end # @muladd diff --git a/src/solvers/dgsem_tree/indicators_2d.jl b/src/solvers/dgsem_tree/indicators_2d.jl index eb08657563b..085cb71ad0c 100644 --- a/src/solvers/dgsem_tree/indicators_2d.jl +++ b/src/solvers/dgsem_tree/indicators_2d.jl @@ -3,544 +3,579 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # this method is used when the indicator is constructed as for shock-capturing volume integrals -function create_cache(::Type{IndicatorHennemannGassner}, equations::AbstractEquations{2}, basis::LobattoLegendreBasis) - - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) - - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] - modal_threaded = [A(undef, nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] - modal_tmp1_threaded = [A(undef, nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] - - return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded) +function create_cache(::Type{IndicatorHennemannGassner}, + equations::AbstractEquations{2}, basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) + + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] + modal_threaded = [A(undef, nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] + modal_tmp1_threaded = [A(undef, nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] + + return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded) end # this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, equations::AbstractEquations{2}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) +function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, + equations::AbstractEquations{2}, dg::DGSEM, cache) + create_cache(typ, equations, dg.basis) end - # Use this function barrier and unpack inside to avoid passing closures to Polyester.jl # with @batch (@threaded). # Otherwise, @threaded does not work here with Julia ARM on macOS. # See https://github.com/JuliaSIMD/Polyester.jl/issues/88. 
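# A self-contained sketch of the function-barrier pattern described above, with
# hypothetical names (`KernelConfig`, `kernel!`, `run_threaded!`) and
# `Threads.@threads` standing in for Trixi's `@threaded`: the threaded loop body
# only calls `kernel!`, which unpacks the struct's fields itself, so no closure
# over those fields is handed to the threading macro.
struct KernelConfig
    scale::Float64
end

@inline function kernel!(out, cfg::KernelConfig, x, i)
    scale = cfg.scale  # unpack inside the barrier, not in the loop's closure
    out[i] = scale * x[i]
    return nothing
end

function run_threaded!(out, cfg::KernelConfig, x)
    Threads.@threads for i in eachindex(x, out)
        kernel!(out, cfg, x, i)
    end
    return out
end

# usage: run_threaded!(zeros(4), KernelConfig(2.0), [1.0, 2.0, 3.0, 4.0])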
-@inline function calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, u, +@inline function calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, + u, element, mesh::AbstractMesh{2}, equations, dg, cache) - @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg - @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, - modal_tmp1_threaded = indicator_hg.cache - - indicator = indicator_threaded[Threads.threadid()] - modal = modal_threaded[Threads.threadid()] - modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] - - # Calculate indicator variables at Gauss-Lobatto nodes - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - indicator[i, j] = indicator_hg.variable(u_local, equations) - end - - # Convert to modal representation - multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator, modal_tmp1) - - # Calculate total energies for all modes, without highest, without two highest - total_energy = zero(eltype(modal)) - for j in 1:nnodes(dg), i in 1:nnodes(dg) - total_energy += modal[i, j]^2 - end - total_energy_clip1 = zero(eltype(modal)) - for j in 1:(nnodes(dg)-1), i in 1:(nnodes(dg)-1) - total_energy_clip1 += modal[i, j]^2 - end - total_energy_clip2 = zero(eltype(modal)) - for j in 1:(nnodes(dg)-2), i in 1:(nnodes(dg)-2) - total_energy_clip2 += modal[i, j]^2 - end - - # Calculate energy in higher modes - if !(iszero(total_energy)) - energy_frac_1 = (total_energy - total_energy_clip1) / total_energy - else - energy_frac_1 = zero(total_energy) - end - if !(iszero(total_energy_clip1)) - energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 - else - energy_frac_2 = zero(total_energy_clip1) - end - energy = max(energy_frac_1, energy_frac_2) - - alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) - - # Take care of the case close to pure DG - if alpha_element < alpha_min - alpha_element = zero(alpha_element) - end - - # Take care of the case close to pure FV - if alpha_element > 1 - alpha_min - alpha_element = one(alpha_element) - end - - # Clip the maximum amount of FV allowed - alpha[element] = min(alpha_max, alpha_element) -end + @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg + @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, + modal_tmp1_threaded = indicator_hg.cache + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] + modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] -# Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha -function apply_smoothing!(mesh::Union{TreeMesh{2}, P4estMesh{2}}, alpha, alpha_tmp, dg, cache) - # Copy alpha values such that smoothing is indpedenent of the element access order - alpha_tmp .= alpha - - # Loop over interfaces - for interface in eachinterface(dg, cache) - # Get neighboring element ids - left = cache.interfaces.neighbor_ids[1, interface] - right = cache.interfaces.neighbor_ids[2, interface] - - # Apply smoothing - alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left]) - alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right]) - end - - # Loop over L2 mortars - for mortar in eachmortar(dg, cache) - # Get neighboring element ids - lower = cache.mortars.neighbor_ids[1, mortar] - upper = cache.mortars.neighbor_ids[2, mortar] - large = cache.mortars.neighbor_ids[3, mortar] - - # Apply smoothing - alpha[lower] = 
max(alpha_tmp[lower], 0.5 * alpha_tmp[large], alpha[lower])
-    alpha[upper] = max(alpha_tmp[upper], 0.5 * alpha_tmp[large], alpha[upper])
-    alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[lower], alpha[large])
-    alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[upper], alpha[large])
-  end
-
-  return alpha
+    # Calculate indicator variables at Gauss-Lobatto nodes
+    for j in eachnode(dg), i in eachnode(dg)
+        u_local = get_node_vars(u, equations, dg, i, j, element)
+        indicator[i, j] = indicator_hg.variable(u_local, equations)
+    end
+
+    # Convert to modal representation
+    multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre,
+                                   indicator, modal_tmp1)
+
+    # Calculate total energies for all modes, without highest, without two highest
+    total_energy = zero(eltype(modal))
+    for j in 1:nnodes(dg), i in 1:nnodes(dg)
+        total_energy += modal[i, j]^2
+    end
+    total_energy_clip1 = zero(eltype(modal))
+    for j in 1:(nnodes(dg) - 1), i in 1:(nnodes(dg) - 1)
+        total_energy_clip1 += modal[i, j]^2
+    end
+    total_energy_clip2 = zero(eltype(modal))
+    for j in 1:(nnodes(dg) - 2), i in 1:(nnodes(dg) - 2)
+        total_energy_clip2 += modal[i, j]^2
+    end
+
+    # Calculate energy in higher modes
+    if !(iszero(total_energy))
+        energy_frac_1 = (total_energy - total_energy_clip1) / total_energy
+    else
+        energy_frac_1 = zero(total_energy)
+    end
+    if !(iszero(total_energy_clip1))
+        energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1
+    else
+        energy_frac_2 = zero(total_energy_clip1)
+    end
+    energy = max(energy_frac_1, energy_frac_2)
+
+    alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold)))
+
+    # Take care of the case close to pure DG
+    if alpha_element < alpha_min
+        alpha_element = zero(alpha_element)
+    end
+
+    # Take care of the case close to pure FV
+    if alpha_element > 1 - alpha_min
+        alpha_element = one(alpha_element)
+    end
+
+    # Clip the maximum amount of FV allowed
+    alpha[element] = min(alpha_max, alpha_element)
end

+# Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
+function apply_smoothing!(mesh::Union{TreeMesh{2}, P4estMesh{2}}, alpha, alpha_tmp, dg,
+                          cache)
+    # Copy alpha values such that smoothing is independent of the element access order
+    alpha_tmp .= alpha
+
+    # Loop over interfaces
+    for interface in eachinterface(dg, cache)
+        # Get neighboring element ids
+        left = cache.interfaces.neighbor_ids[1, interface]
+        right = cache.interfaces.neighbor_ids[2, interface]
+
+        # Apply smoothing
+        alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left])
+        alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right])
+    end
+
+    # Loop over L2 mortars
+    for mortar in eachmortar(dg, cache)
+        # Get neighboring element ids
+        lower = cache.mortars.neighbor_ids[1, mortar]
+        upper = cache.mortars.neighbor_ids[2, mortar]
+        large = cache.mortars.neighbor_ids[3, mortar]
+
+        # Apply smoothing
+        alpha[lower] = max(alpha_tmp[lower], 0.5 * alpha_tmp[large], alpha[lower])
+        alpha[upper] = max(alpha_tmp[upper], 0.5 * alpha_tmp[large], alpha[upper])
+        alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[lower], alpha[large])
+        alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[upper], alpha[large])
+    end
+
+    return alpha
end

# this method is used when the
indicator is constructed as for shock-capturing volume integrals +function create_cache(::Type{IndicatorLöhner}, equations::AbstractEquations{2}, + basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] - return (; alpha, indicator_threaded) + return (; alpha, indicator_threaded) end # this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorLöhner}, mesh, equations::AbstractEquations{2}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) +function create_cache(typ::Type{IndicatorLöhner}, mesh, equations::AbstractEquations{2}, + dg::DGSEM, cache) + create_cache(typ, equations, dg.basis) end - -function (löhner::IndicatorLöhner)(u::AbstractArray{<:Any,4}, +function (löhner::IndicatorLöhner)(u::AbstractArray{<:Any, 4}, mesh, equations, dg::DGSEM, cache; kwargs...) - @assert nnodes(dg) >= 3 "IndicatorLöhner only works for nnodes >= 3 (polydeg > 1)" - @unpack alpha, indicator_threaded = löhner.cache - resize!(alpha, nelements(dg, cache)) + @assert nnodes(dg)>=3 "IndicatorLöhner only works for nnodes >= 3 (polydeg > 1)" + @unpack alpha, indicator_threaded = löhner.cache + resize!(alpha, nelements(dg, cache)) - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] - # Calculate indicator variables at Gauss-Lobatto nodes - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - indicator[i, j] = löhner.variable(u_local, equations) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + indicator[i, j] = löhner.variable(u_local, equations) + end - estimate = zero(real(dg)) - for j in eachnode(dg), i in 2:nnodes(dg)-1 - # x direction - u0 = indicator[i, j] - up = indicator[i+1, j] - um = indicator[i-1, j] - estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) - end + estimate = zero(real(dg)) + for j in eachnode(dg), i in 2:(nnodes(dg) - 1) + # x direction + u0 = indicator[i, j] + up = indicator[i + 1, j] + um = indicator[i - 1, j] + estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) + end - for j in 2:nnodes(dg)-1, i in eachnode(dg) - # y direction - u0 = indicator[i, j ] - up = indicator[i, j+1] - um = indicator[i, j-1] - estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) - end + for j in 2:(nnodes(dg) - 1), i in eachnode(dg) + # y direction + u0 = indicator[i, j] + up = indicator[i, j + 1] + um = indicator[i, j - 1] + estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner)) + end - # use the maximum as DG element indicator - alpha[element] = estimate - end + # use the maximum as DG element indicator + alpha[element] = estimate + end - return alpha + return alpha end - - # this method is used when the indicator is constructed as for shock-capturing volume integrals -function create_cache(::Type{IndicatorMax}, equations::AbstractEquations{2}, basis::LobattoLegendreBasis) - - alpha = Vector{real(basis)}() +function create_cache(::Type{IndicatorMax}, equations::AbstractEquations{2}, + 
basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] - return (; alpha, indicator_threaded) + return (; alpha, indicator_threaded) end # this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorMax}, mesh, equations::AbstractEquations{2}, dg::DGSEM, cache) - cache = create_cache(typ, equations, dg.basis) +function create_cache(typ::Type{IndicatorMax}, mesh, equations::AbstractEquations{2}, + dg::DGSEM, cache) + cache = create_cache(typ, equations, dg.basis) end - -function (indicator_max::IndicatorMax)(u::AbstractArray{<:Any,4}, +function (indicator_max::IndicatorMax)(u::AbstractArray{<:Any, 4}, mesh, equations, dg::DGSEM, cache; kwargs...) - @unpack alpha, indicator_threaded = indicator_max.cache - resize!(alpha, nelements(dg, cache)) + @unpack alpha, indicator_threaded = indicator_max.cache + resize!(alpha, nelements(dg, cache)) - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] - # Calculate indicator variables at Gauss-Lobatto nodes - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - indicator[i, j] = indicator_max.variable(u_local, equations) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + indicator[i, j] = indicator_max.variable(u_local, equations) + end - alpha[element] = maximum(indicator) - end + alpha[element] = maximum(indicator) + end - return alpha + return alpha end # this method is used when the indicator is constructed as for shock-capturing volume integrals # empty cache is default function create_cache(::Type{IndicatorNeuralNetwork}, equations::AbstractEquations{2}, basis::LobattoLegendreBasis) - return NamedTuple() + return NamedTuple() end # cache for NeuralNetworkPerssonPeraire-type indicator function create_cache(::Type{IndicatorNeuralNetwork{NeuralNetworkPerssonPeraire}}, equations::AbstractEquations{2}, basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) + A = Array{real(basis), ndims(equations)} - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) - A = Array{real(basis), ndims(equations)} + @assert nnodes(basis)>=4 "Indicator only works for nnodes >= 4 (polydeg > 2)" - @assert nnodes(basis) >= 4 "Indicator only works for nnodes >= 4 (polydeg > 2)" + prototype = A(undef, nnodes(basis), nnodes(basis)) + indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + modal_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + modal_tmp1_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - prototype = A(undef, nnodes(basis), nnodes(basis)) - indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - modal_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - modal_tmp1_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - - return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded) + return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded) end # cache for 
NeuralNetworkRayHesthaven-type indicator function create_cache(::Type{IndicatorNeuralNetwork{NeuralNetworkRayHesthaven}}, equations::AbstractEquations{2}, basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) + A = Array{real(basis), ndims(equations)} - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) - A = Array{real(basis), ndims(equations)} - - prototype = A(undef, nnodes(basis), nnodes(basis)) - indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - modal_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - modal_tmp1_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + prototype = A(undef, nnodes(basis), nnodes(basis)) + indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + modal_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + modal_tmp1_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - network_input = Vector{Float64}(undef, 15) - neighbor_ids= Array{Int64}(undef, 8) - neighbor_mean = Array{Float64}(undef, 4, 3) + network_input = Vector{Float64}(undef, 15) + neighbor_ids = Array{Int64}(undef, 8) + neighbor_mean = Array{Float64}(undef, 4, 3) - return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded, + return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded, network_input, neighbor_ids, neighbor_mean) end # cache for NeuralNetworkCNN-type indicator function create_cache(::Type{IndicatorNeuralNetwork{NeuralNetworkCNN}}, equations::AbstractEquations{2}, basis::LobattoLegendreBasis) - - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) - A = Array{real(basis), ndims(equations)} - - prototype = A(undef, nnodes(basis), nnodes(basis)) - indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - n_cnn = 4 - nodes,_ = gauss_lobatto_nodes_weights(nnodes(basis)) - cnn_nodes,_= gauss_lobatto_nodes_weights(n_cnn) - vandermonde = polynomial_interpolation_matrix(nodes, cnn_nodes) - network_input = Array{Float32}(undef, n_cnn, n_cnn, 1, 1) - - return (; alpha, alpha_tmp, indicator_threaded, nodes, cnn_nodes, vandermonde, network_input) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) + A = Array{real(basis), ndims(equations)} + + prototype = A(undef, nnodes(basis), nnodes(basis)) + indicator_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] + n_cnn = 4 + nodes, _ = gauss_lobatto_nodes_weights(nnodes(basis)) + cnn_nodes, _ = gauss_lobatto_nodes_weights(n_cnn) + vandermonde = polynomial_interpolation_matrix(nodes, cnn_nodes) + network_input = Array{Float32}(undef, n_cnn, n_cnn, 1, 1) + + return (; alpha, alpha_tmp, indicator_threaded, nodes, cnn_nodes, vandermonde, + network_input) end # this method is used when the indicator is constructed as for AMR function create_cache(typ::Type{<:IndicatorNeuralNetwork}, mesh, equations::AbstractEquations{2}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) + create_cache(typ, equations, dg.basis) end +function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkPerssonPeraire})(u, + mesh::TreeMesh{ + 2 + }, + equations, + dg::DGSEM, + cache; + kwargs...) + @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + + @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded = indicator_ann.cache + # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? 
+ # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` + # or just `resize!` whenever we call the relevant methods as we do now? + resize!(alpha, nelements(dg, cache)) + if alpha_smooth + resize!(alpha_tmp, nelements(dg, cache)) + end -function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkPerssonPeraire})( - u, mesh::TreeMesh{2}, equations, dg::DGSEM, cache; kwargs...) + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] + modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] - @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + # Calculate indicator variables at Gauss-Lobatto nodes + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + indicator[i, j] = indicator_ann.variable(u_local, equations) + end - @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded = indicator_ann.cache - # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? - # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` - # or just `resize!` whenever we call the relevant methods as we do now? - resize!(alpha, nelements(dg, cache)) - if alpha_smooth - resize!(alpha_tmp, nelements(dg, cache)) - end + # Convert to modal representation + multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, + indicator, modal_tmp1) - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] - modal = modal_threaded[Threads.threadid()] - modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] + # Calculate total energies for all modes, without highest, without two highest + total_energy = zero(eltype(modal)) + for j in 1:nnodes(dg), i in 1:nnodes(dg) + total_energy += modal[i, j]^2 + end + total_energy_clip1 = zero(eltype(modal)) + for j in 1:(nnodes(dg) - 1), i in 1:(nnodes(dg) - 1) + total_energy_clip1 += modal[i, j]^2 + end + total_energy_clip2 = zero(eltype(modal)) + for j in 1:(nnodes(dg) - 2), i in 1:(nnodes(dg) - 2) + total_energy_clip2 += modal[i, j]^2 + end + total_energy_clip3 = zero(eltype(modal)) + for j in 1:(nnodes(dg) - 3), i in 1:(nnodes(dg) - 3) + total_energy_clip3 += modal[i, j]^2 + end - # Calculate indicator variables at Gauss-Lobatto nodes - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - indicator[i, j] = indicator_ann.variable(u_local, equations) + # Calculate energy in higher modes and polynomial degree for the network input + X1 = (total_energy - total_energy_clip1) / total_energy + X2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 + X3 = (total_energy_clip2 - total_energy_clip3) / total_energy_clip2 + X4 = nnodes(dg) + network_input = SVector(X1, X2, X3, X4) + + # Scale input data + network_input = network_input / + max(maximum(abs, network_input), one(eltype(network_input))) + probability_troubled_cell = network(network_input)[1] + + # Compute indicator value + alpha[element] = probability_to_indicator(probability_troubled_cell, + alpha_continuous, + alpha_amr, alpha_min, alpha_max) end - # Convert to modal representation - multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator, modal_tmp1) - - # Calculate total energies for all modes, without highest, without two highest - total_energy = zero(eltype(modal)) - for j in 1:nnodes(dg), i in 
1:nnodes(dg) - total_energy += modal[i, j]^2 - end - total_energy_clip1 = zero(eltype(modal)) - for j in 1:(nnodes(dg)-1), i in 1:(nnodes(dg)-1) - total_energy_clip1 += modal[i, j]^2 + if alpha_smooth + apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) end - total_energy_clip2 = zero(eltype(modal)) - for j in 1:(nnodes(dg)-2), i in 1:(nnodes(dg)-2) - total_energy_clip2 += modal[i, j]^2 - end - total_energy_clip3 = zero(eltype(modal)) - for j in 1:(nnodes(dg)-3), i in 1:(nnodes(dg)-3) - total_energy_clip3 += modal[i, j]^2 - end - - # Calculate energy in higher modes and polynomial degree for the network input - X1 = (total_energy - total_energy_clip1)/total_energy - X2 = (total_energy_clip1 - total_energy_clip2)/total_energy_clip1 - X3 = (total_energy_clip2 - total_energy_clip3)/total_energy_clip2 - X4 = nnodes(dg) - network_input = SVector(X1, X2, X3, X4) - # Scale input data - network_input = network_input / max(maximum(abs, network_input), one(eltype(network_input))) - probability_troubled_cell = network(network_input)[1] - - # Compute indicator value - alpha[element] = probability_to_indicator(probability_troubled_cell, alpha_continuous, - alpha_amr, alpha_min, alpha_max) - end - - if alpha_smooth - apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) - end - - return alpha + return alpha end +function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkRayHesthaven})(u, + mesh::TreeMesh{ + 2 + }, + equations, + dg::DGSEM, + cache; + kwargs...) + @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + + @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded, network_input, neighbor_ids, neighbor_mean = indicator_ann.cache #X, network_input + # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? + # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` + # or just `resize!` whenever we call the relevant methods as we do now? + resize!(alpha, nelements(dg, cache)) + if alpha_smooth + resize!(alpha_tmp, nelements(dg, cache)) + end -function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkRayHesthaven})( - u, mesh::TreeMesh{2}, equations, dg::DGSEM, cache; kwargs...) + c2e = zeros(Int, length(mesh.tree)) + for element in eachelement(dg, cache) + c2e[cache.elements.cell_ids[element]] = element + end - @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + X = Array{Float64}(undef, 3, nelements(dg, cache)) - @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded, network_input, neighbor_ids, neighbor_mean = indicator_ann.cache #X, network_input - # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? - # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` - # or just `resize!` whenever we call the relevant methods as we do now? 
- resize!(alpha, nelements(dg, cache)) - if alpha_smooth - resize!(alpha_tmp, nelements(dg, cache)) - end + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] + modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] - c2e = zeros(Int, length(mesh.tree)) - for element in eachelement(dg, cache) - c2e[cache.elements.cell_ids[element]] = element - end + # Calculate indicator variables at Gauss-Lobatto nodes + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + indicator[i, j] = indicator_ann.variable(u_local, equations) + end - X = Array{Float64}(undef, 3, nelements(dg, cache)) + # Convert to modal representation + multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, + indicator, modal_tmp1) + # Save linear modal coefficients for the network input + X[1, element] = modal[1, 1] + X[2, element] = modal[1, 2] + X[3, element] = modal[2, 1] + end - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] - modal = modal_threaded[Threads.threadid()] - modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] + @threaded for element in eachelement(dg, cache) + cell_id = cache.elements.cell_ids[element] + + network_input[1] = X[1, element] + network_input[2] = X[2, element] + network_input[3] = X[3, element] + + for direction in eachdirection(mesh.tree) + if direction == 1 # -x + dir = 4 + elseif direction == 2 # +x + dir = 1 + elseif direction == 3 # -y + dir = 3 + elseif direction == 4 # +y + dir = 2 + end - # Calculate indicator variables at Gauss-Lobatto nodes - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - indicator[i, j] = indicator_ann.variable(u_local, equations) - end + # Of no neighbor exists and current cell is not small + if !has_any_neighbor(mesh.tree, cell_id, direction) + network_input[3 * dir + 1] = X[1, element] + network_input[3 * dir + 2] = X[2, element] + network_input[3 * dir + 3] = X[3, element] + continue + end - # Convert to modal representation - multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator, modal_tmp1) - # Save linear modal coefficients for the network input - X[1,element] = modal[1,1] - X[2,element] = modal[1,2] - X[3,element] = modal[2,1] - end - - @threaded for element in eachelement(dg, cache) - cell_id = cache.elements.cell_ids[element] - - network_input[1] = X[1,element] - network_input[2] = X[2,element] - network_input[3] = X[3,element] - - for direction in eachdirection(mesh.tree) - if direction == 1 # -x - dir = 4 - elseif direction == 2 # +x - dir = 1 - elseif direction == 3 # -y - dir = 3 - elseif direction == 4 # +y - dir = 2 - end - - # Of no neighbor exists and current cell is not small - if !has_any_neighbor(mesh.tree, cell_id, direction) - network_input[3*dir+1] = X[1, element] - network_input[3*dir+2] = X[2, element] - network_input[3*dir+3] = X[3, element] - continue - end - - # Get Input data from neighbors - if has_neighbor(mesh.tree, cell_id, direction) - neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] - if has_children(mesh.tree, neighbor_cell_id) # Cell has small neighbor - # Mean over 4 neighbor cells - neighbor_ids[1] = mesh.tree.child_ids[1, neighbor_cell_id] - neighbor_ids[2] = mesh.tree.child_ids[2, neighbor_cell_id] - neighbor_ids[3] = mesh.tree.child_ids[3, neighbor_cell_id] - neighbor_ids[4] = mesh.tree.child_ids[4, neighbor_cell_id] 
- - for i in 1:4 - if has_children(mesh.tree, neighbor_ids[i]) - neighbor_ids5 = c2e[mesh.tree.child_ids[1, neighbor_ids[i]]] - neighbor_ids6 = c2e[mesh.tree.child_ids[2, neighbor_ids[i]]] - neighbor_ids7 = c2e[mesh.tree.child_ids[3, neighbor_ids[i]]] - neighbor_ids8 = c2e[mesh.tree.child_ids[4, neighbor_ids[i]]] - - neighbor_mean[i,1] = (X[1,neighbor_ids5] + X[1,neighbor_ids6] + X[1,neighbor_ids7] + X[1,neighbor_ids8])/4 - neighbor_mean[i,2] = (X[2,neighbor_ids5] + X[2,neighbor_ids6] + X[2,neighbor_ids7] + X[2,neighbor_ids8])/4 - neighbor_mean[i,3] = (X[3,neighbor_ids5] + X[3,neighbor_ids6] + X[3,neighbor_ids7] + X[3,neighbor_ids8])/4 - else - neighbor_id = c2e[neighbor_ids[i]] - neighbor_mean[i,1] = X[1,neighbor_id] - neighbor_mean[i,2] = X[2,neighbor_id] - neighbor_mean[i,3] = X[3,neighbor_id] + # Get Input data from neighbors + if has_neighbor(mesh.tree, cell_id, direction) + neighbor_cell_id = mesh.tree.neighbor_ids[direction, cell_id] + if has_children(mesh.tree, neighbor_cell_id) # Cell has small neighbor + # Mean over 4 neighbor cells + neighbor_ids[1] = mesh.tree.child_ids[1, neighbor_cell_id] + neighbor_ids[2] = mesh.tree.child_ids[2, neighbor_cell_id] + neighbor_ids[3] = mesh.tree.child_ids[3, neighbor_cell_id] + neighbor_ids[4] = mesh.tree.child_ids[4, neighbor_cell_id] + + for i in 1:4 + if has_children(mesh.tree, neighbor_ids[i]) + neighbor_ids5 = c2e[mesh.tree.child_ids[1, neighbor_ids[i]]] + neighbor_ids6 = c2e[mesh.tree.child_ids[2, neighbor_ids[i]]] + neighbor_ids7 = c2e[mesh.tree.child_ids[3, neighbor_ids[i]]] + neighbor_ids8 = c2e[mesh.tree.child_ids[4, neighbor_ids[i]]] + + neighbor_mean[i, 1] = (X[1, neighbor_ids5] + + X[1, neighbor_ids6] + + X[1, neighbor_ids7] + + X[1, neighbor_ids8]) / 4 + neighbor_mean[i, 2] = (X[2, neighbor_ids5] + + X[2, neighbor_ids6] + + X[2, neighbor_ids7] + + X[2, neighbor_ids8]) / 4 + neighbor_mean[i, 3] = (X[3, neighbor_ids5] + + X[3, neighbor_ids6] + + X[3, neighbor_ids7] + + X[3, neighbor_ids8]) / 4 + else + neighbor_id = c2e[neighbor_ids[i]] + neighbor_mean[i, 1] = X[1, neighbor_id] + neighbor_mean[i, 2] = X[2, neighbor_id] + neighbor_mean[i, 3] = X[3, neighbor_id] + end + end + network_input[3 * dir + 1] = (neighbor_mean[1, 1] + + neighbor_mean[2, 1] + + neighbor_mean[3, 1] + + neighbor_mean[4, 1]) / 4 + network_input[3 * dir + 2] = (neighbor_mean[1, 2] + + neighbor_mean[2, 2] + + neighbor_mean[3, 2] + + neighbor_mean[4, 2]) / 4 + network_input[3 * dir + 3] = (neighbor_mean[1, 3] + + neighbor_mean[2, 3] + + neighbor_mean[3, 3] + + neighbor_mean[4, 3]) / 4 + + else # Cell has same refinement level neighbor + neighbor_id = c2e[neighbor_cell_id] + network_input[3 * dir + 1] = X[1, neighbor_id] + network_input[3 * dir + 2] = X[2, neighbor_id] + network_input[3 * dir + 3] = X[3, neighbor_id] + end + else # Cell is small and has large neighbor + parent_id = mesh.tree.parent_ids[cell_id] + neighbor_id = c2e[mesh.tree.neighbor_ids[direction, parent_id]] + + network_input[3 * dir + 1] = X[1, neighbor_id] + network_input[3 * dir + 2] = X[2, neighbor_id] + network_input[3 * dir + 3] = X[3, neighbor_id] end - end - network_input[3*dir+1] = (neighbor_mean[1,1] + neighbor_mean[2,1] + neighbor_mean[3,1] + neighbor_mean[4,1])/4 - network_input[3*dir+2] = (neighbor_mean[1,2] + neighbor_mean[2,2] + neighbor_mean[3,2] + neighbor_mean[4,2])/4 - network_input[3*dir+3] = (neighbor_mean[1,3] + neighbor_mean[2,3] + neighbor_mean[3,3] + neighbor_mean[4,3])/4 - - else # Cell has same refinement level neighbor - neighbor_id = c2e[neighbor_cell_id] - 
network_input[3*dir+1] = X[1,neighbor_id] - network_input[3*dir+2] = X[2,neighbor_id] - network_input[3*dir+3] = X[3,neighbor_id] end - else # Cell is small and has large neighbor - parent_id = mesh.tree.parent_ids[cell_id] - neighbor_id = c2e[mesh.tree.neighbor_ids[direction, parent_id]] - - network_input[3*dir+1] = X[1,neighbor_id] - network_input[3*dir+2] = X[2,neighbor_id] - network_input[3*dir+3] = X[3,neighbor_id] - end - end - # Scale input data - network_input = network_input / max(maximum(abs, network_input), one(eltype(network_input))) - probability_troubled_cell = network(network_input)[1] + # Scale input data + network_input = network_input / + max(maximum(abs, network_input), one(eltype(network_input))) + probability_troubled_cell = network(network_input)[1] - # Compute indicator value - alpha[element] = probability_to_indicator(probability_troubled_cell, alpha_continuous, - alpha_amr, alpha_min, alpha_max) - end + # Compute indicator value + alpha[element] = probability_to_indicator(probability_troubled_cell, + alpha_continuous, + alpha_amr, alpha_min, alpha_max) + end - if alpha_smooth - apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) - end + if alpha_smooth + apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) + end - return alpha + return alpha end +function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkCNN})(u, mesh::TreeMesh{2}, + equations, dg::DGSEM, + cache; kwargs...) + @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + + @unpack alpha, alpha_tmp, indicator_threaded, nodes, cnn_nodes, vandermonde, network_input = indicator_ann.cache + # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? + # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` + # or just `resize!` whenever we call the relevant methods as we do now? + resize!(alpha, nelements(dg, cache)) + if alpha_smooth + resize!(alpha_tmp, nelements(dg, cache)) + end -function (indicator_ann::IndicatorNeuralNetwork{NeuralNetworkCNN})( - u, mesh::TreeMesh{2}, equations, dg::DGSEM, cache; kwargs...) - @unpack indicator_type, alpha_max, alpha_min, alpha_smooth, alpha_continuous, alpha_amr, variable, network = indicator_ann + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] - @unpack alpha, alpha_tmp, indicator_threaded, nodes, cnn_nodes, vandermonde, network_input = indicator_ann.cache - # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? - # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` - # or just `resize!` whenever we call the relevant methods as we do now? 
- resize!(alpha, nelements(dg, cache)) - if alpha_smooth - resize!(alpha_tmp, nelements(dg, cache)) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + indicator[i, j] = indicator_ann.variable(u_local, equations) + end + + # Interpolate nodal data to 4x4 LGL nodes + for j in 1:4, i in 1:4 + acc = zero(eltype(indicator)) + for jj in eachnode(dg), ii in eachnode(dg) + acc += vandermonde[i, ii] * indicator[ii, jj] * vandermonde[j, jj] + end + network_input[i, j, 1, 1] = acc + end - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] + # Scale input data + network_input = network_input / + max(maximum(abs, network_input), one(eltype(network_input))) + probability_troubled_cell = network(network_input)[1] - # Calculate indicator variables at Gauss-Lobatto nodes - for j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, element) - indicator[i, j] = indicator_ann.variable(u_local, equations) + # Compute indicator value + alpha[element] = probability_to_indicator(probability_troubled_cell, + alpha_continuous, + alpha_amr, alpha_min, alpha_max) end - # Interpolate nodal data to 4x4 LGL nodes - for j in 1:4, i in 1:4 - acc = zero(eltype(indicator)) - for jj in eachnode(dg), ii in eachnode(dg) - acc += vandermonde[i,ii] * indicator[ii,jj] * vandermonde[j,jj] - end - network_input[i,j,1,1] = acc + if alpha_smooth + apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) end - # Scale input data - network_input = network_input / max(maximum(abs, network_input), one(eltype(network_input))) - probability_troubled_cell = network(network_input)[1] - - # Compute indicator value - alpha[element] = probability_to_indicator(probability_troubled_cell, alpha_continuous, - alpha_amr, alpha_min, alpha_max) - end - - if alpha_smooth - apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) - end - - return alpha + return alpha end - end # @muladd diff --git a/src/solvers/dgsem_tree/indicators_3d.jl b/src/solvers/dgsem_tree/indicators_3d.jl index c1e7aee886a..69041ed1298 100644 --- a/src/solvers/dgsem_tree/indicators_3d.jl +++ b/src/solvers/dgsem_tree/indicators_3d.jl @@ -3,242 +3,250 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # this method is used when the indicator is constructed as for shock-capturing volume integrals -function create_cache(::Type{IndicatorHennemannGassner}, equations::AbstractEquations{3}, basis::LobattoLegendreBasis) - - alpha = Vector{real(basis)}() - alpha_tmp = similar(alpha) - - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] - modal_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] - modal_tmp1_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] - modal_tmp2_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] - - return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded, modal_tmp2_threaded) +function create_cache(::Type{IndicatorHennemannGassner}, + equations::AbstractEquations{3}, basis::LobattoLegendreBasis) + alpha = Vector{real(basis)}() + alpha_tmp = similar(alpha) + + A = Array{real(basis), ndims(equations)} + indicator_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] + modal_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] + modal_tmp1_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] + modal_tmp2_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) + for _ in 1:Threads.nthreads()] + + return (; alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded, + modal_tmp2_threaded) end # this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, equations::AbstractEquations{3}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) +function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, + equations::AbstractEquations{3}, dg::DGSEM, cache) + create_cache(typ, equations, dg.basis) end - # Use this function barrier and unpack inside to avoid passing closures to Polyester.jl # with @batch (@threaded). # Otherwise, @threaded does not work here with Julia ARM on macOS. # See https://github.com/JuliaSIMD/Polyester.jl/issues/88. 
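# A simplified standalone sketch of the shock-indicator logic implemented below,
# using a hypothetical `blend_alpha` on a 1D vector of modal coefficients and
# only the first energy fraction (the real code also uses the second fraction
# and works on multi-dimensional modal arrays): the relative energy of the
# highest mode is mapped to a blending factor via a logistic function, then
# clipped at the DG/FV limits.
function blend_alpha(modal::AbstractVector, threshold, parameter_s,
                     alpha_min, alpha_max)
    total_energy = sum(abs2, modal)                     # all modes
    total_energy_clip1 = sum(abs2, modal[1:(end - 1)])  # all but the highest mode
    energy = iszero(total_energy) ? zero(total_energy) :
             (total_energy - total_energy_clip1) / total_energy
    alpha = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold)))
    alpha < alpha_min && return zero(alpha)      # close to pure DG
    alpha > 1 - alpha_min && return one(alpha)   # close to pure FV
    return min(alpha_max, alpha)
end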
-@inline function calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, u, +@inline function calc_indicator_hennemann_gassner!(indicator_hg, threshold, parameter_s, + u, element, mesh::AbstractMesh{3}, equations, dg, cache) - @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg - @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, - modal_tmp1_threaded, modal_tmp2_threaded = indicator_hg.cache - - indicator = indicator_threaded[Threads.threadid()] - modal = modal_threaded[Threads.threadid()] - modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] - modal_tmp2 = modal_tmp2_threaded[Threads.threadid()] - - # Calculate indicator variables at Gauss-Lobatto nodes - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, k, element) - indicator[i, j, k] = indicator_hg.variable(u_local, equations) - end - - # Convert to modal representation - multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, indicator, modal_tmp1, modal_tmp2) - - # Calculate total energies for all modes, without highest, without two highest - total_energy = zero(eltype(modal)) - for k in 1:nnodes(dg), j in 1:nnodes(dg), i in 1:nnodes(dg) - total_energy += modal[i, j, k]^2 - end - total_energy_clip1 = zero(eltype(modal)) - for k in 1:(nnodes(dg)-1), j in 1:(nnodes(dg)-1), i in 1:(nnodes(dg)-1) - total_energy_clip1 += modal[i, j, k]^2 - end - total_energy_clip2 = zero(eltype(modal)) - for k in 1:(nnodes(dg)-2), j in 1:(nnodes(dg)-2), i in 1:(nnodes(dg)-2) - total_energy_clip2 += modal[i, j, k]^2 - end - - # Calculate energy in higher modes - if !(iszero(total_energy)) - energy_frac_1 = (total_energy - total_energy_clip1) / total_energy - else - energy_frac_1 = zero(total_energy) - end - if !(iszero(total_energy_clip1)) - energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 - else - energy_frac_2 = zero(total_energy_clip1) - end - energy = max(energy_frac_1, energy_frac_2) - - alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) - - # Take care of the case close to pure DG - if alpha_element < alpha_min - alpha_element = zero(alpha_element) - end - - # Take care of the case close to pure FV - if alpha_element > 1 - alpha_min - alpha_element = one(alpha_element) - end - - # Clip the maximum amount of FV allowed - alpha[element] = min(alpha_max, alpha_element) -end + @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg + @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, + modal_tmp1_threaded, modal_tmp2_threaded = indicator_hg.cache + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] + modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] + modal_tmp2 = modal_tmp2_threaded[Threads.threadid()] -function apply_smoothing!(mesh::Union{TreeMesh{3}, P4estMesh{3}}, alpha, alpha_tmp, dg, cache) - - # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha - # Copy alpha values such that smoothing is indpedenent of the element access order - alpha_tmp .= alpha - - # Loop over interfaces - for interface in eachinterface(dg, cache) - # Get neighboring element ids - left = cache.interfaces.neighbor_ids[1, interface] - right = cache.interfaces.neighbor_ids[2, interface] - - # Apply smoothing - alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left]) - alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right]) - end - - # Loop over L2 mortars 
- for mortar in eachmortar(dg, cache) - # Get neighboring element ids - lower_left = cache.mortars.neighbor_ids[1, mortar] - lower_right = cache.mortars.neighbor_ids[2, mortar] - upper_left = cache.mortars.neighbor_ids[3, mortar] - upper_right = cache.mortars.neighbor_ids[4, mortar] - large = cache.mortars.neighbor_ids[5, mortar] - - # Apply smoothing - alpha[lower_left] = max(alpha_tmp[lower_left], 0.5 * alpha_tmp[large], alpha[lower_left]) - alpha[lower_right] = max(alpha_tmp[lower_right], 0.5 * alpha_tmp[large], alpha[lower_right]) - alpha[upper_left] = max(alpha_tmp[upper_left], 0.5 * alpha_tmp[large], alpha[upper_left]) - alpha[upper_right] = max(alpha_tmp[upper_right], 0.5 * alpha_tmp[large], alpha[upper_right]) - - alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[lower_left], alpha[large]) - alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[lower_right], alpha[large]) - alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[upper_left], alpha[large]) - alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[upper_right], alpha[large]) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, k, element) + indicator[i, j, k] = indicator_hg.variable(u_local, equations) + end -end + # Convert to modal representation + multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, + indicator, modal_tmp1, modal_tmp2) + # Calculate total energies for all modes, without highest, without two highest + total_energy = zero(eltype(modal)) + for k in 1:nnodes(dg), j in 1:nnodes(dg), i in 1:nnodes(dg) + total_energy += modal[i, j, k]^2 + end + total_energy_clip1 = zero(eltype(modal)) + for k in 1:(nnodes(dg) - 1), j in 1:(nnodes(dg) - 1), i in 1:(nnodes(dg) - 1) + total_energy_clip1 += modal[i, j, k]^2 + end + total_energy_clip2 = zero(eltype(modal)) + for k in 1:(nnodes(dg) - 2), j in 1:(nnodes(dg) - 2), i in 1:(nnodes(dg) - 2) + total_energy_clip2 += modal[i, j, k]^2 + end -# this method is used when the indicator is constructed as for shock-capturing volume integrals -function create_cache(::Type{IndicatorLöhner}, equations::AbstractEquations{3}, basis::LobattoLegendreBasis) + # Calculate energy in higher modes + if !(iszero(total_energy)) + energy_frac_1 = (total_energy - total_energy_clip1) / total_energy + else + energy_frac_1 = zero(total_energy) + end + if !(iszero(total_energy_clip1)) + energy_frac_2 = (total_energy_clip1 - total_energy_clip2) / total_energy_clip1 + else + energy_frac_2 = zero(total_energy_clip1) + end + energy = max(energy_frac_1, energy_frac_2) - alpha = Vector{real(basis)}() + alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) - A = Array{real(basis), ndims(equations)} - indicator_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()] + # Take care of the case close to pure DG + if alpha_element < alpha_min + alpha_element = zero(alpha_element) + end - return (; alpha, indicator_threaded) -end + # Take care of the case close to pure FV + if alpha_element > 1 - alpha_min + alpha_element = one(alpha_element) + end -# this method is used when the indicator is constructed as for AMR -function create_cache(typ::Type{IndicatorLöhner}, mesh, equations::AbstractEquations{3}, dg::DGSEM, cache) - create_cache(typ, equations, dg.basis) + # Clip the maximum amount of FV allowed + alpha[element] = min(alpha_max, alpha_element) end +function 
apply_smoothing!(mesh::Union{TreeMesh{3}, P4estMesh{3}}, alpha, alpha_tmp, dg,
+                          cache)
+    # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
+    # Copy alpha values such that smoothing is independent of the element access order
+    alpha_tmp .= alpha
+
+    # Loop over interfaces
+    for interface in eachinterface(dg, cache)
+        # Get neighboring element ids
+        left = cache.interfaces.neighbor_ids[1, interface]
+        right = cache.interfaces.neighbor_ids[2, interface]
+
+        # Apply smoothing
+        alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left])
+        alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right])
+    end
+
+    # Loop over L2 mortars
+    for mortar in eachmortar(dg, cache)
+        # Get neighboring element ids
+        lower_left = cache.mortars.neighbor_ids[1, mortar]
+        lower_right = cache.mortars.neighbor_ids[2, mortar]
+        upper_left = cache.mortars.neighbor_ids[3, mortar]
+        upper_right = cache.mortars.neighbor_ids[4, mortar]
+        large = cache.mortars.neighbor_ids[5, mortar]
+
+        # Apply smoothing
+        alpha[lower_left] = max(alpha_tmp[lower_left], 0.5 * alpha_tmp[large],
+                                alpha[lower_left])
+        alpha[lower_right] = max(alpha_tmp[lower_right], 0.5 * alpha_tmp[large],
+                                 alpha[lower_right])
+        alpha[upper_left] = max(alpha_tmp[upper_left], 0.5 * alpha_tmp[large],
+                                alpha[upper_left])
+        alpha[upper_right] = max(alpha_tmp[upper_right], 0.5 * alpha_tmp[large],
+                                 alpha[upper_right])
+
+        alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[lower_left], alpha[large])
+        alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[lower_right], alpha[large])
+        alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[upper_left], alpha[large])
+        alpha[large] = max(alpha_tmp[large], 0.5 * alpha_tmp[upper_right], alpha[large])
+    end
end

# this method is used when the indicator is constructed as for shock-capturing volume integrals
-function create_cache(::Type{IndicatorLöhner}, equations::AbstractEquations{3}, basis::LobattoLegendreBasis)
+function create_cache(::Type{IndicatorLöhner}, equations::AbstractEquations{3},
+                      basis::LobattoLegendreBasis)
+    alpha = Vector{real(basis)}()

-  alpha = Vector{real(basis)}()

-  A = Array{real(basis), ndims(equations)}
-  indicator_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()]
+    A = Array{real(basis), ndims(equations)}
+    indicator_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis))
+                          for _ in 1:Threads.nthreads()]

-  return (; alpha, indicator_threaded)
-end
+    return (; alpha, indicator_threaded)
+end

-# this method is used when the indicator is constructed as for AMR
-function create_cache(typ::Type{IndicatorLöhner}, mesh, equations::AbstractEquations{3}, dg::DGSEM, cache)
-  create_cache(typ, equations, dg.basis)
+# this method is used when the indicator is constructed as for AMR
+function create_cache(typ::Type{IndicatorLöhner}, mesh, equations::AbstractEquations{3},
+                      dg::DGSEM, cache)
+    create_cache(typ, equations, dg.basis)
end

-function (löhner::IndicatorLöhner)(u::AbstractArray{<:Any,5},
-                                   mesh, equations, dg::DGSEM, cache;
-                                   kwargs...)
-  @assert nnodes(dg) >= 3 "IndicatorLöhner only works for nnodes >= 3 (polydeg > 1)"
-  @unpack alpha, indicator_threaded = löhner.cache
-  resize!(alpha, nelements(dg, cache))
-
-  @threaded for element in eachelement(dg, cache)
-    indicator = indicator_threaded[Threads.threadid()]
-
-    # Calculate indicator variables at Gauss-Lobatto nodes
-    for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
-      u_local = get_node_vars(u, equations, dg, i, j, k, element)
-      indicator[i, j, k] = löhner.variable(u_local, equations)
-    end
-
-    estimate = zero(real(dg))
-    for k in eachnode(dg), j in eachnode(dg), i in 2:nnodes(dg)-1
-      # x direction
-      u0 = indicator[i, j, k]
-      up = indicator[i+1, j, k]
-      um = indicator[i-1, j, k]
-      estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner))
-    end
-
-    for k in eachnode(dg), j in 2:nnodes(dg)-1, i in eachnode(dg)
-      # y direction
-      u0 = indicator[i, j, k]
-      up = indicator[i, j+1, k]
-      um = indicator[i, j-1, k]
-      estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner))
-    end
-
-    for k in 2:nnodes(dg)-1, j in eachnode(dg), i in eachnode(dg)
-      # y direction
-      u0 = indicator[i, j, k ]
-      up = indicator[i, j, k+1]
-      um = indicator[i, j, k-1]
-      estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner))
-    end
-
-    # use the maximum
as DG element indicator
- alpha[element] = estimate
- end
+ return (; alpha, indicator_threaded)
+end
- return alpha
+# this method is used when the indicator is constructed as for AMR
+function create_cache(typ::Type{IndicatorLöhner}, mesh, equations::AbstractEquations{3},
+ dg::DGSEM, cache)
+ create_cache(typ, equations, dg.basis)
end
+function (löhner::IndicatorLöhner)(u::AbstractArray{<:Any, 5},
+ mesh, equations, dg::DGSEM, cache;
+ kwargs...)
+ @assert nnodes(dg)>=3 "IndicatorLöhner only works for nnodes >= 3 (polydeg > 1)"
+ @unpack alpha, indicator_threaded = löhner.cache
+ resize!(alpha, nelements(dg, cache))
+
+ @threaded for element in eachelement(dg, cache)
+ indicator = indicator_threaded[Threads.threadid()]
+
+ # Calculate indicator variables at Gauss-Lobatto nodes
+ for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+ u_local = get_node_vars(u, equations, dg, i, j, k, element)
+ indicator[i, j, k] = löhner.variable(u_local, equations)
+ end
+
+ estimate = zero(real(dg))
+ for k in eachnode(dg), j in eachnode(dg), i in 2:(nnodes(dg) - 1)
+ # x direction
+ u0 = indicator[i, j, k]
+ up = indicator[i + 1, j, k]
+ um = indicator[i - 1, j, k]
+ estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner))
+ end
+
+ for k in eachnode(dg), j in 2:(nnodes(dg) - 1), i in eachnode(dg)
+ # y direction
+ u0 = indicator[i, j, k]
+ up = indicator[i, j + 1, k]
+ um = indicator[i, j - 1, k]
+ estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner))
+ end
+
+ for k in 2:(nnodes(dg) - 1), j in eachnode(dg), i in eachnode(dg)
+ # z direction
+ u0 = indicator[i, j, k]
+ up = indicator[i, j, k + 1]
+ um = indicator[i, j, k - 1]
+ estimate = max(estimate, local_löhner_estimate(um, u0, up, löhner))
+ end
+
+ # use the maximum as DG element indicator
+ alpha[element] = estimate
+ end

-# this method is used when the indicator is constructed as for shock-capturing volume integrals
-function create_cache(::Type{IndicatorMax}, equations::AbstractEquations{3}, basis::LobattoLegendreBasis)
+ return alpha
+end

- alpha = Vector{real(basis)}()
+# this method is used when the indicator is constructed as for shock-capturing volume integrals
+function create_cache(::Type{IndicatorMax}, equations::AbstractEquations{3},
+ basis::LobattoLegendreBasis)
+ alpha = Vector{real(basis)}()

- A = Array{real(basis), ndims(equations)}
- indicator_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis)) for _ in 1:Threads.nthreads()]
+ A = Array{real(basis), ndims(equations)}
+ indicator_threaded = [A(undef, nnodes(basis), nnodes(basis), nnodes(basis))
+ for _ in 1:Threads.nthreads()]

- return (; alpha, indicator_threaded)
+ return (; alpha, indicator_threaded)
end

# this method is used when the indicator is constructed as for AMR
-function create_cache(typ::Type{IndicatorMax}, mesh, equations::AbstractEquations{3}, dg::DGSEM, cache)
- cache = create_cache(typ, equations, dg.basis)
+function create_cache(typ::Type{IndicatorMax}, mesh, equations::AbstractEquations{3},
+ dg::DGSEM, cache)
+ cache = create_cache(typ, equations, dg.basis)
end

-function (indicator_max::IndicatorMax)(u::AbstractArray{<:Any,5},
+function (indicator_max::IndicatorMax)(u::AbstractArray{<:Any, 5},
 mesh, equations, dg::DGSEM, cache;
 kwargs...)
- @unpack alpha, indicator_threaded = indicator_max.cache - resize!(alpha, nelements(dg, cache)) + @unpack alpha, indicator_threaded = indicator_max.cache + resize!(alpha, nelements(dg, cache)) - @threaded for element in eachelement(dg, cache) - indicator = indicator_threaded[Threads.threadid()] + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] - # Calculate indicator variables at Gauss-Lobatto nodes - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - u_local = get_node_vars(u, equations, dg, i, j, k, element) - indicator[i, j, k] = indicator_max.variable(u_local, equations) - end + # Calculate indicator variables at Gauss-Lobatto nodes + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, k, element) + indicator[i, j, k] = indicator_max.variable(u_local, equations) + end - alpha[element] = maximum(indicator) - end + alpha[element] = maximum(indicator) + end - return alpha + return alpha end - - end # @muladd diff --git a/src/solvers/dgsem_unstructured/containers_2d.jl b/src/solvers/dgsem_unstructured/containers_2d.jl index f1fda031ee9..13eeaeabffb 100644 --- a/src/solvers/dgsem_unstructured/containers_2d.jl +++ b/src/solvers/dgsem_unstructured/containers_2d.jl @@ -3,42 +3,44 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Container data structure (structure-of-arrays style) for DG elements on curved unstructured mesh -struct UnstructuredElementContainer2D{RealT<:Real, uEltype<:Real} - node_coordinates ::Array{RealT, 4} # [ndims, nnodes, nnodes, nelement] - jacobian_matrix ::Array{RealT, 5} # [ndims, ndims, nnodes, nnodes, nelement] - inverse_jacobian ::Array{RealT, 3} # [nnodes, nnodes, nelement] - contravariant_vectors::Array{RealT, 5} # [ndims, ndims, nnodes, nnodes, nelement] - normal_directions ::Array{RealT, 4} # [ndims, nnodes, local sides, nelement] - surface_flux_values ::Array{uEltype, 4} # [variables, nnodes, local sides, elements] +struct UnstructuredElementContainer2D{RealT <: Real, uEltype <: Real} + node_coordinates::Array{RealT, 4} # [ndims, nnodes, nnodes, nelement] + jacobian_matrix::Array{RealT, 5} # [ndims, ndims, nnodes, nnodes, nelement] + inverse_jacobian::Array{RealT, 3} # [nnodes, nnodes, nelement] + contravariant_vectors::Array{RealT, 5} # [ndims, ndims, nnodes, nnodes, nelement] + normal_directions::Array{RealT, 4} # [ndims, nnodes, local sides, nelement] + surface_flux_values::Array{uEltype, 4} # [variables, nnodes, local sides, elements] end - # construct an empty curved element container to be filled later with geometries in the # unstructured mesh constructor -function UnstructuredElementContainer2D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - node_coordinates = fill(nan_RealT, (2, n_nodes, n_nodes, capacity)) - jacobian_matrix = fill(nan_RealT, (2, 2, n_nodes, n_nodes, capacity)) - inverse_jacobian = fill(nan_RealT, (n_nodes, n_nodes, capacity)) - contravariant_vectors = fill(nan_RealT, (2, 2, n_nodes, n_nodes, capacity)) - normal_directions = fill(nan_RealT, (2, n_nodes, 4, capacity)) - surface_flux_values = fill(nan_uEltype, (n_variables, n_nodes, 4, capacity)) - - return UnstructuredElementContainer2D{RealT, uEltype}(node_coordinates, - jacobian_matrix, - inverse_jacobian, - contravariant_vectors, - normal_directions, 
- surface_flux_values)
+function UnstructuredElementContainer2D{RealT, uEltype}(capacity::Integer, n_variables,
+ n_nodes) where {RealT <: Real,
+ uEltype <: Real}
+ nan_RealT = convert(RealT, NaN)
+ nan_uEltype = convert(uEltype, NaN)
+
+ node_coordinates = fill(nan_RealT, (2, n_nodes, n_nodes, capacity))
+ jacobian_matrix = fill(nan_RealT, (2, 2, n_nodes, n_nodes, capacity))
+ inverse_jacobian = fill(nan_RealT, (n_nodes, n_nodes, capacity))
+ contravariant_vectors = fill(nan_RealT, (2, 2, n_nodes, n_nodes, capacity))
+ normal_directions = fill(nan_RealT, (2, n_nodes, 4, capacity))
+ surface_flux_values = fill(nan_uEltype, (n_variables, n_nodes, 4, capacity))
+
+ return UnstructuredElementContainer2D{RealT, uEltype}(node_coordinates,
+ jacobian_matrix,
+ inverse_jacobian,
+ contravariant_vectors,
+ normal_directions,
+ surface_flux_values)
end

-@inline nelements(elements::UnstructuredElementContainer2D) = size(elements.surface_flux_values, 4)
+@inline function nelements(elements::UnstructuredElementContainer2D)
+ size(elements.surface_flux_values, 4)
+end

"""
    eachelement(elements::UnstructuredElementContainer2D)
@@ -46,280 +48,292 @@ Return an iterator over the indices that specify the location in relevant data s
for the elements in `elements`.
In particular, the elements themselves are not returned.
"""
-@inline eachelement(elements::UnstructuredElementContainer2D) = Base.OneTo(nelements(elements))
+@inline function eachelement(elements::UnstructuredElementContainer2D)
+ Base.OneTo(nelements(elements))
+end

-@inline nvariables(elements::UnstructuredElementContainer2D) = size(elements.surface_flux_values, 1)
-@inline nnodes(elements::UnstructuredElementContainer2D) = size(elements.surface_flux_values, 2)
+@inline function nvariables(elements::UnstructuredElementContainer2D)
+ size(elements.surface_flux_values, 1)
+end
+@inline function nnodes(elements::UnstructuredElementContainer2D)
+ size(elements.surface_flux_values, 2)
+end

Base.real(elements::UnstructuredElementContainer2D) = eltype(elements.node_coordinates)
-Base.eltype(elements::UnstructuredElementContainer2D) = eltype(elements.surface_flux_values)
+function Base.eltype(elements::UnstructuredElementContainer2D)
+ eltype(elements.surface_flux_values)
+end

@inline function get_surface_normal(vec, indices...)
- # way to extract the normal vector at the surfaces without allocating - surface_vector = SVector(ntuple(j -> vec[j, indices...], 2)) - return surface_vector + # way to extract the normal vector at the surfaces without allocating + surface_vector = SVector(ntuple(j -> vec[j, indices...], 2)) + return surface_vector end function init_elements(mesh::UnstructuredMesh2D, equations, basis, RealT, uEltype) - elements = UnstructuredElementContainer2D{RealT, uEltype}( - mesh.n_elements, nvariables(equations), nnodes(basis)) - init_elements!(elements, mesh, basis) - return elements + elements = UnstructuredElementContainer2D{RealT, uEltype}(mesh.n_elements, + nvariables(equations), + nnodes(basis)) + init_elements!(elements, mesh, basis) + return elements end - function init_elements!(elements::UnstructuredElementContainer2D, mesh, basis) - four_corners = zeros(eltype(mesh.corners), 4, 2) - - # loop through elements and call the correct constructor based on whether the element is curved - for element in eachelement(elements) - if mesh.element_is_curved[element] - init_element!(elements, element, basis.nodes, view(mesh.surface_curves, :, element)) - else # straight sided element - for i in 1:4, j in 1:2 - # pull the (x,y) values of these corners out of the global corners array - four_corners[i, j] = mesh.corners[j, mesh.element_node_ids[i, element]] - end - init_element!(elements, element, basis.nodes, four_corners) + four_corners = zeros(eltype(mesh.corners), 4, 2) + + # loop through elements and call the correct constructor based on whether the element is curved + for element in eachelement(elements) + if mesh.element_is_curved[element] + init_element!(elements, element, basis.nodes, + view(mesh.surface_curves, :, element)) + else # straight sided element + for i in 1:4, j in 1:2 + # pull the (x,y) values of these corners out of the global corners array + four_corners[i, j] = mesh.corners[j, mesh.element_node_ids[i, element]] + end + init_element!(elements, element, basis.nodes, four_corners) + end end - end end - # initialize all the values in the container of a general element (either straight sided or curved) function init_element!(elements, element, nodes, corners_or_surface_curves) + calc_node_coordinates!(elements.node_coordinates, element, nodes, + corners_or_surface_curves) - calc_node_coordinates!(elements.node_coordinates, element, nodes, corners_or_surface_curves) + calc_metric_terms!(elements.jacobian_matrix, element, nodes, + corners_or_surface_curves) - calc_metric_terms!(elements.jacobian_matrix, element, nodes, corners_or_surface_curves) + calc_inverse_jacobian!(elements.inverse_jacobian, element, elements.jacobian_matrix) - calc_inverse_jacobian!(elements.inverse_jacobian, element, elements.jacobian_matrix) + calc_contravariant_vectors!(elements.contravariant_vectors, element, + elements.jacobian_matrix) - calc_contravariant_vectors!(elements.contravariant_vectors, element, elements.jacobian_matrix) + calc_normal_directions!(elements.normal_directions, element, nodes, + corners_or_surface_curves) - calc_normal_directions!(elements.normal_directions, element, nodes, corners_or_surface_curves) - - return elements + return elements end - # generic container for the interior interfaces of an unstructured mesh -struct UnstructuredInterfaceContainer2D{uEltype<:Real} - u ::Array{uEltype, 4} # [primary/secondary, variables, i, interfaces] - start_index ::Vector{Int} # [interfaces] - index_increment ::Vector{Int} # [interfaces] - element_ids ::Array{Int, 2} # [primary/secondary, 
interfaces] - element_side_ids ::Array{Int, 2} # [primary/secondary, interfaces] +struct UnstructuredInterfaceContainer2D{uEltype <: Real} + u::Array{uEltype, 4} # [primary/secondary, variables, i, interfaces] + start_index::Vector{Int} # [interfaces] + index_increment::Vector{Int} # [interfaces] + element_ids::Array{Int, 2} # [primary/secondary, interfaces] + element_side_ids::Array{Int, 2} # [primary/secondary, interfaces] end - # Construct an empty curved interface container to be filled later with neighbour # information in the unstructured mesh constructor -function UnstructuredInterfaceContainer2D{uEltype}(capacity::Integer, n_variables, n_nodes) where {uEltype<:Real} - - nan_uEltype = convert(uEltype, NaN) - - u = fill(nan_uEltype, (2, n_variables, n_nodes, capacity)) - start_index = fill(typemin(Int), capacity) - index_increment = fill(typemin(Int), capacity) - element_ids = fill(typemin(Int), (2, capacity)) - element_side_ids = fill(typemin(Int), (2, capacity)) - - return UnstructuredInterfaceContainer2D{uEltype}( - u, start_index, index_increment, element_ids, element_side_ids) +function UnstructuredInterfaceContainer2D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan_uEltype = convert(uEltype, NaN) + + u = fill(nan_uEltype, (2, n_variables, n_nodes, capacity)) + start_index = fill(typemin(Int), capacity) + index_increment = fill(typemin(Int), capacity) + element_ids = fill(typemin(Int), (2, capacity)) + element_side_ids = fill(typemin(Int), (2, capacity)) + + return UnstructuredInterfaceContainer2D{uEltype}(u, start_index, index_increment, + element_ids, element_side_ids) end - -@inline ninterfaces(interfaces::UnstructuredInterfaceContainer2D) = length(interfaces.start_index) +@inline function ninterfaces(interfaces::UnstructuredInterfaceContainer2D) + length(interfaces.start_index) +end @inline nnodes(interfaces::UnstructuredInterfaceContainer2D) = size(interfaces.u, 3) +function init_interfaces(mesh::UnstructuredMesh2D, + elements::UnstructuredElementContainer2D) + interfaces = UnstructuredInterfaceContainer2D{eltype(elements)}(mesh.n_interfaces, + nvariables(elements), + nnodes(elements)) -function init_interfaces(mesh::UnstructuredMesh2D, elements::UnstructuredElementContainer2D) - - interfaces = UnstructuredInterfaceContainer2D{eltype(elements)}( - mesh.n_interfaces, nvariables(elements), nnodes(elements)) - - # extract and save the appropriate neighbour information from the mesh skeleton - if isperiodic(mesh) - init_interfaces!(interfaces, mesh.neighbour_information, mesh.boundary_names, - mesh.n_elements, True()) - else - init_interfaces!(interfaces, mesh.neighbour_information, mesh.boundary_names, - mesh.n_elements, False()) - end + # extract and save the appropriate neighbour information from the mesh skeleton + if isperiodic(mesh) + init_interfaces!(interfaces, mesh.neighbour_information, mesh.boundary_names, + mesh.n_elements, True()) + else + init_interfaces!(interfaces, mesh.neighbour_information, mesh.boundary_names, + mesh.n_elements, False()) + end - return interfaces + return interfaces end - function init_interfaces!(interfaces, edge_information, boundary_names, n_elements, periodic::False) - - n_nodes = nnodes(interfaces) - n_surfaces = size(edge_information, 2) - intr_count = 1 - for j in 1:n_surfaces - if edge_information[4,j] > 0 - # get the primary/secondary element information and coupling for an interior interface - interfaces.element_ids[1,intr_count] = edge_information[3,j] # primary element id - 
interfaces.element_ids[2,intr_count] = edge_information[4,j] # secondary element id - interfaces.element_side_ids[1,intr_count] = edge_information[5,j] # primary side id - interfaces.element_side_ids[2,intr_count] = abs(edge_information[6,j]) # secondary side id - # default the start and increment indexing - interfaces.start_index[intr_count] = 1 - interfaces.index_increment[intr_count] = 1 - if edge_information[6,j] < 0 - # coordinate system in the secondary element is "flipped" compared to the primary element. - # Adjust the start and increment indexes such that the secondary element coordinate system - # can match the primary neighbour when surface coupling is computed - interfaces.start_index[intr_count] = n_nodes - interfaces.index_increment[intr_count] = -1 - end - intr_count += 1 + n_nodes = nnodes(interfaces) + n_surfaces = size(edge_information, 2) + intr_count = 1 + for j in 1:n_surfaces + if edge_information[4, j] > 0 + # get the primary/secondary element information and coupling for an interior interface + interfaces.element_ids[1, intr_count] = edge_information[3, j] # primary element id + interfaces.element_ids[2, intr_count] = edge_information[4, j] # secondary element id + interfaces.element_side_ids[1, intr_count] = edge_information[5, j] # primary side id + interfaces.element_side_ids[2, intr_count] = abs(edge_information[6, j]) # secondary side id + # default the start and increment indexing + interfaces.start_index[intr_count] = 1 + interfaces.index_increment[intr_count] = 1 + if edge_information[6, j] < 0 + # coordinate system in the secondary element is "flipped" compared to the primary element. + # Adjust the start and increment indexes such that the secondary element coordinate system + # can match the primary neighbour when surface coupling is computed + interfaces.start_index[intr_count] = n_nodes + interfaces.index_increment[intr_count] = -1 + end + intr_count += 1 + end end - end - return nothing + return nothing end - function init_interfaces!(interfaces, edge_information, boundary_names, n_elements, periodic::True) - - n_nodes = nnodes(interfaces) - n_surfaces = size(edge_information, 2) - # for now this set a fully periodic domain - # TODO: possibly adjust to be able to set periodic in only the x or y direction - for j in 1:n_surfaces - if edge_information[4,j] > 0 - # get the primary/secondary element information and coupling for an interior interface - interfaces.element_ids[1,j] = edge_information[3,j] # primary element id - interfaces.element_ids[2,j] = edge_information[4,j] # secondary element id - interfaces.element_side_ids[1,j] = edge_information[5,j] # primary side id - interfaces.element_side_ids[2,j] = abs(edge_information[6,j]) # secondary side id - # default the start and increment indexing - interfaces.start_index[j] = 1 - interfaces.index_increment[j] = 1 - if edge_information[6,j] < 0 - # coordinate system in the secondary element is "flipped" compared to the primary element. 
- # Adjust the start and increment indexes such that the secondary element coordinate system
- # can match the primary neighbour when surface coupling is computed
- interfaces.start_index[j] = n_nodes
- interfaces.index_increment[j] = -1
- end
- else
- # way to set periodic BCs where we are assuming to have a structured mesh with internal curves
- primary_side = edge_information[5,j]
- primary_element = edge_information[3,j]
- # Note: This is a way to get the neighbour element number and local side from a square
- # structured mesh where the element local surface numbering is right-handed
- if boundary_names[primary_side, primary_element] === :Bottom
- secondary_element = primary_element + (n_elements - convert(Int, sqrt(n_elements)))
- secondary_side = 3
- elseif boundary_names[primary_side, primary_element] === :Top
- secondary_element = primary_element - (n_elements - convert(Int, sqrt(n_elements)))
- secondary_side = 1
- elseif boundary_names[primary_side, primary_element] === :Left
- secondary_element = primary_element + (convert(Int, sqrt(n_elements)) - 1)
- secondary_side = 2
- elseif boundary_names[primary_side, primary_element] === :Right
- secondary_element = primary_element - (convert(Int, sqrt(n_elements)) - 1)
- secondary_side = 4
- end
- interfaces.element_ids[1,j] = primary_element
- interfaces.element_ids[2,j] = secondary_element
- interfaces.element_side_ids[1,j] = primary_side
- interfaces.element_side_ids[2,j] = secondary_side
- # set the start and increment indexing
- # Note! We assume that the periodic mesh has no flipped element coordinate systems
- interfaces.start_index[j] = 1
- interfaces.index_increment[j] = 1
+ n_nodes = nnodes(interfaces)
+ n_surfaces = size(edge_information, 2)
+ # for now this sets a fully periodic domain
+ # TODO: possibly adjust to be able to set periodic in only the x or y direction
+ for j in 1:n_surfaces
+ if edge_information[4, j] > 0
+ # get the primary/secondary element information and coupling for an interior interface
+ interfaces.element_ids[1, j] = edge_information[3, j] # primary element id
+ interfaces.element_ids[2, j] = edge_information[4, j] # secondary element id
+ interfaces.element_side_ids[1, j] = edge_information[5, j] # primary side id
+ interfaces.element_side_ids[2, j] = abs(edge_information[6, j]) # secondary side id
+ # default the start and increment indexing
+ interfaces.start_index[j] = 1
+ interfaces.index_increment[j] = 1
+ if edge_information[6, j] < 0
+ # coordinate system in the secondary element is "flipped" compared to the primary element.
+ # Adjust the start and increment indexes such that the secondary element coordinate system + # can match the primary neighbour when surface coupling is computed + interfaces.start_index[j] = n_nodes + interfaces.index_increment[j] = -1 + end + else + # way to set periodic BCs where we are assuming to have a structured mesh with internal curves + primary_side = edge_information[5, j] + primary_element = edge_information[3, j] + # Note: This is a way to get the neighbour element number and local side from a square + # structured mesh where the element local surface numbering is right-handed + if boundary_names[primary_side, primary_element] === :Bottom + secondary_element = primary_element + + (n_elements - convert(Int, sqrt(n_elements))) + secondary_side = 3 + elseif boundary_names[primary_side, primary_element] === :Top + secondary_element = primary_element - + (n_elements - convert(Int, sqrt(n_elements))) + secondary_side = 1 + elseif boundary_names[primary_side, primary_element] === :Left + secondary_element = primary_element + + (convert(Int, sqrt(n_elements)) - 1) + secondary_side = 2 + elseif boundary_names[primary_side, primary_element] === :Right + secondary_element = primary_element - + (convert(Int, sqrt(n_elements)) - 1) + secondary_side = 4 + end + interfaces.element_ids[1, j] = primary_element + interfaces.element_ids[2, j] = secondary_element + interfaces.element_side_ids[1, j] = primary_side + interfaces.element_side_ids[2, j] = secondary_side + # set the start and increment indexing + # Note! We assume that the periodic mesh has no flipped element coordinate systems + interfaces.start_index[j] = 1 + interfaces.index_increment[j] = 1 + end end - end - return nothing + return nothing end - # TODO: Clean-up meshes. Find a better name since it's also used for other meshes # generic container for the boundary interfaces of an unstructured mesh -struct UnstructuredBoundaryContainer2D{RealT<:Real, uEltype<:Real} - u ::Array{uEltype, 3} # [variables, i, boundaries] - element_id ::Vector{Int} # [boundaries] - element_side_id ::Vector{Int} # [boundaries] - node_coordinates::Array{RealT, 3} # [ndims, nnodes, boundaries] - name ::Vector{Symbol} # [boundaries] +struct UnstructuredBoundaryContainer2D{RealT <: Real, uEltype <: Real} + u::Array{uEltype, 3} # [variables, i, boundaries] + element_id::Vector{Int} # [boundaries] + element_side_id::Vector{Int} # [boundaries] + node_coordinates::Array{RealT, 3} # [ndims, nnodes, boundaries] + name::Vector{Symbol} # [boundaries] end - # construct an empty curved boundary container to be filled later with neighbour # information in the unstructured mesh constructor -function UnstructuredBoundaryContainer2D{RealT, uEltype}(capacity::Integer, n_variables, n_nodes) where {RealT<:Real, uEltype<:Real} - - nan_RealT = convert(RealT, NaN) - nan_uEltype = convert(uEltype, NaN) - - u = fill(nan_uEltype, (n_variables, n_nodes, capacity)) - element_id = fill(typemin(Int), capacity) - element_side_id = fill(typemin(Int), capacity) - node_coordinates = fill(nan_RealT, (2, n_nodes, capacity)) - name = fill(:empty, capacity) - - return UnstructuredBoundaryContainer2D{RealT, uEltype}( - u, element_id, element_side_id, node_coordinates, name) +function UnstructuredBoundaryContainer2D{RealT, uEltype}(capacity::Integer, n_variables, + n_nodes) where {RealT <: Real, + uEltype <: + Real} + nan_RealT = convert(RealT, NaN) + nan_uEltype = convert(uEltype, NaN) + + u = fill(nan_uEltype, (n_variables, n_nodes, capacity)) + element_id = fill(typemin(Int), capacity) + 
element_side_id = fill(typemin(Int), capacity) + node_coordinates = fill(nan_RealT, (2, n_nodes, capacity)) + name = fill(:empty, capacity) + + return UnstructuredBoundaryContainer2D{RealT, uEltype}(u, element_id, + element_side_id, + node_coordinates, name) end +@inline function nboundaries(boundaries::UnstructuredBoundaryContainer2D) + length(boundaries.name) +end -@inline nboundaries(boundaries::UnstructuredBoundaryContainer2D) = length(boundaries.name) - - -function init_boundaries(mesh::UnstructuredMesh2D, elements::UnstructuredElementContainer2D) - - boundaries = UnstructuredBoundaryContainer2D{real(elements), eltype(elements)}( - mesh.n_boundaries, nvariables(elements), nnodes(elements)) +function init_boundaries(mesh::UnstructuredMesh2D, + elements::UnstructuredElementContainer2D) + boundaries = UnstructuredBoundaryContainer2D{real(elements), eltype(elements)}(mesh.n_boundaries, + nvariables(elements), + nnodes(elements)) - # extract and save the appropriate boundary information provided any physical boundaries exist - if mesh.n_boundaries > 0 - init_boundaries!(boundaries, mesh.neighbour_information, mesh.boundary_names, elements) - end - return boundaries + # extract and save the appropriate boundary information provided any physical boundaries exist + if mesh.n_boundaries > 0 + init_boundaries!(boundaries, mesh.neighbour_information, mesh.boundary_names, + elements) + end + return boundaries end - function init_boundaries!(boundaries::UnstructuredBoundaryContainer2D, edge_information, boundary_names, elements) - - n_surfaces = size(edge_information,2) - bndy_count = 1 - for j in 1:n_surfaces - if edge_information[4,j] == 0 - # get the primary element information at a boundary interface - primary_element = edge_information[3,j] - primary_side = edge_information[5,j] - boundaries.element_id[bndy_count] = primary_element - boundaries.element_side_id[bndy_count] = primary_side - - # extract the physical boundary's name from the global list - boundaries.name[bndy_count] = boundary_names[primary_side, primary_element] - - # Store copy of the (x,y) node coordinates on the physical boundary - enc = elements.node_coordinates - if primary_side == 1 - boundaries.node_coordinates[:, :, bndy_count] .= enc[:, :, 1, primary_element] - elseif primary_side == 2 - boundaries.node_coordinates[:, :, bndy_count] .= enc[:, end, :, primary_element] - elseif primary_side == 3 - boundaries.node_coordinates[:, :, bndy_count] .= enc[:, :, end, primary_element] - else # primary_side == 4 - boundaries.node_coordinates[:, :, bndy_count] .= enc[:, 1, :, primary_element] - end - bndy_count += 1 + n_surfaces = size(edge_information, 2) + bndy_count = 1 + for j in 1:n_surfaces + if edge_information[4, j] == 0 + # get the primary element information at a boundary interface + primary_element = edge_information[3, j] + primary_side = edge_information[5, j] + boundaries.element_id[bndy_count] = primary_element + boundaries.element_side_id[bndy_count] = primary_side + + # extract the physical boundary's name from the global list + boundaries.name[bndy_count] = boundary_names[primary_side, primary_element] + + # Store copy of the (x,y) node coordinates on the physical boundary + enc = elements.node_coordinates + if primary_side == 1 + boundaries.node_coordinates[:, :, bndy_count] .= enc[:, :, 1, + primary_element] + elseif primary_side == 2 + boundaries.node_coordinates[:, :, bndy_count] .= enc[:, end, :, + primary_element] + elseif primary_side == 3 + boundaries.node_coordinates[:, :, bndy_count] .= enc[:, :, end, + 
primary_element] + else # primary_side == 4 + boundaries.node_coordinates[:, :, bndy_count] .= enc[:, 1, :, + primary_element] + end + bndy_count += 1 + end end - end - return nothing + return nothing end - - end # @muladd diff --git a/src/solvers/dgsem_unstructured/dg.jl b/src/solvers/dgsem_unstructured/dg.jl index 36926e6463a..3543f1a5829 100644 --- a/src/solvers/dgsem_unstructured/dg.jl +++ b/src/solvers/dgsem_unstructured/dg.jl @@ -3,19 +3,19 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent - -@inline function get_one_sided_surface_node_vars(u, equations, solver::DG, j, indices...) - # There is a cut-off at `n == 10` inside of the method - # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17 - # in Julia `v1.5`, leading to type instabilities if - # more than ten variables are used. That's why we use - # `Val(...)` below. - u_surface = SVector(ntuple(v -> u[j, v, indices...], Val(nvariables(equations)))) - return u_surface +@inline function get_one_sided_surface_node_vars(u, equations, solver::DG, j, + indices...) + # There is a cut-off at `n == 10` inside of the method + # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17 + # in Julia `v1.5`, leading to type instabilities if + # more than ten variables are used. That's why we use + # `Val(...)` below. + u_surface = SVector(ntuple(v -> u[j, v, indices...], Val(nvariables(equations)))) + return u_surface end - # 2D unstructured DG implementation include("mappings_geometry_curved_2d.jl") include("mappings_geometry_straight_2d.jl") @@ -23,5 +23,4 @@ include("containers_2d.jl") include("sort_boundary_conditions.jl") include("dg_2d.jl") include("indicators_2d.jl") - end # @muladd diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl b/src/solvers/dgsem_unstructured/dg_2d.jl index 283f8bdc74e..95dec027a82 100644 --- a/src/solvers/dgsem_unstructured/dg_2d.jl +++ b/src/solvers/dgsem_unstructured/dg_2d.jl @@ -3,82 +3,90 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # This method is called when a SemidiscretizationHyperbolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. 
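As background for the `create_cache` implementation that follows: the cache is a plain `NamedTuple` that is first assembled from the core containers and then extended by splatting in solver-specific entries. A minimal standalone sketch of this pattern (all names and values here are hypothetical, not Trixi.jl API):

```julia
# Build a basic cache, then extend it by splatting in further entries,
# mirroring the `(; cache..., create_cache(...)...)` idiom used below.
core = (; elements = [1, 2, 3], interfaces = [4, 5])
extra = (; tmp_storage = zeros(2))
cache = (; core..., extra...)

@assert cache.elements == [1, 2, 3]   # original entries are preserved
@assert cache.tmp_storage == zeros(2) # new entries are merged in
```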
function create_cache(mesh::UnstructuredMesh2D, equations, dg::DG, RealT, uEltype) + elements = init_elements(mesh, equations, dg.basis, RealT, uEltype) - elements = init_elements(mesh, equations, dg.basis, RealT, uEltype) - - interfaces = init_interfaces(mesh, elements) + interfaces = init_interfaces(mesh, elements) - boundaries = init_boundaries(mesh, elements) + boundaries = init_boundaries(mesh, elements) - cache = (; elements, interfaces, boundaries) + cache = (; elements, interfaces, boundaries) - # perform a check on the sufficient metric identities condition for free-stream preservation - # and halt computation if it fails - if !isapprox(max_discrete_metric_identities(dg, cache), 0, atol=1e-12) - error("metric terms fail free-stream preservation check with maximum error $(max_discrete_metric_identities(dg, cache))") - end + # perform a check on the sufficient metric identities condition for free-stream preservation + # and halt computation if it fails + if !isapprox(max_discrete_metric_identities(dg, cache), 0, atol = 1e-12) + error("metric terms fail free-stream preservation check with maximum error $(max_discrete_metric_identities(dg, cache))") + end - # Add specialized parts of the cache required to compute the flux differencing volume integral - cache = (;cache..., create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + # Add specialized parts of the cache required to compute the flux differencing volume integral + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) - return cache + return cache end - function rhs!(du, u, t, mesh::UnstructuredMesh2D, equations, initial_condition, boundary_conditions, source_terms::Source, dg::DG, cache) where {Source} - # Reset du - @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) - - # Calculate volume integral - @trixi_timeit timer() "volume integral" calc_volume_integral!( - du, u, mesh, - have_nonconservative_terms(equations), equations, - dg.volume_integral, dg, cache) - - # Prolong solution to interfaces - @trixi_timeit timer() "prolong2interfaces" prolong2interfaces!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate interface fluxes - @trixi_timeit timer() "interface flux" calc_interface_flux!( - cache.elements.surface_flux_values, mesh, - have_nonconservative_terms(equations), equations, - dg.surface_integral, dg, cache) - - # Prolong solution to boundaries - @trixi_timeit timer() "prolong2boundaries" prolong2boundaries!( - cache, u, mesh, equations, dg.surface_integral, dg) - - # Calculate boundary fluxes - @trixi_timeit timer() "boundary flux" calc_boundary_flux!( - cache, t, boundary_conditions, mesh, equations, dg.surface_integral, dg) - - # Calculate surface integrals - @trixi_timeit timer() "surface integral" calc_surface_integral!( - du, u, mesh, equations, dg.surface_integral, dg, cache) - - # Apply Jacobian from mapping to reference element - # Note! 
this routine is reused from dg_curved/dg_2d.jl - @trixi_timeit timer() "Jacobian" apply_jacobian!( - du, mesh, equations, dg, cache) - - # Calculate source terms - @trixi_timeit timer() "source terms" calc_sources!( - du, u, t, source_terms, equations, dg, cache) - - return nothing -end + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, u, mesh, + have_nonconservative_terms(equations), equations, + dg.volume_integral, dg, cache) + end + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache.elements.surface_flux_values, mesh, + have_nonconservative_terms(equations), equations, + dg.surface_integral, dg, cache) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache, u, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux!(cache, t, boundary_conditions, mesh, equations, + dg.surface_integral, dg) + end + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations, + dg.surface_integral, dg, cache) + end + + # Apply Jacobian from mapping to reference element + # Note! this routine is reused from dg_curved/dg_2d.jl + @trixi_timeit timer() "Jacobian" apply_jacobian!(du, mesh, equations, dg, cache) + + # Calculate source terms + @trixi_timeit timer() "source terms" begin + calc_sources!(du, u, t, source_terms, equations, dg, cache) + end + + return nothing +end # prolong the solution into the convenience array in the interior interface container # We pass the `surface_integral` argument solely for dispatch @@ -86,107 +94,110 @@ end function prolong2interfaces!(cache, u, mesh::UnstructuredMesh2D, equations, surface_integral, dg::DG) - @unpack interfaces = cache - - @threaded for interface in eachinterface(dg, cache) - primary_element = interfaces.element_ids[1, interface] - secondary_element = interfaces.element_ids[2, interface] - - primary_side = interfaces.element_side_ids[1, interface] - secondary_side = interfaces.element_side_ids[2, interface] - - if primary_side == 1 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, i, 1, primary_element] - end - elseif primary_side == 2 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, nnodes(dg), i, primary_element] - end - elseif primary_side == 3 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, i, nnodes(dg), primary_element] - end - else # primary_side == 4 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, interface] = u[v, 1, i, primary_element] - end - end - - if secondary_side == 1 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[2, v, i, interface] = u[v, i, 1, secondary_element] - end - elseif secondary_side == 2 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[2, v, i, interface] = u[v, nnodes(dg), i, secondary_element] - end - elseif secondary_side == 3 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[2, v, i, interface] = u[v, i, nnodes(dg), 
secondary_element] - end - else # secondary_side == 4 - for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[2, v, i, interface] = u[v, 1, i, secondary_element] - end + @unpack interfaces = cache + + @threaded for interface in eachinterface(dg, cache) + primary_element = interfaces.element_ids[1, interface] + secondary_element = interfaces.element_ids[2, interface] + + primary_side = interfaces.element_side_ids[1, interface] + secondary_side = interfaces.element_side_ids[2, interface] + + if primary_side == 1 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, i, interface] = u[v, i, 1, primary_element] + end + elseif primary_side == 2 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, i, interface] = u[v, nnodes(dg), i, primary_element] + end + elseif primary_side == 3 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, i, interface] = u[v, i, nnodes(dg), primary_element] + end + else # primary_side == 4 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[1, v, i, interface] = u[v, 1, i, primary_element] + end + end + + if secondary_side == 1 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[2, v, i, interface] = u[v, i, 1, secondary_element] + end + elseif secondary_side == 2 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[2, v, i, interface] = u[v, nnodes(dg), i, + secondary_element] + end + elseif secondary_side == 3 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[2, v, i, interface] = u[v, i, nnodes(dg), + secondary_element] + end + else # secondary_side == 4 + for i in eachnode(dg), v in eachvariable(equations) + interfaces.u[2, v, i, interface] = u[v, 1, i, secondary_element] + end + end end - end - return nothing + return nothing end - # compute the numerical flux interface coupling between two elements on an unstructured # quadrilateral mesh function calc_interface_flux!(surface_flux_values, mesh::UnstructuredMesh2D, nonconservative_terms::False, equations, surface_integral, dg::DG, cache) - @unpack surface_flux = surface_integral - @unpack u, start_index, index_increment, element_ids, element_side_ids = cache.interfaces - @unpack normal_directions = cache.elements - - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - primary_element = element_ids[1, interface] - secondary_element = element_ids[2, interface] - - # Get the local side id on which to compute the flux - primary_side = element_side_ids[1, interface] - secondary_side = element_side_ids[2, interface] - - # initial index for the coordinate system on the secondary element - secondary_index = start_index[interface] - - # loop through the primary element coordinate system and compute the interface coupling - for primary_index in eachnode(dg) - # pull the primary and secondary states from the boundary u values - u_ll = get_one_sided_surface_node_vars(u, equations, dg, 1, primary_index, interface) - u_rr = get_one_sided_surface_node_vars(u, equations, dg, 2, secondary_index, interface) - - # pull the outward pointing (normal) directional vector - # Note! this assumes a conforming approximation, more must be done in terms of the normals - # for hanging nodes and other non-conforming approximation spaces - outward_direction = get_surface_normal(normal_directions, primary_index, primary_side, - primary_element) - - # Call pointwise numerical flux with rotation. 
Direction is normalized inside this function - flux = surface_flux(u_ll, u_rr, outward_direction, equations) - - # Copy flux back to primary/secondary element storage - # Note the sign change for the normal flux in the secondary element! - for v in eachvariable(equations) - surface_flux_values[v, primary_index , primary_side , primary_element ] = flux[v] - surface_flux_values[v, secondary_index, secondary_side, secondary_element] = -flux[v] - end - - # increment the index of the coordinate system in the secondary element - secondary_index += index_increment[interface] + @unpack surface_flux = surface_integral + @unpack u, start_index, index_increment, element_ids, element_side_ids = cache.interfaces + @unpack normal_directions = cache.elements + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + primary_element = element_ids[1, interface] + secondary_element = element_ids[2, interface] + + # Get the local side id on which to compute the flux + primary_side = element_side_ids[1, interface] + secondary_side = element_side_ids[2, interface] + + # initial index for the coordinate system on the secondary element + secondary_index = start_index[interface] + + # loop through the primary element coordinate system and compute the interface coupling + for primary_index in eachnode(dg) + # pull the primary and secondary states from the boundary u values + u_ll = get_one_sided_surface_node_vars(u, equations, dg, 1, primary_index, + interface) + u_rr = get_one_sided_surface_node_vars(u, equations, dg, 2, secondary_index, + interface) + + # pull the outward pointing (normal) directional vector + # Note! this assumes a conforming approximation, more must be done in terms of the normals + # for hanging nodes and other non-conforming approximation spaces + outward_direction = get_surface_normal(normal_directions, primary_index, + primary_side, + primary_element) + + # Call pointwise numerical flux with rotation. Direction is normalized inside this function + flux = surface_flux(u_ll, u_rr, outward_direction, equations) + + # Copy flux back to primary/secondary element storage + # Note the sign change for the normal flux in the secondary element! 
+ for v in eachvariable(equations) + surface_flux_values[v, primary_index, primary_side, primary_element] = flux[v] + surface_flux_values[v, secondary_index, secondary_side, secondary_element] = -flux[v] + end + + # increment the index of the coordinate system in the secondary element + secondary_index += index_increment[interface] + end end - end - return nothing + return nothing end # compute the numerical flux interface with nonconservative terms coupling between two elements @@ -195,204 +206,207 @@ function calc_interface_flux!(surface_flux_values, mesh::UnstructuredMesh2D, nonconservative_terms::True, equations, surface_integral, dg::DG, cache) - surface_flux, nonconservative_flux = surface_integral.surface_flux - @unpack u, start_index, index_increment, element_ids, element_side_ids = cache.interfaces - @unpack normal_directions = cache.elements - - @threaded for interface in eachinterface(dg, cache) - # Get the primary element index and local side index - primary_element = element_ids[1, interface] - primary_side = element_side_ids[1, interface] - - # Get neighboring element, local side index, and index increment on the - # secondary element - secondary_element = element_ids[2, interface] - secondary_side = element_side_ids[2, interface] - secondary_index_increment = index_increment[interface] - - secondary_index = start_index[interface] - for primary_index in eachnode(dg) - # pull the primary and secondary states from the boundary u values - u_ll = get_one_sided_surface_node_vars(u, equations, dg, 1, primary_index, interface) - u_rr = get_one_sided_surface_node_vars(u, equations, dg, 2, secondary_index, interface) - - # pull the outward pointing (normal) directional vector - # Note! This assumes a conforming approximation, more must be done in terms - # of the normals for hanging nodes and other non-conforming approximation spaces - outward_direction = get_surface_normal(normal_directions, primary_index, primary_side, - primary_element) - - # Calculate the conservative portion of the numerical flux - # Call pointwise numerical flux with rotation. Direction is normalized - # inside this function - flux = surface_flux(u_ll, u_rr, outward_direction, equations) - - # Compute both nonconservative fluxes - # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # However, both are the same at watertight interfaces, so we pass the - # `outward_direction` twice. - noncons_primary = nonconservative_flux(u_ll, u_rr, outward_direction, outward_direction, equations) - noncons_secondary = nonconservative_flux(u_rr, u_ll, outward_direction, outward_direction, equations) - - # Copy flux to primary and secondary element storage - # Note the sign change for the components in the secondary element! 
- for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, primary_index, primary_side, primary_element] = ( - flux[v] + 0.5 * noncons_primary[v]) - surface_flux_values[v, secondary_index, secondary_side, secondary_element] = -( - flux[v] + 0.5 * noncons_secondary[v]) - end - - # increment the index of the coordinate system in the secondary element - secondary_index += secondary_index_increment + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack u, start_index, index_increment, element_ids, element_side_ids = cache.interfaces + @unpack normal_directions = cache.elements + + @threaded for interface in eachinterface(dg, cache) + # Get the primary element index and local side index + primary_element = element_ids[1, interface] + primary_side = element_side_ids[1, interface] + + # Get neighboring element, local side index, and index increment on the + # secondary element + secondary_element = element_ids[2, interface] + secondary_side = element_side_ids[2, interface] + secondary_index_increment = index_increment[interface] + + secondary_index = start_index[interface] + for primary_index in eachnode(dg) + # pull the primary and secondary states from the boundary u values + u_ll = get_one_sided_surface_node_vars(u, equations, dg, 1, primary_index, + interface) + u_rr = get_one_sided_surface_node_vars(u, equations, dg, 2, secondary_index, + interface) + + # pull the outward pointing (normal) directional vector + # Note! This assumes a conforming approximation, more must be done in terms + # of the normals for hanging nodes and other non-conforming approximation spaces + outward_direction = get_surface_normal(normal_directions, primary_index, + primary_side, + primary_element) + + # Calculate the conservative portion of the numerical flux + # Call pointwise numerical flux with rotation. Direction is normalized + # inside this function + flux = surface_flux(u_ll, u_rr, outward_direction, equations) + + # Compute both nonconservative fluxes + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # However, both are the same at watertight interfaces, so we pass the + # `outward_direction` twice. + noncons_primary = nonconservative_flux(u_ll, u_rr, outward_direction, + outward_direction, equations) + noncons_secondary = nonconservative_flux(u_rr, u_ll, outward_direction, + outward_direction, equations) + + # Copy flux to primary and secondary element storage + # Note the sign change for the components in the secondary element! 
+ for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, primary_index, primary_side, primary_element] = (flux[v] + + 0.5 * + noncons_primary[v]) + surface_flux_values[v, secondary_index, secondary_side, secondary_element] = -(flux[v] + + 0.5 * + noncons_secondary[v]) + end + + # increment the index of the coordinate system in the secondary element + secondary_index += secondary_index_increment + end end - end - return nothing + return nothing end - # move the approximate solution onto physical boundaries within a "right-handed" element function prolong2boundaries!(cache, u, mesh::UnstructuredMesh2D, equations, surface_integral, dg::DG) - @unpack boundaries = cache - - @threaded for boundary in eachboundary(dg, cache) - element = boundaries.element_id[boundary] - side = boundaries.element_side_id[boundary] - - if side == 1 - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[v, l, boundary] = u[v, l, 1, element] - end - elseif side == 2 - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[v, l, boundary] = u[v, nnodes(dg), l, element] - end - elseif side == 3 - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[v, l, boundary] = u[v, l, nnodes(dg), element] - end - else # side == 4 - for l in eachnode(dg), v in eachvariable(equations) - boundaries.u[v, l, boundary] = u[v, 1, l, element] - end + @unpack boundaries = cache + + @threaded for boundary in eachboundary(dg, cache) + element = boundaries.element_id[boundary] + side = boundaries.element_side_id[boundary] + + if side == 1 + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[v, l, boundary] = u[v, l, 1, element] + end + elseif side == 2 + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[v, l, boundary] = u[v, nnodes(dg), l, element] + end + elseif side == 3 + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[v, l, boundary] = u[v, l, nnodes(dg), element] + end + else # side == 4 + for l in eachnode(dg), v in eachvariable(equations) + boundaries.u[v, l, boundary] = u[v, 1, l, element] + end + end end - end - return nothing + return nothing end - # TODO: Taal dimension agnostic function calc_boundary_flux!(cache, t, boundary_condition::BoundaryConditionPeriodic, mesh::Union{UnstructuredMesh2D, P4estMesh}, equations, surface_integral, dg::DG) - @assert isempty(eachboundary(dg, cache)) + @assert isempty(eachboundary(dg, cache)) end - # Function barrier for type stability function calc_boundary_flux!(cache, t, boundary_conditions, mesh::Union{UnstructuredMesh2D, P4estMesh}, equations, surface_integral, dg::DG) - @unpack boundary_condition_types, boundary_indices = boundary_conditions + @unpack boundary_condition_types, boundary_indices = boundary_conditions - calc_boundary_flux_by_type!(cache, t, boundary_condition_types, boundary_indices, - mesh, equations, surface_integral, dg) - return nothing + calc_boundary_flux_by_type!(cache, t, boundary_condition_types, boundary_indices, + mesh, equations, surface_integral, dg) + return nothing end - # Iterate over tuples of boundary condition types and associated indices # in a type-stable way using "lispy tuple programming". 
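The recursion below peels one boundary condition off the tuple per call, so each step dispatches on a concrete type. A minimal standalone sketch of this "lispy tuple programming" pattern (`apply_all!` is an illustrative name, not Trixi.jl API):

```julia
# Apply every callable in a heterogeneous tuple to `x`; recursing via
# `first`/`Base.tail` keeps each call type-stable, unlike a plain loop
# over a tuple of mixed element types.
apply_all!(x, fs::Tuple{}) = nothing
function apply_all!(x, fs::NTuple{N, Any}) where {N}
    first(fs)(x)                 # process the first element
    apply_all!(x, Base.tail(fs)) # recurse on the remaining tuple
    return nothing
end

apply_all!([1.0, 2.0], (sum, println)) # calls `sum`, then `println`
```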
-function calc_boundary_flux_by_type!(cache, t, BCs::NTuple{N,Any}, - BC_indices::NTuple{N,Vector{Int}}, +function calc_boundary_flux_by_type!(cache, t, BCs::NTuple{N, Any}, + BC_indices::NTuple{N, Vector{Int}}, mesh::Union{UnstructuredMesh2D, P4estMesh}, equations, surface_integral, dg::DG) where {N} - # Extract the boundary condition type and index vector - boundary_condition = first(BCs) - boundary_condition_indices = first(BC_indices) - # Extract the remaining types and indices to be processed later - remaining_boundary_conditions = Base.tail(BCs) - remaining_boundary_condition_indices = Base.tail(BC_indices) - - # process the first boundary condition type - calc_boundary_flux!(cache, t, boundary_condition, boundary_condition_indices, - mesh, equations, surface_integral, dg) - - # recursively call this method with the unprocessed boundary types - calc_boundary_flux_by_type!(cache, t, remaining_boundary_conditions, - remaining_boundary_condition_indices, - mesh, equations, surface_integral, dg) - - return nothing + # Extract the boundary condition type and index vector + boundary_condition = first(BCs) + boundary_condition_indices = first(BC_indices) + # Extract the remaining types and indices to be processed later + remaining_boundary_conditions = Base.tail(BCs) + remaining_boundary_condition_indices = Base.tail(BC_indices) + + # process the first boundary condition type + calc_boundary_flux!(cache, t, boundary_condition, boundary_condition_indices, + mesh, equations, surface_integral, dg) + + # recursively call this method with the unprocessed boundary types + calc_boundary_flux_by_type!(cache, t, remaining_boundary_conditions, + remaining_boundary_condition_indices, + mesh, equations, surface_integral, dg) + + return nothing end # terminate the type-stable iteration over tuples function calc_boundary_flux_by_type!(cache, t, BCs::Tuple{}, BC_indices::Tuple{}, mesh::Union{UnstructuredMesh2D, P4estMesh}, equations, surface_integral, dg::DG) - nothing + nothing end - function calc_boundary_flux!(cache, t, boundary_condition, boundary_indexing, mesh::UnstructuredMesh2D, equations, surface_integral, dg::DG) - @unpack surface_flux_values = cache.elements - @unpack element_id, element_side_id = cache.boundaries - - @threaded for local_index in eachindex(boundary_indexing) - # use the local index to get the global boundary index from the pre-sorted list - boundary = boundary_indexing[local_index] - - # get the element and side IDs on the boundary element - element = element_id[boundary] - side = element_side_id[boundary] - - # calc boundary flux on the current boundary interface - for node in eachnode(dg) - calc_boundary_flux!(surface_flux_values, t, boundary_condition, - mesh, have_nonconservative_terms(equations), - equations, surface_integral, dg, cache, - node, side, element, boundary) + @unpack surface_flux_values = cache.elements + @unpack element_id, element_side_id = cache.boundaries + + @threaded for local_index in eachindex(boundary_indexing) + # use the local index to get the global boundary index from the pre-sorted list + boundary = boundary_indexing[local_index] + + # get the element and side IDs on the boundary element + element = element_id[boundary] + side = element_side_id[boundary] + + # calc boundary flux on the current boundary interface + for node in eachnode(dg) + calc_boundary_flux!(surface_flux_values, t, boundary_condition, + mesh, have_nonconservative_terms(equations), + equations, surface_integral, dg, cache, + node, side, element, boundary) + end end - end end - # 
inlined version of the boundary flux calculation along a physical interface where the
# boundary flux values are set according to a particular `boundary_condition` function
@inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition,
 mesh::UnstructuredMesh2D,
 nonconservative_terms::False, equations,
 surface_integral, dg::DG, cache,
- node_index, side_index, element_index, boundary_index)
- @unpack normal_directions = cache.elements
- @unpack u, node_coordinates = cache.boundaries
- @unpack surface_flux = surface_integral
+ node_index, side_index, element_index,
+ boundary_index)
+ @unpack normal_directions = cache.elements
+ @unpack u, node_coordinates = cache.boundaries
+ @unpack surface_flux = surface_integral

- # pull the inner solution state from the boundary u values on the boundary element
- u_inner = get_node_vars(u, equations, dg, node_index, boundary_index)
+ # pull the inner solution state from the boundary u values on the boundary element
+ u_inner = get_node_vars(u, equations, dg, node_index, boundary_index)

- # pull the outward pointing (normal) directional vector
- outward_direction = get_surface_normal(normal_directions, node_index, side_index, element_index)
+ # pull the outward pointing (normal) directional vector
+ outward_direction = get_surface_normal(normal_directions, node_index, side_index,
+ element_index)

- # get the external solution values from the prescribed external state
- x = get_node_coords(node_coordinates, equations, dg, node_index, boundary_index)
+ # get the external solution values from the prescribed external state
+ x = get_node_coords(node_coordinates, equations, dg, node_index, boundary_index)

- # Call pointwise numerical flux function in the normal direction on the boundary
- flux = boundary_condition(u_inner, outward_direction, x, t, surface_flux, equations)
+ # Call pointwise numerical flux function in the normal direction on the boundary
+ flux = boundary_condition(u_inner, outward_direction, x, t, surface_flux, equations)

- for v in eachvariable(equations)
- surface_flux_values[v, node_index, side_index, element_index] = flux[v]
- end
+ for v in eachvariable(equations)
+ surface_flux_values[v, node_index, side_index, element_index] = flux[v]
+ end
end

# inlined version of the boundary flux and nonconservative terms calculation along a
@@ -405,41 +419,45 @@ end
 mesh::UnstructuredMesh2D,
 nonconservative_terms::True, equations,
 surface_integral, dg::DG, cache,
- node_index, side_index, element_index, boundary_index)
- surface_flux, nonconservative_flux = surface_integral.surface_flux
- @unpack normal_directions = cache.elements
- @unpack u, node_coordinates = cache.boundaries
-
- # pull the inner solution state from the boundary u values on the boundary element
- u_inner = get_node_vars(u, equations, dg, node_index, boundary_index)
-
- # pull the outward pointing (normal) directional vector
- outward_direction = get_surface_normal(normal_directions, node_index, side_index, element_index)
-
- # get the external solution values from the prescribed external state
- x = get_node_coords(node_coordinates, equations, dg, node_index, boundary_index)
-
- # Call pointwise numerical flux function for the conservative part
- # in the normal direction on the boundary
- flux = boundary_condition(u_inner, outward_direction, x, t, surface_flux, equations)
-
- # Compute pointwise nonconservative numerical flux at the boundary.
- # In general, nonconservative fluxes can depend on both the contravariant - # vectors (normal direction) at the current node and the averaged ones. - # However, both are the same at watertight interfaces, so we pass the - # `outward_direction` twice. - # Note: This does not set any type of boundary condition for the nonconservative term - noncons_flux = nonconservative_flux(u_inner, u_inner, outward_direction, outward_direction, equations) - - for v in eachvariable(equations) - # Note the factor 0.5 necessary for the nonconservative fluxes based on - # the interpretation of global SBP operators coupled discontinuously via - # central fluxes/SATs - surface_flux_values[v, node_index, side_index, element_index] = flux[v] + 0.5 * noncons_flux[v] - end + node_index, side_index, element_index, + boundary_index) + surface_flux, nonconservative_flux = surface_integral.surface_flux + @unpack normal_directions = cache.elements + @unpack u, node_coordinates = cache.boundaries + + # pull the inner solution state from the boundary u values on the boundary element + u_inner = get_node_vars(u, equations, dg, node_index, boundary_index) + + # pull the outward pointing (normal) directional vector + outward_direction = get_surface_normal(normal_directions, node_index, side_index, + element_index) + + # get the external solution values from the prescribed external state + x = get_node_coords(node_coordinates, equations, dg, node_index, boundary_index) + + # Call pointwise numerical flux function for the conservative part + # in the normal direction on the boundary + flux = boundary_condition(u_inner, outward_direction, x, t, surface_flux, equations) + + # Compute pointwise nonconservative numerical flux at the boundary. + # In general, nonconservative fluxes can depend on both the contravariant + # vectors (normal direction) at the current node and the averaged ones. + # However, both are the same at watertight interfaces, so we pass the + # `outward_direction` twice. + # Note: This does not set any type of boundary condition for the nonconservative term + noncons_flux = nonconservative_flux(u_inner, u_inner, outward_direction, + outward_direction, equations) + + for v in eachvariable(equations) + # Note the factor 0.5 necessary for the nonconservative fluxes based on + # the interpretation of global SBP operators coupled discontinuously via + # central fluxes/SATs + surface_flux_values[v, node_index, side_index, element_index] = flux[v] + + 0.5 * + noncons_flux[v] + end end - # Note! The local side numbering for the unstructured quadrilateral element implementation differs # from the structured TreeMesh or StructuredMesh local side numbering: # @@ -456,55 +474,58 @@ end # Therefore, we require a different surface integral routine here despite their similar structure. 
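Before moving on to the surface integral routine below, the tuple recursion in `calc_boundary_flux_by_type!` above deserves a brief illustration: peeling one boundary condition off with `first`/`Base.tail` and recursing on the remainder lets the compiler dispatch on each boundary condition type separately and unroll the iteration at compile time. A minimal, self-contained sketch of the pattern (`process` and `apply_by_type!` are hypothetical stand-ins, not Trixi.jl functions):

```julia
# Stand-in for the per-type work done by `calc_boundary_flux!`
process(bc, indices) = println(bc, " applied to boundaries ", indices)

# Peel off the first boundary condition and recurse on the rest
function apply_by_type!(BCs::NTuple{N, Any}, indices::NTuple{N, Vector{Int}}) where {N}
    process(first(BCs), first(indices))
    apply_by_type!(Base.tail(BCs), Base.tail(indices))
end

# Empty tuples terminate the recursion in a type-stable way
apply_by_type!(::Tuple{}, ::Tuple{}) = nothing

# Two "boundary conditions" of different types, each with its own index list
apply_by_type!((sin, cos), ([1, 2], [3]))
```

Because the tuple types are known at compile time, each `process` call is statically dispatched, which is exactly why the terminating method on empty tuples is needed instead of a plain loop.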
function calc_surface_integral!(du, u, mesh::UnstructuredMesh2D, equations,
                                surface_integral, dg::DGSEM, cache)
-  @unpack boundary_interpolation = dg.basis
-  @unpack surface_flux_values = cache.elements
-
-  @threaded for element in eachelement(dg, cache)
-    for l in eachnode(dg), v in eachvariable(equations)
-      # surface contribution along local sides 2 and 4 (fixed x and y varies)
-      du[v, 1, l, element] += ( surface_flux_values[v, l, 4, element]
-                                * boundary_interpolation[1, 1] )
-      du[v, nnodes(dg), l, element] += ( surface_flux_values[v, l, 2, element]
-                                         * boundary_interpolation[nnodes(dg), 2] )
-      # surface contribution along local sides 1 and 3 (fixed y and x varies)
-      du[v, l, 1, element] += ( surface_flux_values[v, l, 1, element]
-                                * boundary_interpolation[1, 1] )
-      du[v, l, nnodes(dg), element] += ( surface_flux_values[v, l, 3, element]
-                                         * boundary_interpolation[nnodes(dg), 2] )
+    @unpack boundary_interpolation = dg.basis
+    @unpack surface_flux_values = cache.elements
+
+    @threaded for element in eachelement(dg, cache)
+        for l in eachnode(dg), v in eachvariable(equations)
+            # surface contribution along local sides 2 and 4 (fixed x and y varies)
+            du[v, 1, l, element] += (surface_flux_values[v, l, 4, element]
+                                     *
+                                     boundary_interpolation[1, 1])
+            du[v, nnodes(dg), l, element] += (surface_flux_values[v, l, 2, element]
+                                              *
+                                              boundary_interpolation[nnodes(dg), 2])
+            # surface contribution along local sides 1 and 3 (fixed y and x varies)
+            du[v, l, 1, element] += (surface_flux_values[v, l, 1, element]
+                                     *
+                                     boundary_interpolation[1, 1])
+            du[v, l, nnodes(dg), element] += (surface_flux_values[v, l, 3, element]
+                                              *
+                                              boundary_interpolation[nnodes(dg), 2])
+        end
+    end
-    end
-  end
-
-  return nothing
+    return nothing
end
-
# This routine computes the maximum value of the discrete metric identities necessary to ensure
# that the approximation will be free-stream preserving (i.e. a constant solution remains constant)
# on a curvilinear mesh.
# Note! Independent of the equation system and is only a check on the discrete mapping terms.
# Can be used for a metric identities check on StructuredMesh{2} or UnstructuredMesh2D function max_discrete_metric_identities(dg::DGSEM, cache) - @unpack derivative_matrix = dg.basis - @unpack contravariant_vectors = cache.elements + @unpack derivative_matrix = dg.basis + @unpack contravariant_vectors = cache.elements - ndims_ = size(contravariant_vectors, 1) + ndims_ = size(contravariant_vectors, 1) - metric_id_dx = zeros(eltype(contravariant_vectors), nnodes(dg), nnodes(dg)) - metric_id_dy = zeros(eltype(contravariant_vectors), nnodes(dg), nnodes(dg)) + metric_id_dx = zeros(eltype(contravariant_vectors), nnodes(dg), nnodes(dg)) + metric_id_dy = zeros(eltype(contravariant_vectors), nnodes(dg), nnodes(dg)) - max_metric_ids = zero(eltype(contravariant_vectors)) + max_metric_ids = zero(eltype(contravariant_vectors)) - for i in 1:ndims_, element in eachelement(dg, cache) - # compute D*Ja_1^i + Ja_2^i*D^T - @views mul!(metric_id_dx, derivative_matrix, contravariant_vectors[i, 1, :, :, element]) - @views mul!(metric_id_dy, contravariant_vectors[i, 2, :, :, element], derivative_matrix') - local_max_metric_ids = maximum( abs.(metric_id_dx + metric_id_dy) ) + for i in 1:ndims_, element in eachelement(dg, cache) + # compute D*Ja_1^i + Ja_2^i*D^T + @views mul!(metric_id_dx, derivative_matrix, + contravariant_vectors[i, 1, :, :, element]) + @views mul!(metric_id_dy, contravariant_vectors[i, 2, :, :, element], + derivative_matrix') + local_max_metric_ids = maximum(abs.(metric_id_dx + metric_id_dy)) - max_metric_ids = max(max_metric_ids, local_max_metric_ids) - end + max_metric_ids = max(max_metric_ids, local_max_metric_ids) + end - return max_metric_ids + return max_metric_ids end - - end # @muladd diff --git a/src/solvers/dgsem_unstructured/indicators_2d.jl b/src/solvers/dgsem_unstructured/indicators_2d.jl index fe841e78687..8052534ad48 100644 --- a/src/solvers/dgsem_unstructured/indicators_2d.jl +++ b/src/solvers/dgsem_unstructured/indicators_2d.jl @@ -3,22 +3,22 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! 
format: noindent

function apply_smoothing!(mesh::UnstructuredMesh2D, alpha, alpha_tmp, dg, cache)
-  # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
-  # Copy alpha values such that smoothing is independent of the element access order
-  alpha_tmp .= alpha
-
-  # Loop over interfaces
-  for interface in eachinterface(dg, cache)
-    # Get neighboring element ids
-    left = cache.interfaces.element_ids[1, interface]
-    right = cache.interfaces.element_ids[2, interface]
+    # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
+    # Copy alpha values such that smoothing is independent of the element access order
+    alpha_tmp .= alpha

-    # Apply smoothing
-    alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left])
-    alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right])
-  end
-end
+    # Loop over interfaces
+    for interface in eachinterface(dg, cache)
+        # Get neighboring element ids
+        left = cache.interfaces.element_ids[1, interface]
+        right = cache.interfaces.element_ids[2, interface]
+        # Apply smoothing
+        alpha[left] = max(alpha_tmp[left], 0.5 * alpha_tmp[right], alpha[left])
+        alpha[right] = max(alpha_tmp[right], 0.5 * alpha_tmp[left], alpha[right])
+    end
+end
end # @muladd
diff --git a/src/solvers/dgsem_unstructured/mappings_geometry_curved_2d.jl b/src/solvers/dgsem_unstructured/mappings_geometry_curved_2d.jl
index 8ad018bd08a..75b9a1f4da2 100644
--- a/src/solvers/dgsem_unstructured/mappings_geometry_curved_2d.jl
+++ b/src/solvers/dgsem_unstructured/mappings_geometry_curved_2d.jl
@@ -3,148 +3,161 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#! format: noindent

# transfinite mapping formula from a point (xi, eta) in reference space [-1,1]^2 to a point
# (x,y) in physical coordinate space for a quadrilateral element with general curved sides
# Alg. 98 from the blue book of Kopriva
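To make the edge-blending structure of this formula concrete, here is a self-contained toy version in which the four sides are plain parametric functions instead of `CurvedSurface` data (all names and the unit-square check are purely illustrative); the implementation below instead evaluates the stored boundary polynomials with `evaluate_at`:

```julia
# Toy transfinite map on [-1, 1]^2; sides ordered as in the element convention
# (1: bottom, 2: right, 3: top, 4: left), each g(s) returning a tuple (x, y).
function toy_transfinite_map(xi, eta, g1, g2, g3, g4)
    # linear blending of the four boundary curves
    edge = 0.5 .* ((1 - eta) .* g1(xi) .+ (1 + xi) .* g2(eta) .+
                   (1 + eta) .* g3(xi) .+ (1 - xi) .* g4(eta))
    # subtract the bilinear interpolant of the corners, which the edge
    # blending counts twice
    corner = 0.25 .* ((1 - xi) * (1 - eta) .* g1(-1.0) .+
                      (1 + xi) * (1 - eta) .* g1(1.0) .+
                      (1 + xi) * (1 + eta) .* g3(1.0) .+
                      (1 - xi) * (1 + eta) .* g3(-1.0))
    return edge .- corner
end

# Unit square with straight sides: the reference center maps to (0.5, 0.5)
g1(s) = (0.5 * (s + 1), 0.0); g3(s) = (0.5 * (s + 1), 1.0)
g2(s) = (1.0, 0.5 * (s + 1)); g4(s) = (0.0, 0.5 * (s + 1))
toy_transfinite_map(0.0, 0.0, g1, g2, g3, g4)  # == (0.5, 0.5)
```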
function transfinite_quad_map(xi, eta, surface_curves::AbstractVector{<:CurvedSurface})
-  # evaluate the gamma curves to get the four corner points of the element
-  x_corner1, y_corner1 = evaluate_at(-1.0, surface_curves[1])
-  x_corner2, y_corner2 = evaluate_at( 1.0, surface_curves[1])
-  x_corner3, y_corner3 = evaluate_at( 1.0, surface_curves[3])
-  x_corner4, y_corner4 = evaluate_at(-1.0, surface_curves[3])
-
-  # evaluate along the gamma curves at a particular point (ξ, η) in computational space to get
-  # the value (x,y) in physical space
-  x1, y1 = evaluate_at(xi , surface_curves[1])
-  x2, y2 = evaluate_at(eta, surface_curves[2])
-  x3, y3 = evaluate_at(xi , surface_curves[3])
-  x4, y4 = evaluate_at(eta, surface_curves[4])
-
-  x = ( 0.5 * ( (1.0 - xi) * x4 + (1.0 + xi) * x2 + (1.0 - eta) * x1 + (1.0 + eta) * x3 )
-      - 0.25 * ( (1.0 - xi) * ( (1.0 - eta) * x_corner1 + (1.0 + eta) * x_corner4 )
-               + (1.0 + xi) * ( (1.0 - eta) * x_corner2 + (1.0 + eta) * x_corner3 ) ) )
-
-  y = ( 0.5 * ( (1.0 - xi) * y4 + (1.0 + xi) * y2 + (1.0 - eta) * y1 + (1.0 + eta) * y3 )
-      - 0.25 * ( (1.0 - xi) * ( (1.0 - eta) * y_corner1 + (1.0 + eta) * y_corner4 )
-               + (1.0 + xi) * ( (1.0 - eta) * y_corner2 + (1.0 + eta) * y_corner3 ) ) )
-
-  return x, y
+    # evaluate the gamma curves to get the four corner points of the element
+    x_corner1, y_corner1 = evaluate_at(-1.0, surface_curves[1])
+    x_corner2, y_corner2 = evaluate_at(1.0, surface_curves[1])
+    x_corner3, y_corner3 = evaluate_at(1.0, surface_curves[3])
+    x_corner4, y_corner4 = evaluate_at(-1.0, surface_curves[3])
+
+    # evaluate along the gamma curves at a particular point (ξ, η) in computational space to get
+    # the value (x,y) in physical space
+    x1, y1 = evaluate_at(xi, surface_curves[1])
+    x2, y2 = evaluate_at(eta, surface_curves[2])
+    x3, y3 = evaluate_at(xi, surface_curves[3])
+    x4, y4 = evaluate_at(eta, surface_curves[4])
+
+    x = (0.5 *
+         ((1.0 - xi) * x4 + (1.0 + xi) * x2 + (1.0 - eta) * x1 + (1.0 + eta) * x3)
+         -
+         0.25 * ((1.0 - xi) * ((1.0 - eta) * x_corner1 + (1.0 + eta) * x_corner4)
+          +
+          (1.0 + xi) * ((1.0 - eta) * x_corner2 + (1.0 + eta) * x_corner3)))
+
+    y = (0.5 *
+         ((1.0 - xi) * y4 + (1.0 + xi) * y2 + (1.0 - eta) * y1 + (1.0 + eta) * y3)
+         -
+         0.25 * ((1.0 - xi) * ((1.0 - eta) * y_corner1 + (1.0 + eta) * y_corner4)
+          +
+          (1.0 + xi) * ((1.0 - eta) * y_corner2 + (1.0 + eta) * y_corner3)))
+
+    return x, y
end
-
# Compute the metric terms for the general curved sided quadrilateral transfinite mapping
# Alg.
99 from the blue book of Kopriva -function transfinite_quad_map_metrics(xi, eta, surface_curves::AbstractVector{<:CurvedSurface}) - - # evaluate the gamma curves to get the four corner points of the element - x_corner1, y_corner1 = evaluate_at(-1.0, surface_curves[1]) - x_corner2, y_corner2 = evaluate_at( 1.0, surface_curves[1]) - x_corner3, y_corner3 = evaluate_at( 1.0, surface_curves[3]) - x_corner4, y_corner4 = evaluate_at(-1.0, surface_curves[3]) - - # evaluate along the gamma curves at a particular point (ξ, η) in computational space to get - # the value (x,y) in physical space - x1, y1 = evaluate_at(xi , surface_curves[1]) - x2, y2 = evaluate_at(eta, surface_curves[2]) - x3, y3 = evaluate_at(xi , surface_curves[3]) - x4, y4 = evaluate_at(eta, surface_curves[4]) - - # evaluate along the derivative of the gamma curves at a particular point (ξ, η) in - # computational space to get the value (x_prime,y_prime) in physical space - x1_prime, y1_prime = derivative_at(xi , surface_curves[1]) - x2_prime, y2_prime = derivative_at(eta, surface_curves[2]) - x3_prime, y3_prime = derivative_at(xi , surface_curves[3]) - x4_prime, y4_prime = derivative_at(eta, surface_curves[4]) - - X_xi = ( 0.5 * (x2 - x4 + (1.0 - eta) * x1_prime + (1.0 + eta) * x3_prime) - -0.25 * ((1.0 - eta) * (x_corner2 - x_corner1) + (1.0 + eta) * (x_corner3 - x_corner4)) ) - - X_eta = ( 0.5 * ((1.0 - xi) * x4_prime + (1.0 + xi) * x2_prime + x3 - x1) - -0.25 * ((1.0 - xi) * (x_corner4 - x_corner1) + (1.0 + xi) * (x_corner3 - x_corner2)) ) - - Y_xi = ( 0.5 * (y2 - y4 + (1.0 - eta) * y1_prime + (1.0 + eta) * y3_prime) - -0.25 * ((1.0 - eta) * (y_corner2 - y_corner1) + (1.0 + eta) * (y_corner3 - y_corner4)) ) - - Y_eta = ( 0.5 * ((1.0 - xi) * y4_prime + (1.0 + xi) * y2_prime + y3 - y1) - -0.25 * ((1.0 - xi) * (y_corner4 - y_corner1) + (1.0 + xi) * (y_corner3 - y_corner2)) ) - - return X_xi, X_eta, Y_xi, Y_eta +function transfinite_quad_map_metrics(xi, eta, + surface_curves::AbstractVector{<:CurvedSurface}) + + # evaluate the gamma curves to get the four corner points of the element + x_corner1, y_corner1 = evaluate_at(-1.0, surface_curves[1]) + x_corner2, y_corner2 = evaluate_at(1.0, surface_curves[1]) + x_corner3, y_corner3 = evaluate_at(1.0, surface_curves[3]) + x_corner4, y_corner4 = evaluate_at(-1.0, surface_curves[3]) + + # evaluate along the gamma curves at a particular point (ξ, η) in computational space to get + # the value (x,y) in physical space + x1, y1 = evaluate_at(xi, surface_curves[1]) + x2, y2 = evaluate_at(eta, surface_curves[2]) + x3, y3 = evaluate_at(xi, surface_curves[3]) + x4, y4 = evaluate_at(eta, surface_curves[4]) + + # evaluate along the derivative of the gamma curves at a particular point (ξ, η) in + # computational space to get the value (x_prime,y_prime) in physical space + x1_prime, y1_prime = derivative_at(xi, surface_curves[1]) + x2_prime, y2_prime = derivative_at(eta, surface_curves[2]) + x3_prime, y3_prime = derivative_at(xi, surface_curves[3]) + x4_prime, y4_prime = derivative_at(eta, surface_curves[4]) + + X_xi = (0.5 * (x2 - x4 + (1.0 - eta) * x1_prime + (1.0 + eta) * x3_prime) + - + 0.25 * ((1.0 - eta) * (x_corner2 - x_corner1) + + (1.0 + eta) * (x_corner3 - x_corner4))) + + X_eta = (0.5 * ((1.0 - xi) * x4_prime + (1.0 + xi) * x2_prime + x3 - x1) + - + 0.25 * ((1.0 - xi) * (x_corner4 - x_corner1) + + (1.0 + xi) * (x_corner3 - x_corner2))) + + Y_xi = (0.5 * (y2 - y4 + (1.0 - eta) * y1_prime + (1.0 + eta) * y3_prime) + - + 0.25 * ((1.0 - eta) * (y_corner2 - y_corner1) + + (1.0 + eta) * (y_corner3 
- y_corner4))) + + Y_eta = (0.5 * ((1.0 - xi) * y4_prime + (1.0 + xi) * y2_prime + y3 - y1) + - + 0.25 * ((1.0 - xi) * (y_corner4 - y_corner1) + + (1.0 + xi) * (y_corner3 - y_corner2))) + + return X_xi, X_eta, Y_xi, Y_eta end - # construct the (x,y) node coordinates in the volume of a curved sided element -function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, element, nodes, +function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, element, + nodes, surface_curves::AbstractVector{<:CurvedSurface}) + for j in eachindex(nodes), i in eachindex(nodes) + node_coordinates[:, i, j, element] .= transfinite_quad_map(nodes[i], nodes[j], + surface_curves) + end - for j in eachindex(nodes), i in eachindex(nodes) - node_coordinates[:, i, j, element] .= transfinite_quad_map(nodes[i], nodes[j], surface_curves) - end - - return node_coordinates + return node_coordinates end - # construct the metric terms for a curved sided element function calc_metric_terms!(jacobian_matrix, element, nodes, surface_curves::AbstractVector{<:CurvedSurface}) - # storage format: - # jacobian_matrix[1,1,:,:,:] <- X_xi - # jacobian_matrix[1,2,:,:,:] <- X_eta - # jacobian_matrix[2,1,:,:,:] <- Y_xi - # jacobian_matrix[2,2,:,:,:] <- Y_eta - for j in eachindex(nodes), i in eachindex(nodes) - (jacobian_matrix[1, 1, i, j, element], - jacobian_matrix[1, 2, i, j, element], - jacobian_matrix[2, 1, i, j, element], - jacobian_matrix[2, 2, i, j, element]) = transfinite_quad_map_metrics(nodes[i], nodes[j], - surface_curves) - end - - return jacobian_matrix + # storage format: + # jacobian_matrix[1,1,:,:,:] <- X_xi + # jacobian_matrix[1,2,:,:,:] <- X_eta + # jacobian_matrix[2,1,:,:,:] <- Y_xi + # jacobian_matrix[2,2,:,:,:] <- Y_eta + for j in eachindex(nodes), i in eachindex(nodes) + (jacobian_matrix[1, 1, i, j, element], + jacobian_matrix[1, 2, i, j, element], + jacobian_matrix[2, 1, i, j, element], + jacobian_matrix[2, 2, i, j, element]) = transfinite_quad_map_metrics(nodes[i], + nodes[j], + surface_curves) + end + + return jacobian_matrix end - # construct the normal direction vectors (but not actually normalized) for a curved sided element # normalization occurs on the fly during the surface flux computation function calc_normal_directions!(normal_directions, element, nodes, surface_curves::AbstractVector{<:CurvedSurface}) - # normal directions on the boundary for the left (local side 4) and right (local side 2) - for j in eachindex(nodes) - # side 2 - X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(1.0, nodes[j], surface_curves) - Jtemp = X_xi * Y_eta - X_eta * Y_xi - normal_directions[1, j, 2, element] = sign(Jtemp) * ( Y_eta ) - normal_directions[2, j, 2, element] = sign(Jtemp) * (-X_eta ) - - # side 4 - X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(-1.0, nodes[j], surface_curves) - Jtemp = X_xi * Y_eta - X_eta * Y_xi - normal_directions[1, j, 4, element] = -sign(Jtemp) * ( Y_eta ) - normal_directions[2, j, 4, element] = -sign(Jtemp) * (-X_eta ) - end - - # normal directions on the boundary for the top (local side 3) and bottom (local side 1) - for i in eachindex(nodes) - # side 1 - X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(nodes[i], -1.0, surface_curves) - Jtemp = X_xi * Y_eta - X_eta * Y_xi - normal_directions[1, i, 1, element] = -sign(Jtemp) * (-Y_xi ) - normal_directions[2, i, 1, element] = -sign(Jtemp) * ( X_xi ) - - # side 3 - X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(nodes[i], 1.0, surface_curves) - Jtemp = X_xi * Y_eta - X_eta * Y_xi - 
normal_directions[1, i, 3, element] = sign(Jtemp) * (-Y_xi ) - normal_directions[2, i, 3, element] = sign(Jtemp) * ( X_xi ) - end - - return normal_directions + # normal directions on the boundary for the left (local side 4) and right (local side 2) + for j in eachindex(nodes) + # side 2 + X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(1.0, nodes[j], + surface_curves) + Jtemp = X_xi * Y_eta - X_eta * Y_xi + normal_directions[1, j, 2, element] = sign(Jtemp) * (Y_eta) + normal_directions[2, j, 2, element] = sign(Jtemp) * (-X_eta) + + # side 4 + X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(-1.0, nodes[j], + surface_curves) + Jtemp = X_xi * Y_eta - X_eta * Y_xi + normal_directions[1, j, 4, element] = -sign(Jtemp) * (Y_eta) + normal_directions[2, j, 4, element] = -sign(Jtemp) * (-X_eta) + end + + # normal directions on the boundary for the top (local side 3) and bottom (local side 1) + for i in eachindex(nodes) + # side 1 + X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(nodes[i], -1.0, + surface_curves) + Jtemp = X_xi * Y_eta - X_eta * Y_xi + normal_directions[1, i, 1, element] = -sign(Jtemp) * (-Y_xi) + normal_directions[2, i, 1, element] = -sign(Jtemp) * (X_xi) + + # side 3 + X_xi, X_eta, Y_xi, Y_eta = transfinite_quad_map_metrics(nodes[i], 1.0, + surface_curves) + Jtemp = X_xi * Y_eta - X_eta * Y_xi + normal_directions[1, i, 3, element] = sign(Jtemp) * (-Y_xi) + normal_directions[2, i, 3, element] = sign(Jtemp) * (X_xi) + end + + return normal_directions end - - end # @muladd diff --git a/src/solvers/dgsem_unstructured/mappings_geometry_straight_2d.jl b/src/solvers/dgsem_unstructured/mappings_geometry_straight_2d.jl index d4f9cf5d49d..7ceba93188d 100644 --- a/src/solvers/dgsem_unstructured/mappings_geometry_straight_2d.jl +++ b/src/solvers/dgsem_unstructured/mappings_geometry_straight_2d.jl @@ -3,114 +3,112 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # mapping formula from a point (xi, eta) in reference space [-1,1]^2 to a point (x,y) # in physical coordinate space for a quadrilateral element with straight sides # Alg. 95 from the blue book of Kopriva function straight_side_quad_map(xi, eta, corner_points) + x = 0.25 * (corner_points[1, 1] * (1.0 - xi) * (1.0 - eta) + + corner_points[2, 1] * (1.0 + xi) * (1.0 - eta) + + corner_points[3, 1] * (1.0 + xi) * (1.0 + eta) + + corner_points[4, 1] * (1.0 - xi) * (1.0 + eta)) - x = (0.25 * ( corner_points[1,1] * (1.0 - xi) * (1.0 - eta) - + corner_points[2,1] * (1.0 + xi) * (1.0 - eta) - + corner_points[3,1] * (1.0 + xi) * (1.0 + eta) - + corner_points[4,1] * (1.0 - xi) * (1.0 + eta)) ) - - y = (0.25 * ( corner_points[1,2] * (1.0 - xi) * (1.0 - eta) - + corner_points[2,2] * (1.0 + xi) * (1.0 - eta) - + corner_points[3,2] * (1.0 + xi) * (1.0 + eta) - + corner_points[4,2] * (1.0 - xi) * (1.0 + eta)) ) + y = 0.25 * (corner_points[1, 2] * (1.0 - xi) * (1.0 - eta) + + corner_points[2, 2] * (1.0 + xi) * (1.0 - eta) + + corner_points[3, 2] * (1.0 + xi) * (1.0 + eta) + + corner_points[4, 2] * (1.0 - xi) * (1.0 + eta)) - return x, y + return x, y end - # Compute the metric terms for the straight sided quadrilateral mapping # Alg. 
100 from the blue book of Kopriva function straight_side_quad_map_metrics(xi, eta, corner_points) + X_xi = 0.25 * ((1.0 - eta) * (corner_points[2, 1] - corner_points[1, 1]) + + (1.0 + eta) * (corner_points[3, 1] - corner_points[4, 1])) - X_xi = ( 0.25 * ( (1.0 - eta) * (corner_points[2,1] - corner_points[1,1]) - + (1.0 + eta) * (corner_points[3,1] - corner_points[4,1])) ) + X_eta = 0.25 * ((1.0 - xi) * (corner_points[4, 1] - corner_points[1, 1]) + + (1.0 + xi) * (corner_points[3, 1] - corner_points[2, 1])) - X_eta = ( 0.25 * ( (1.0 - xi) * (corner_points[4,1] - corner_points[1,1]) - + (1.0 + xi) * (corner_points[3,1] - corner_points[2,1])) ) + Y_xi = 0.25 * ((1.0 - eta) * (corner_points[2, 2] - corner_points[1, 2]) + + (1.0 + eta) * (corner_points[3, 2] - corner_points[4, 2])) - Y_xi = ( 0.25 * ( (1.0 - eta) * (corner_points[2,2] - corner_points[1,2]) - + (1.0 + eta) * (corner_points[3,2] - corner_points[4,2])) ) + Y_eta = 0.25 * ((1.0 - xi) * (corner_points[4, 2] - corner_points[1, 2]) + + (1.0 + xi) * (corner_points[3, 2] - corner_points[2, 2])) - Y_eta = ( 0.25 * ( (1.0 - xi) * (corner_points[4,2] - corner_points[1,2]) - + (1.0 + xi) * (corner_points[3,2] - corner_points[2,2])) ) - - return X_xi, X_eta, Y_xi, Y_eta + return X_xi, X_eta, Y_xi, Y_eta end - # construct the (x,y) node coordinates in the volume of a straight sided element -function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, element, nodes, corners) - - for j in eachindex(nodes), i in eachindex(nodes) - node_coordinates[:, i ,j ,element] .= straight_side_quad_map(nodes[i], nodes[j], corners) - end - - return node_coordinates +function calc_node_coordinates!(node_coordinates::AbstractArray{<:Any, 4}, element, + nodes, corners) + for j in eachindex(nodes), i in eachindex(nodes) + node_coordinates[:, i, j, element] .= straight_side_quad_map(nodes[i], nodes[j], + corners) + end + + return node_coordinates end - # construct the metric terms for a straight sided element function calc_metric_terms!(jacobian_matrix, element, nodes, corners) - # storage format: - # jacobian_matrix[1,1,:,:,:] <- X_xi - # jacobian_matrix[1,2,:,:,:] <- X_eta - # jacobian_matrix[2,1,:,:,:] <- Y_xi - # jacobian_matrix[2,2,:,:,:] <- Y_eta - for j in eachindex(nodes), i in eachindex(nodes) - (jacobian_matrix[1, 1, i, j, element], - jacobian_matrix[1, 2, i, j, element], - jacobian_matrix[2, 1, i, j, element], - jacobian_matrix[2, 2, i, j, element]) = straight_side_quad_map_metrics(nodes[i], nodes[j], - corners) - end - - return jacobian_matrix + # storage format: + # jacobian_matrix[1,1,:,:,:] <- X_xi + # jacobian_matrix[1,2,:,:,:] <- X_eta + # jacobian_matrix[2,1,:,:,:] <- Y_xi + # jacobian_matrix[2,2,:,:,:] <- Y_eta + for j in eachindex(nodes), i in eachindex(nodes) + (jacobian_matrix[1, 1, i, j, element], + jacobian_matrix[1, 2, i, j, element], + jacobian_matrix[2, 1, i, j, element], + jacobian_matrix[2, 2, i, j, element]) = straight_side_quad_map_metrics(nodes[i], + nodes[j], + corners) + end + + return jacobian_matrix end - # construct the normal direction vectors (but not actually normalized) for a straight sided element # normalization occurs on the fly during the surface flux computation function calc_normal_directions!(normal_directions, element, nodes, corners) - # normal directions on the boundary for the left (local side 4) and right (local side 2) - for j in eachindex(nodes) - # side 2 - X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(1.0, nodes[j], corners) - Jtemp = X_xi * Y_eta - X_eta * Y_xi - 
normal_directions[1, j, 2, element] = sign(Jtemp) * ( Y_eta )
-    normal_directions[2, j, 2, element] = sign(Jtemp) * (-X_eta )
-
-    # side 4
-    X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(-1.0, nodes[j], corners)
-    Jtemp = X_xi * Y_eta - X_eta * Y_xi
-    normal_directions[1, j, 4, element] = -sign(Jtemp) * ( Y_eta )
-    normal_directions[2, j, 4, element] = -sign(Jtemp) * (-X_eta )
-  end
-
-  # normal directions on the boundary for the top (local side 3) and bottom (local side 1)
-  for i in eachindex(nodes)
-    # side 1
-    X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(nodes[i], -1.0, corners)
-    Jtemp = X_xi * Y_eta - X_eta * Y_xi
-    normal_directions[1, i, 1, element] = -sign(Jtemp) * (-Y_xi )
-    normal_directions[2, i, 1, element] = -sign(Jtemp) * ( X_xi )
-
-    # side 3
-    X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(nodes[i], 1.0, corners)
-    Jtemp = X_xi * Y_eta - X_eta * Y_xi
-    normal_directions[1, i, 3, element] = sign(Jtemp) * (-Y_xi )
-    normal_directions[2, i, 3, element] = sign(Jtemp) * ( X_xi )
-  end
-
-  return normal_directions
+    # normal directions on the boundary for the left (local side 4) and right (local side 2)
+    for j in eachindex(nodes)
+        # side 2
+        X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(1.0, nodes[j],
+                                                                  corners)
+        Jtemp = X_xi * Y_eta - X_eta * Y_xi
+        normal_directions[1, j, 2, element] = sign(Jtemp) * (Y_eta)
+        normal_directions[2, j, 2, element] = sign(Jtemp) * (-X_eta)
+
+        # side 4
+        X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(-1.0, nodes[j],
+                                                                  corners)
+        Jtemp = X_xi * Y_eta - X_eta * Y_xi
+        normal_directions[1, j, 4, element] = -sign(Jtemp) * (Y_eta)
+        normal_directions[2, j, 4, element] = -sign(Jtemp) * (-X_eta)
+    end
+
+    # normal directions on the boundary for the top (local side 3) and bottom (local side 1)
+    for i in eachindex(nodes)
+        # side 1
+        X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(nodes[i], -1.0,
+                                                                  corners)
+        Jtemp = X_xi * Y_eta - X_eta * Y_xi
+        normal_directions[1, i, 1, element] = -sign(Jtemp) * (-Y_xi)
+        normal_directions[2, i, 1, element] = -sign(Jtemp) * (X_xi)
+
+        # side 3
+        X_xi, X_eta, Y_xi, Y_eta = straight_side_quad_map_metrics(nodes[i], 1.0,
+                                                                  corners)
+        Jtemp = X_xi * Y_eta - X_eta * Y_xi
+        normal_directions[1, i, 3, element] = sign(Jtemp) * (-Y_xi)
+        normal_directions[2, i, 3, element] = sign(Jtemp) * (X_xi)
+    end
+
+    return normal_directions
end
-
-
end # @muladd
diff --git a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl
index 5315e695bd6..cad5542aae3 100644
--- a/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl
+++ b/src/solvers/dgsem_unstructured/sort_boundary_conditions.jl
@@ -3,7 +3,7 @@
 # we need to opt-in explicitly.
 # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
 @muladd begin
-
+#! format: noindent

"""
    UnstructuredSortedBoundaryTypes

General container to sort the boundary conditions by type for some unstructured meshes/solvers.
It stores a set of global indices for each boundary condition type to expedite computation
during the call to `calc_boundary_flux!`. The original dictionary form of the boundary conditions
set by the user in the elixir file is also stored for printing.
"""
-mutable struct UnstructuredSortedBoundaryTypes{N, BCs<:NTuple{N, Any}}
-  boundary_condition_types::BCs # specific boundary condition type(s), e.g.
BoundaryConditionDirichlet - boundary_indices::NTuple{N, Vector{Int}} # integer vectors containing global boundary indices - boundary_dictionary::Dict{Symbol, Any} # boundary conditions as set by the user in the elixir file +mutable struct UnstructuredSortedBoundaryTypes{N, BCs <: NTuple{N, Any}} + boundary_condition_types::BCs # specific boundary condition type(s), e.g. BoundaryConditionDirichlet + boundary_indices::NTuple{N, Vector{Int}} # integer vectors containing global boundary indices + boundary_dictionary::Dict{Symbol, Any} # boundary conditions as set by the user in the elixir file end - # constructor that "eats" the original boundary condition dictionary and sorts the information # from the `UnstructuredBoundaryContainer2D` in cache.boundaries according to the boundary types # and stores the associated global boundary indexing in NTuple function UnstructuredSortedBoundaryTypes(boundary_conditions::Dict, cache) - # extract the unique boundary function routines from the dictionary - boundary_condition_types = Tuple(unique(collect(values(boundary_conditions)))) - n_boundary_types = length(boundary_condition_types) - boundary_indices = ntuple(_ -> [], n_boundary_types) + # extract the unique boundary function routines from the dictionary + boundary_condition_types = Tuple(unique(collect(values(boundary_conditions)))) + n_boundary_types = length(boundary_condition_types) + boundary_indices = ntuple(_ -> [], n_boundary_types) - container = UnstructuredSortedBoundaryTypes{n_boundary_types, typeof(boundary_condition_types)}( - boundary_condition_types, boundary_indices, boundary_conditions) + container = UnstructuredSortedBoundaryTypes{n_boundary_types, + typeof(boundary_condition_types)}(boundary_condition_types, + boundary_indices, + boundary_conditions) - initialize!(container, cache) + initialize!(container, cache) end - -function initialize!(boundary_types_container::UnstructuredSortedBoundaryTypes{N}, cache) where N - @unpack boundary_dictionary, boundary_condition_types = boundary_types_container - - unique_names = unique(cache.boundaries.name) - - if mpi_isparallel() - # Exchange of boundaries names - send_buffer = Vector{UInt8}(join(unique_names, "\0")) - push!(send_buffer, 0) - if mpi_isroot() - recv_buffer_length = MPI.Gather(length(send_buffer), mpi_root(), mpi_comm()) - recv_buffer = Vector{UInt8}(undef, sum(recv_buffer_length)) - MPI.Gatherv!(send_buffer, MPI.VBuffer(recv_buffer, recv_buffer_length), mpi_root(), mpi_comm()) - all_names = unique(Symbol.(split(String(recv_buffer), "\0"; keepempty=false))) - for key in keys(boundary_dictionary) - if !(key in all_names) - println(stderr, "ERROR: Key $(repr(key)) is not a valid boundary name") - MPI.Abort(mpi_comm(), 1) +function initialize!(boundary_types_container::UnstructuredSortedBoundaryTypes{N}, + cache) where {N} + @unpack boundary_dictionary, boundary_condition_types = boundary_types_container + + unique_names = unique(cache.boundaries.name) + + if mpi_isparallel() + # Exchange of boundaries names + send_buffer = Vector{UInt8}(join(unique_names, "\0")) + push!(send_buffer, 0) + if mpi_isroot() + recv_buffer_length = MPI.Gather(length(send_buffer), mpi_root(), mpi_comm()) + recv_buffer = Vector{UInt8}(undef, sum(recv_buffer_length)) + MPI.Gatherv!(send_buffer, MPI.VBuffer(recv_buffer, recv_buffer_length), + mpi_root(), mpi_comm()) + all_names = unique(Symbol.(split(String(recv_buffer), "\0"; + keepempty = false))) + for key in keys(boundary_dictionary) + if !(key in all_names) + println(stderr, + "ERROR: Key $(repr(key)) 
is not a valid boundary name") + MPI.Abort(mpi_comm(), 1) + end + end + else + MPI.Gather(length(send_buffer), mpi_root(), mpi_comm()) + MPI.Gatherv!(send_buffer, nothing, mpi_root(), mpi_comm()) end - end else - MPI.Gather(length(send_buffer), mpi_root(), mpi_comm()) - MPI.Gatherv!(send_buffer, nothing, mpi_root(), mpi_comm()) - end - else - for key in keys(boundary_dictionary) - if !(key in unique_names) - error("Key $(repr(key)) is not a valid boundary name") - end + for key in keys(boundary_dictionary) + if !(key in unique_names) + error("Key $(repr(key)) is not a valid boundary name") + end + end end - end - # Verify that each boundary has a boundary condition - for name in unique_names - if name !== Symbol("---") && !haskey(boundary_dictionary, name) - error("No boundary condition specified for boundary $(repr(name))") + # Verify that each boundary has a boundary condition + for name in unique_names + if name !== Symbol("---") && !haskey(boundary_dictionary, name) + error("No boundary condition specified for boundary $(repr(name))") + end end - end - # pull and sort the indexing for each boundary type - _boundary_indices = Vector{Any}(nothing, N) - for j in 1:N - indices_for_current_type = Int[] - for (test_name, test_condition) in boundary_dictionary - temp_indices = findall(x->x===test_name, cache.boundaries.name) - if test_condition === boundary_condition_types[j] - indices_for_current_type = vcat(indices_for_current_type, temp_indices) - end + # pull and sort the indexing for each boundary type + _boundary_indices = Vector{Any}(nothing, N) + for j in 1:N + indices_for_current_type = Int[] + for (test_name, test_condition) in boundary_dictionary + temp_indices = findall(x -> x === test_name, cache.boundaries.name) + if test_condition === boundary_condition_types[j] + indices_for_current_type = vcat(indices_for_current_type, temp_indices) + end + end + _boundary_indices[j] = sort!(indices_for_current_type) end - _boundary_indices[j] = sort!(indices_for_current_type) - end - # convert the work array with the boundary indices into a tuple - boundary_types_container.boundary_indices = Tuple(_boundary_indices) + # convert the work array with the boundary indices into a tuple + boundary_types_container.boundary_indices = Tuple(_boundary_indices) - return boundary_types_container + return boundary_types_container end - - end # @muladd diff --git a/src/solvers/fdsbp_tree/fdsbp.jl b/src/solvers/fdsbp_tree/fdsbp.jl index b89d59c1156..cbb6fd16243 100644 --- a/src/solvers/fdsbp_tree/fdsbp.jl +++ b/src/solvers/fdsbp_tree/fdsbp.jl @@ -6,7 +6,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent """ FDSBP(D_SBP; surface_integral, volume_integral) @@ -25,13 +25,13 @@ The other arguments have the same meaning as in [`DG`](@ref) or [`DGSEM`](@ref). !!! warning "Experimental implementation (upwind SBP)" This is an experimental feature and may change in future releases. 
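A typical construction pairs a first-derivative SBP operator from SummationByPartsOperators.jl with strong-form surface and volume integrals (a usage sketch; the operator choice and its parameters are merely illustrative):

```julia
using Trixi
using SummationByPartsOperators: derivative_operator, MattssonNordström2004

# SBP operator with interior accuracy order 4 on [0, 1] with 16 nodes
D_SBP = derivative_operator(MattssonNordström2004(),
                            derivative_order = 1, accuracy_order = 4,
                            xmin = 0.0, xmax = 1.0, N = 16)
solver = FDSBP(D_SBP,
               surface_integral = SurfaceIntegralStrongForm(flux_lax_friedrichs),
               volume_integral = VolumeIntegralStrongForm())
```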
""" -const FDSBP = DG{Basis} where {Basis<:AbstractDerivativeOperator} +const FDSBP = DG{Basis} where {Basis <: AbstractDerivativeOperator} function FDSBP(D_SBP::AbstractDerivativeOperator; surface_integral, volume_integral) - return DG(D_SBP, nothing #= mortar =#, surface_integral, volume_integral) + # `nothing` is passed as `mortar` + return DG(D_SBP, nothing, surface_integral, volume_integral) end - # General interface methods for SummationByPartsOperators.jl and Trixi.jl nnodes(D::AbstractDerivativeOperator) = size(D, 1) eachnode(D::AbstractDerivativeOperator) = Base.OneTo(nnodes(D)) @@ -42,7 +42,6 @@ get_nodes(D::AbstractDerivativeOperator) = grid(D) polydeg(D::AbstractDerivativeOperator) = size(D, 1) - 1 polydeg(fdsbp::FDSBP) = polydeg(fdsbp.basis) - # TODO: FD. No mortars supported at the moment init_mortars(cell_ids, mesh, elements, mortar::Nothing) = nothing create_cache(mesh, equations, mortar::Nothing, uEltype) = NamedTuple() @@ -50,25 +49,21 @@ nmortars(mortar::Nothing) = 0 function prolong2mortars!(cache, u, mesh, equations, mortar::Nothing, surface_integral, dg::DG) - @assert isempty(eachmortar(dg, cache)) + @assert isempty(eachmortar(dg, cache)) end function calc_mortar_flux!(surface_flux_values, mesh, nonconservative_terms, equations, mortar::Nothing, surface_integral, dg::DG, cache) - @assert isempty(eachmortar(dg, cache)) + @assert isempty(eachmortar(dg, cache)) end - # We do not use a specialized setup to analyze solutions SolutionAnalyzer(D::AbstractDerivativeOperator) = D - # dimension-specific implementations include("fdsbp_1d.jl") include("fdsbp_2d.jl") include("fdsbp_3d.jl") - - end # @muladd diff --git a/src/solvers/fdsbp_tree/fdsbp_1d.jl b/src/solvers/fdsbp_tree/fdsbp_1d.jl index 2cd6edd04fd..c7712074940 100644 --- a/src/solvers/fdsbp_tree/fdsbp_1d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_1d.jl @@ -6,78 +6,79 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # 1D caches function create_cache(mesh::TreeMesh{1}, equations, volume_integral::VolumeIntegralStrongForm, dg, uEltype) + prototype = Array{SVector{nvariables(equations), uEltype}, ndims(mesh)}(undef, + ntuple(_ -> nnodes(dg), + ndims(mesh))...) + f_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - prototype = Array{SVector{nvariables(equations), uEltype}, ndims(mesh)}( - undef, ntuple(_ -> nnodes(dg), ndims(mesh))...) - f_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - - return (; f_threaded,) + return (; f_threaded) end function create_cache(mesh::TreeMesh{1}, equations, volume_integral::VolumeIntegralUpwind, dg, uEltype) + u_node = SVector{nvariables(equations), uEltype}(ntuple(_ -> zero(uEltype), + Val{nvariables(equations)}())) + f = StructArray([(u_node, u_node)]) + f_minus_plus_threaded = [similar(f, ntuple(_ -> nnodes(dg), ndims(mesh))...) + for _ in 1:Threads.nthreads()] + + f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[1]) + f_minus_threaded = [f_minus] + f_plus_threaded = [f_plus] + for i in 2:Threads.nthreads() + f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[i]) + push!(f_minus_threaded, f_minus) + push!(f_plus_threaded, f_plus) + end - u_node = SVector{nvariables(equations), uEltype}(ntuple(_ -> zero(uEltype), Val{nvariables(equations)}())) - f = StructArray([(u_node, u_node)]) - f_minus_plus_threaded = [similar(f, ntuple(_ -> nnodes(dg), ndims(mesh))...) 
for _ in 1:Threads.nthreads()]
-
-  f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[1])
-  f_minus_threaded = [f_minus]
-  f_plus_threaded = [f_plus]
-  for i in 2:Threads.nthreads()
-    f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[i])
-    push!(f_minus_threaded, f_minus)
-    push!(f_plus_threaded, f_plus)
-  end
-
-  return (; f_minus_plus_threaded, f_minus_threaded, f_plus_threaded,)
+    return (; f_minus_plus_threaded, f_minus_threaded, f_plus_threaded)
end
-
# 1D volume integral contributions for `VolumeIntegralStrongForm`
function calc_volume_integral!(du, u,
                               mesh::TreeMesh{1},
                               nonconservative_terms::False, equations,
                               volume_integral::VolumeIntegralStrongForm,
                               dg::FDSBP, cache)
-  D = dg.basis # SBP derivative operator
-  @unpack f_threaded = cache
-
-  # SBP operators from SummationByPartsOperators.jl implement the basic interface
-  # of matrix-vector multiplication. Thus, we pass an "array of structures",
-  # packing all variables per node in an `SVector`.
-  if nvariables(equations) == 1
-    # `reinterpret(reshape, ...)` removes the leading dimension only if more
-    # than one variable is used.
-    u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u),
-                        nnodes(dg), nelements(dg, cache))
-    du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, du),
-                         nnodes(dg), nelements(dg, cache))
-  else
-    u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u)
-    du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, du)
-  end
-
-  # Use the tensor product structure to compute the discrete derivatives of
-  # the fluxes line-by-line and add them to `du` for each element.
-  @threaded for element in eachelement(dg, cache)
-    f_element = f_threaded[Threads.threadid()]
-    u_element = view(u_vectors, :, element)
-
-    # x direction
-    @. f_element = flux(u_element, 1, equations)
-    mul!(view(du_vectors, :, element), D, view(f_element, :),
-         one(eltype(du)), one(eltype(du)))
-  end
-
-  return nothing
-end
+    D = dg.basis # SBP derivative operator
+    @unpack f_threaded = cache
+
+    # SBP operators from SummationByPartsOperators.jl implement the basic interface
+    # of matrix-vector multiplication. Thus, we pass an "array of structures",
+    # packing all variables per node in an `SVector`.
+    if nvariables(equations) == 1
+        # `reinterpret(reshape, ...)` removes the leading dimension only if more
+        # than one variable is used.
+        u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u),
+                            nnodes(dg), nelements(dg, cache))
+        du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)},
+                                         du),
+                             nnodes(dg), nelements(dg, cache))
+    else
+        u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u)
+        du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)},
+                                 du)
+    end
+
+    # Use the tensor product structure to compute the discrete derivatives of
+    # the fluxes line-by-line and add them to `du` for each element.
+    @threaded for element in eachelement(dg, cache)
+        f_element = f_threaded[Threads.threadid()]
+        u_element = view(u_vectors, :, element)
+        # x direction
+        @. f_element = flux(u_element, 1, equations)
+        mul!(view(du_vectors, :, element), D, view(f_element, :),
+             one(eltype(du)), one(eltype(du)))
+    end
+
+    return nothing
+end

# 1D volume integral contributions for `VolumeIntegralUpwind`.
# Note that the plus / minus notation of the operators does not refer to the @@ -91,78 +92,78 @@ function calc_volume_integral!(du, u, nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, dg::FDSBP, cache) - # Assume that - # dg.basis isa SummationByPartsOperators.UpwindOperators - D_minus = dg.basis.minus # Upwind SBP D^- derivative operator - D_plus = dg.basis.plus # Upwind SBP D^+ derivative operator - @unpack f_minus_plus_threaded, f_minus_threaded, f_plus_threaded = cache - @unpack splitting = volume_integral - - # SBP operators from SummationByPartsOperators.jl implement the basic interface - # of matrix-vector multiplication. Thus, we pass an "array of structures", - # packing all variables per node in an `SVector`. - if nvariables(equations) == 1 - # `reinterpret(reshape, ...)` removes the leading dimension only if more - # than one variable is used. - u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), - nnodes(dg), nelements(dg, cache)) - du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, du), - nnodes(dg), nelements(dg, cache)) - else - u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) - du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, du) - end - - # Use the tensor product structure to compute the discrete derivatives of - # the fluxes line-by-line and add them to `du` for each element. - @threaded for element in eachelement(dg, cache) - # f_minus_plus_element wraps the storage provided by f_minus_element and - # f_plus_element such that we can use a single plain broadcasting below. - # f_minus_element and f_plus_element are updated in broadcasting calls - # of the form `@. f_minus_plus_element = ...`. - f_minus_plus_element = f_minus_plus_threaded[Threads.threadid()] - f_minus_element = f_minus_threaded[Threads.threadid()] - f_plus_element = f_plus_threaded[Threads.threadid()] - u_element = view(u_vectors, :, element) - - # x direction - @. f_minus_plus_element = splitting(u_element, 1, equations) - mul!(view(du_vectors, :, element), D_plus, view(f_minus_element, :), - one(eltype(du)), one(eltype(du))) - mul!(view(du_vectors, :, element), D_minus, view(f_plus_element, :), - one(eltype(du)), one(eltype(du))) - end - - return nothing -end + # Assume that + # dg.basis isa SummationByPartsOperators.UpwindOperators + D_minus = dg.basis.minus # Upwind SBP D^- derivative operator + D_plus = dg.basis.plus # Upwind SBP D^+ derivative operator + @unpack f_minus_plus_threaded, f_minus_threaded, f_plus_threaded = cache + @unpack splitting = volume_integral + + # SBP operators from SummationByPartsOperators.jl implement the basic interface + # of matrix-vector multiplication. Thus, we pass an "array of structures", + # packing all variables per node in an `SVector`. + if nvariables(equations) == 1 + # `reinterpret(reshape, ...)` removes the leading dimension only if more + # than one variable is used. + u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), + nnodes(dg), nelements(dg, cache)) + du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, + du), + nnodes(dg), nelements(dg, cache)) + else + u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) + du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, + du) + end + # Use the tensor product structure to compute the discrete derivatives of + # the fluxes line-by-line and add them to `du` for each element. 
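+    # For orientation (an illustrative aside): for linear advection with flux
+    # f(u) = a * u, a Lax-Friedrichs-type splitting uses
+    # f⁺(u) = 0.5 * (a + abs(a)) * u and f⁻(u) = 0.5 * (a - abs(a)) * u, so
+    # that f = f⁺ + f⁻. Upwinding then pairs each part with the oppositely
+    # biased operator, i.e., `D_plus` differentiates `f_minus` and `D_minus`
+    # differentiates `f_plus`, matching the two `mul!` calls in the loop below.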
+ @threaded for element in eachelement(dg, cache) + # f_minus_plus_element wraps the storage provided by f_minus_element and + # f_plus_element such that we can use a single plain broadcasting below. + # f_minus_element and f_plus_element are updated in broadcasting calls + # of the form `@. f_minus_plus_element = ...`. + f_minus_plus_element = f_minus_plus_threaded[Threads.threadid()] + f_minus_element = f_minus_threaded[Threads.threadid()] + f_plus_element = f_plus_threaded[Threads.threadid()] + u_element = view(u_vectors, :, element) + + # x direction + @. f_minus_plus_element = splitting(u_element, 1, equations) + mul!(view(du_vectors, :, element), D_plus, view(f_minus_element, :), + one(eltype(du)), one(eltype(du))) + mul!(view(du_vectors, :, element), D_minus, view(f_plus_element, :), + one(eltype(du)), one(eltype(du))) + end + + return nothing +end function calc_surface_integral!(du, u, mesh::TreeMesh{1}, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) - inv_weight_left = inv(left_boundary_weight(dg.basis)) - inv_weight_right = inv(right_boundary_weight(dg.basis)) - @unpack surface_flux_values = cache.elements - - @threaded for element in eachelement(dg, cache) - # surface at -x - u_node = get_node_vars(u, equations, dg, 1, element) - f_node = flux(u_node, 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, 1, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, 1, element) - - # surface at +x - u_node = get_node_vars(u, equations, dg, nnodes(dg), element) - f_node = flux(u_node, 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, 2, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, nnodes(dg), element) - end - - return nothing -end + inv_weight_left = inv(left_boundary_weight(dg.basis)) + inv_weight_right = inv(right_boundary_weight(dg.basis)) + @unpack surface_flux_values = cache.elements + + @threaded for element in eachelement(dg, cache) + # surface at -x + u_node = get_node_vars(u, equations, dg, 1, element) + f_node = flux(u_node, 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, 1, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, 1, element) + + # surface at +x + u_node = get_node_vars(u, equations, dg, nnodes(dg), element) + f_node = flux(u_node, 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, 2, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, nnodes(dg), element) + end + return nothing +end # Specialized interface flux computation because the upwind solver does # not require a standard numerical flux (Riemann solver). 
The flux splitting @@ -174,39 +175,39 @@ function calc_interface_flux!(surface_flux_values, nonconservative_terms::False, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) - @unpack splitting = surface_integral - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - # Pull the left and right solution data - u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface) - - # Compute the upwind coupling terms where right-traveling - # information comes from the left and left-traveling information - # comes from the right - flux_minus_rr = splitting(u_rr, Val{:minus}(), orientations[interface], equations) - flux_plus_ll = splitting(u_ll, Val{:plus}(), orientations[interface], equations) - - # Save the upwind coupling into the appropriate side of the elements - for v in eachvariable(equations) - surface_flux_values[v, left_direction, left_id] = flux_minus_rr[v] - surface_flux_values[v, right_direction, right_id] = flux_plus_ll[v] + @unpack splitting = surface_integral + @unpack u, neighbor_ids, orientations = cache.interfaces + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + # Pull the left and right solution data + u_ll, u_rr = get_surface_node_vars(u, equations, dg, interface) + + # Compute the upwind coupling terms where right-traveling + # information comes from the left and left-traveling information + # comes from the right + flux_minus_rr = splitting(u_rr, Val{:minus}(), orientations[interface], + equations) + flux_plus_ll = splitting(u_ll, Val{:plus}(), orientations[interface], equations) + + # Save the upwind coupling into the appropriate side of the elements + for v in eachvariable(equations) + surface_flux_values[v, left_direction, left_id] = flux_minus_rr[v] + surface_flux_values[v, right_direction, right_id] = flux_plus_ll[v] + end end - end - return nothing + return nothing end - # Implementation of fully upwind SATs. The surface flux values are pre-computed # in the specialized `calc_interface_flux` routine. 
These SATs are still of # a strong form penalty type, except that the interior flux at a particular @@ -214,90 +215,88 @@ end function calc_surface_integral!(du, u, mesh::TreeMesh{1}, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) - inv_weight_left = inv(left_boundary_weight(dg.basis)) - inv_weight_right = inv(right_boundary_weight(dg.basis)) - @unpack surface_flux_values = cache.elements - @unpack splitting = surface_integral - - @threaded for element in eachelement(dg, cache) - # surface at -x - u_node = get_node_vars(u, equations, dg, 1, element) - f_node = splitting(u_node, Val{:plus}(), 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, 1, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, 1, element) - - # surface at +x - u_node = get_node_vars(u, equations, dg, nnodes(dg), element) - f_node = splitting(u_node, Val{:minus}(), 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, 2, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, nnodes(dg), element) - end - - return nothing -end + inv_weight_left = inv(left_boundary_weight(dg.basis)) + inv_weight_right = inv(right_boundary_weight(dg.basis)) + @unpack surface_flux_values = cache.elements + @unpack splitting = surface_integral + + @threaded for element in eachelement(dg, cache) + # surface at -x + u_node = get_node_vars(u, equations, dg, 1, element) + f_node = splitting(u_node, Val{:plus}(), 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, 1, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, 1, element) + + # surface at +x + u_node = get_node_vars(u, equations, dg, nnodes(dg), element) + f_node = splitting(u_node, Val{:minus}(), 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, 2, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, nnodes(dg), element) + end + return nothing +end # AnalysisCallback function integrate_via_indices(func::Func, u, mesh::TreeMesh{1}, equations, - dg::FDSBP, cache, args...; normalize=true) where {Func} - # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, equations, dg, args...)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - volume_jacobian_ = volume_jacobian(element, mesh, cache) - for i in eachnode(dg) - integral += volume_jacobian_ * weights[i] * func(u, i, element, equations, dg, args...) + dg::FDSBP, cache, args...; normalize = true) where {Func} + # TODO: FD. This is rather inefficient right now and allocates... + weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, equations, dg, args...)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + volume_jacobian_ = volume_jacobian(element, mesh, cache) + for i in eachnode(dg) + integral += volume_jacobian_ * weights[i] * + func(u, i, element, equations, dg, args...) 
+ end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume(mesh) - end + # Normalize with total volume + if normalize + integral = integral / total_volume(mesh) + end - return integral + return integral end function calc_error_norms(func, u, t, analyzer, mesh::TreeMesh{1}, equations, initial_condition, dg::FDSBP, cache, cache_analysis) - # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) - @unpack node_coordinates = cache.elements - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1), equations)) - linf_error = copy(l2_error) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Calculate errors at each node - volume_jacobian_ = volume_jacobian(element, mesh, cache) - - for i in eachnode(analyzer) - u_exact = initial_condition( - get_node_coords(node_coordinates, equations, dg, i, element), t, equations) - diff = func(u_exact, equations) - func( - get_node_vars(u, equations, dg, i, element), equations) - l2_error += diff.^2 * (weights[i] * volume_jacobian_) - linf_error = @. max(linf_error, abs(diff)) + # TODO: FD. This is rather inefficient right now and allocates... + weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + @unpack node_coordinates = cache.elements + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1), equations)) + linf_error = copy(l2_error) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Calculate errors at each node + volume_jacobian_ = volume_jacobian(element, mesh, cache) + + for i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(node_coordinates, equations, dg, + i, element), t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u, equations, dg, i, element), equations) + l2_error += diff .^ 2 * (weights[i] * volume_jacobian_) + linf_error = @. max(linf_error, abs(diff)) + end end - end - # For L2 error, divide by total volume - total_volume_ = total_volume(mesh) - l2_error = @. sqrt(l2_error / total_volume_) + # For L2 error, divide by total volume + total_volume_ = total_volume(mesh) + l2_error = @. sqrt(l2_error / total_volume_) - return l2_error, linf_error + return l2_error, linf_error end - - end # @muladd diff --git a/src/solvers/fdsbp_tree/fdsbp_2d.jl b/src/solvers/fdsbp_tree/fdsbp_2d.jl index 295c0cada2a..241e0d95342 100644 --- a/src/solvers/fdsbp_tree/fdsbp_2d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_2d.jl @@ -6,88 +6,89 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # 2D caches function create_cache(mesh::TreeMesh{2}, equations, volume_integral::VolumeIntegralStrongForm, dg, uEltype) + prototype = Array{SVector{nvariables(equations), uEltype}, ndims(mesh)}(undef, + ntuple(_ -> nnodes(dg), + ndims(mesh))...) + f_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - prototype = Array{SVector{nvariables(equations), uEltype}, ndims(mesh)}( - undef, ntuple(_ -> nnodes(dg), ndims(mesh))...) 
- f_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - - return (; f_threaded,) + return (; f_threaded) end function create_cache(mesh::TreeMesh{2}, equations, volume_integral::VolumeIntegralUpwind, dg, uEltype) + u_node = SVector{nvariables(equations), uEltype}(ntuple(_ -> zero(uEltype), + Val{nvariables(equations)}())) + f = StructArray([(u_node, u_node)]) + f_minus_plus_threaded = [similar(f, ntuple(_ -> nnodes(dg), ndims(mesh))...) + for _ in 1:Threads.nthreads()] + + f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[1]) + f_minus_threaded = [f_minus] + f_plus_threaded = [f_plus] + for i in 2:Threads.nthreads() + f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[i]) + push!(f_minus_threaded, f_minus) + push!(f_plus_threaded, f_plus) + end - u_node = SVector{nvariables(equations), uEltype}(ntuple(_ -> zero(uEltype), Val{nvariables(equations)}())) - f = StructArray([(u_node, u_node)]) - f_minus_plus_threaded = [similar(f, ntuple(_ -> nnodes(dg), ndims(mesh))...) for _ in 1:Threads.nthreads()] - - f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[1]) - f_minus_threaded = [f_minus] - f_plus_threaded = [f_plus] - for i in 2:Threads.nthreads() - f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[i]) - push!(f_minus_threaded, f_minus) - push!(f_plus_threaded, f_plus) - end - - return (; f_minus_plus_threaded, f_minus_threaded, f_plus_threaded,) + return (; f_minus_plus_threaded, f_minus_threaded, f_plus_threaded) end - # 2D volume integral contributions for `VolumeIntegralStrongForm` function calc_volume_integral!(du, u, mesh::TreeMesh{2}, nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, dg::FDSBP, cache) - D = dg.basis # SBP derivative operator - @unpack f_threaded = cache - - # SBP operators from SummationByPartsOperators.jl implement the basic interface - # of matrix-vector multiplication. Thus, we pass an "array of structures", - # packing all variables per node in an `SVector`. - if nvariables(equations) == 1 - # `reinterpret(reshape, ...)` removes the leading dimension only if more - # than one variable is used. - u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), - nnodes(dg), nnodes(dg), nelements(dg, cache)) - du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, du), - nnodes(dg), nnodes(dg), nelements(dg, cache)) - else - u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) - du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, du) - end - - # Use the tensor product structure to compute the discrete derivatives of - # the fluxes line-by-line and add them to `du` for each element. - @threaded for element in eachelement(dg, cache) - f_element = f_threaded[Threads.threadid()] - u_element = view(u_vectors, :, :, element) - - # x direction - @. f_element = flux(u_element, 1, equations) - for j in eachnode(dg) - mul!(view(du_vectors, :, j, element), D, view(f_element, :, j), - one(eltype(du)), one(eltype(du))) + D = dg.basis # SBP derivative operator + @unpack f_threaded = cache + + # SBP operators from SummationByPartsOperators.jl implement the basic interface + # of matrix-vector multiplication. Thus, we pass an "array of structures", + # packing all variables per node in an `SVector`. + if nvariables(equations) == 1 + # `reinterpret(reshape, ...)` removes the leading dimension only if more + # than one variable is used. 
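+        # (Plain `reinterpret` would keep the size-1 variable axis in front,
+        # hence the explicit `reshape` below.)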
+ u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), + nnodes(dg), nnodes(dg), nelements(dg, cache)) + du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, + du), + nnodes(dg), nnodes(dg), nelements(dg, cache)) + else + u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) + du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, + du) end - # y direction - @. f_element = flux(u_element, 2, equations) - for i in eachnode(dg) - mul!(view(du_vectors, i, :, element), D, view(f_element, i, :), - one(eltype(du)), one(eltype(du))) + # Use the tensor product structure to compute the discrete derivatives of + # the fluxes line-by-line and add them to `du` for each element. + @threaded for element in eachelement(dg, cache) + f_element = f_threaded[Threads.threadid()] + u_element = view(u_vectors, :, :, element) + + # x direction + @. f_element = flux(u_element, 1, equations) + for j in eachnode(dg) + mul!(view(du_vectors, :, j, element), D, view(f_element, :, j), + one(eltype(du)), one(eltype(du))) + end + + # y direction + @. f_element = flux(u_element, 2, equations) + for i in eachnode(dg) + mul!(view(du_vectors, i, :, element), D, view(f_element, i, :), + one(eltype(du)), one(eltype(du))) + end end - end - return nothing + return nothing end - # 2D volume integral contributions for `VolumeIntegralUpwind`. # Note that the plus / minus notation of the operators does not refer to the # upwind / downwind directions of the fluxes. @@ -100,106 +101,106 @@ function calc_volume_integral!(du, u, nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, dg::FDSBP, cache) - # Assume that - # dg.basis isa SummationByPartsOperators.UpwindOperators - D_minus = dg.basis.minus # Upwind SBP D^- derivative operator - D_plus = dg.basis.plus # Upwind SBP D^+ derivative operator - @unpack f_minus_plus_threaded, f_minus_threaded, f_plus_threaded = cache - @unpack splitting = volume_integral - - # SBP operators from SummationByPartsOperators.jl implement the basic interface - # of matrix-vector multiplication. Thus, we pass an "array of structures", - # packing all variables per node in an `SVector`. - if nvariables(equations) == 1 - # `reinterpret(reshape, ...)` removes the leading dimension only if more - # than one variable is used. - u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), - nnodes(dg), nnodes(dg), nelements(dg, cache)) - du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, du), - nnodes(dg), nnodes(dg), nelements(dg, cache)) - else - u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) - du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, du) - end - - # Use the tensor product structure to compute the discrete derivatives of - # the fluxes line-by-line and add them to `du` for each element. - @threaded for element in eachelement(dg, cache) - # f_minus_plus_element wraps the storage provided by f_minus_element and - # f_plus_element such that we can use a single plain broadcasting below. - # f_minus_element and f_plus_element are updated in broadcasting calls - # of the form `@. f_minus_plus_element = ...`. - f_minus_plus_element = f_minus_plus_threaded[Threads.threadid()] - f_minus_element = f_minus_threaded[Threads.threadid()] - f_plus_element = f_plus_threaded[Threads.threadid()] - u_element = view(u_vectors, :, :, element) - - # x direction - @. 
f_minus_plus_element = splitting(u_element, 1, equations) - for j in eachnode(dg) - mul!(view(du_vectors, :, j, element), D_minus, view(f_plus_element, :, j), - one(eltype(du)), one(eltype(du))) - mul!(view(du_vectors, :, j, element), D_plus, view(f_minus_element, :, j), - one(eltype(du)), one(eltype(du))) + # Assume that + # dg.basis isa SummationByPartsOperators.UpwindOperators + D_minus = dg.basis.minus # Upwind SBP D^- derivative operator + D_plus = dg.basis.plus # Upwind SBP D^+ derivative operator + @unpack f_minus_plus_threaded, f_minus_threaded, f_plus_threaded = cache + @unpack splitting = volume_integral + + # SBP operators from SummationByPartsOperators.jl implement the basic interface + # of matrix-vector multiplication. Thus, we pass an "array of structures", + # packing all variables per node in an `SVector`. + if nvariables(equations) == 1 + # `reinterpret(reshape, ...)` removes the leading dimension only if more + # than one variable is used. + u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), + nnodes(dg), nnodes(dg), nelements(dg, cache)) + du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, + du), + nnodes(dg), nnodes(dg), nelements(dg, cache)) + else + u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) + du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, + du) end - # y direction - @. f_minus_plus_element = splitting(u_element, 2, equations) - for i in eachnode(dg) - mul!(view(du_vectors, i, :, element), D_minus, view(f_plus_element, i, :), - one(eltype(du)), one(eltype(du))) - mul!(view(du_vectors, i, :, element), D_plus, view(f_minus_element, i, :), - one(eltype(du)), one(eltype(du))) + # Use the tensor product structure to compute the discrete derivatives of + # the fluxes line-by-line and add them to `du` for each element. + @threaded for element in eachelement(dg, cache) + # f_minus_plus_element wraps the storage provided by f_minus_element and + # f_plus_element such that we can use a single plain broadcasting below. + # f_minus_element and f_plus_element are updated in broadcasting calls + # of the form `@. f_minus_plus_element = ...`. + f_minus_plus_element = f_minus_plus_threaded[Threads.threadid()] + f_minus_element = f_minus_threaded[Threads.threadid()] + f_plus_element = f_plus_threaded[Threads.threadid()] + u_element = view(u_vectors, :, :, element) + + # x direction + @. f_minus_plus_element = splitting(u_element, 1, equations) + for j in eachnode(dg) + mul!(view(du_vectors, :, j, element), D_minus, view(f_plus_element, :, j), + one(eltype(du)), one(eltype(du))) + mul!(view(du_vectors, :, j, element), D_plus, view(f_minus_element, :, j), + one(eltype(du)), one(eltype(du))) + end + + # y direction + @. 
f_minus_plus_element = splitting(u_element, 2, equations) + for i in eachnode(dg) + mul!(view(du_vectors, i, :, element), D_minus, view(f_plus_element, i, :), + one(eltype(du)), one(eltype(du))) + mul!(view(du_vectors, i, :, element), D_plus, view(f_minus_element, i, :), + one(eltype(du)), one(eltype(du))) + end end - end - return nothing + return nothing end - function calc_surface_integral!(du, u, mesh::TreeMesh{2}, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) - inv_weight_left = inv(left_boundary_weight(dg.basis)) - inv_weight_right = inv(right_boundary_weight(dg.basis)) - @unpack surface_flux_values = cache.elements - - @threaded for element in eachelement(dg, cache) - for l in eachnode(dg) - # surface at -x - u_node = get_node_vars(u, equations, dg, 1, l, element) - f_node = flux(u_node, 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 1, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, 1, l, element) - - # surface at +x - u_node = get_node_vars(u, equations, dg, nnodes(dg), l, element) - f_node = flux(u_node, 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 2, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, nnodes(dg), l, element) - - # surface at -y - u_node = get_node_vars(u, equations, dg, l, 1, element) - f_node = flux(u_node, 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 3, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, l, 1, element) - - # surface at +y - u_node = get_node_vars(u, equations, dg, l, nnodes(dg), element) - f_node = flux(u_node, 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 4, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, l, nnodes(dg), element) + inv_weight_left = inv(left_boundary_weight(dg.basis)) + inv_weight_right = inv(right_boundary_weight(dg.basis)) + @unpack surface_flux_values = cache.elements + + @threaded for element in eachelement(dg, cache) + for l in eachnode(dg) + # surface at -x + u_node = get_node_vars(u, equations, dg, 1, l, element) + f_node = flux(u_node, 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 1, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, 1, l, element) + + # surface at +x + u_node = get_node_vars(u, equations, dg, nnodes(dg), l, element) + f_node = flux(u_node, 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 2, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, nnodes(dg), l, element) + + # surface at -y + u_node = get_node_vars(u, equations, dg, l, 1, element) + f_node = flux(u_node, 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 3, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, l, 1, element) + + # surface at +y + u_node = get_node_vars(u, equations, dg, l, nnodes(dg), element) + f_node = flux(u_node, 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 4, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, l, nnodes(dg), element) + end end - end - return nothing + return nothing end - # Specialized interface flux computation because the upwind solver does # not require a standard numerical flux (Riemann 
solver). The flux splitting # already separates the solution information into right-traveling and @@ -210,42 +211,43 @@ function calc_interface_flux!(surface_flux_values, nonconservative_terms::False, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) - @unpack splitting = surface_integral - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for i in eachnode(dg) - # Pull the left and right solution data - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) - - # Compute the upwind coupling terms where right-traveling - # information comes from the left and left-traveling information - # comes from the right - flux_minus_rr = splitting(u_rr, Val{:minus}(), orientations[interface], equations) - flux_plus_ll = splitting(u_ll, Val{:plus}(), orientations[interface], equations) - - # Save the upwind coupling into the appropriate side of the elements - for v in eachvariable(equations) - surface_flux_values[v, i, left_direction, left_id] = flux_minus_rr[v] - surface_flux_values[v, i, right_direction, right_id] = flux_plus_ll[v] - end + @unpack splitting = surface_integral + @unpack u, neighbor_ids, orientations = cache.interfaces + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for i in eachnode(dg) + # Pull the left and right solution data + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, interface) + + # Compute the upwind coupling terms where right-traveling + # information comes from the left and left-traveling information + # comes from the right + flux_minus_rr = splitting(u_rr, Val{:minus}(), orientations[interface], + equations) + flux_plus_ll = splitting(u_ll, Val{:plus}(), orientations[interface], + equations) + + # Save the upwind coupling into the appropriate side of the elements + for v in eachvariable(equations) + surface_flux_values[v, i, left_direction, left_id] = flux_minus_rr[v] + surface_flux_values[v, i, right_direction, right_id] = flux_plus_ll[v] + end + end end - end - return nothing + return nothing end - # Implementation of fully upwind SATs. The surface flux values are pre-computed # in the specialized `calc_interface_flux` routine. 
These SATs are still of # a strong form penalty type, except that the interior flux at a particular @@ -253,106 +255,103 @@ end function calc_surface_integral!(du, u, mesh::TreeMesh{2}, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) - inv_weight_left = inv(left_boundary_weight(dg.basis)) - inv_weight_right = inv(right_boundary_weight(dg.basis)) - @unpack surface_flux_values = cache.elements - @unpack splitting = surface_integral - - - @threaded for element in eachelement(dg, cache) - for l in eachnode(dg) - # surface at -x - u_node = get_node_vars(u, equations, dg, 1, l, element) - f_node = splitting(u_node, Val{:plus}(), 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 1, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, 1, l, element) - - # surface at +x - u_node = get_node_vars(u, equations, dg, nnodes(dg), l, element) - f_node = splitting(u_node, Val{:minus}(), 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 2, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, nnodes(dg), l, element) - - # surface at -y - u_node = get_node_vars(u, equations, dg, l, 1, element) - f_node = splitting(u_node, Val{:plus}(), 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 3, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, l, 1, element) - - # surface at +y - u_node = get_node_vars(u, equations, dg, l, nnodes(dg), element) - f_node = splitting(u_node, Val{:minus}(), 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, 4, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, l, nnodes(dg), element) + inv_weight_left = inv(left_boundary_weight(dg.basis)) + inv_weight_right = inv(right_boundary_weight(dg.basis)) + @unpack surface_flux_values = cache.elements + @unpack splitting = surface_integral + + @threaded for element in eachelement(dg, cache) + for l in eachnode(dg) + # surface at -x + u_node = get_node_vars(u, equations, dg, 1, l, element) + f_node = splitting(u_node, Val{:plus}(), 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 1, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, 1, l, element) + + # surface at +x + u_node = get_node_vars(u, equations, dg, nnodes(dg), l, element) + f_node = splitting(u_node, Val{:minus}(), 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 2, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, nnodes(dg), l, element) + + # surface at -y + u_node = get_node_vars(u, equations, dg, l, 1, element) + f_node = splitting(u_node, Val{:plus}(), 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 3, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, l, 1, element) + + # surface at +y + u_node = get_node_vars(u, equations, dg, l, nnodes(dg), element) + f_node = splitting(u_node, Val{:minus}(), 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, 4, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, l, nnodes(dg), element) + end end - end - return nothing + return nothing end - # AnalysisCallback function integrate_via_indices(func::Func, u, mesh::TreeMesh{2}, equations, - dg::FDSBP, cache, args...; 
normalize=true) where {Func} - # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, 1, equations, dg, args...)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - volume_jacobian_ = volume_jacobian(element, mesh, cache) - for j in eachnode(dg), i in eachnode(dg) - integral += volume_jacobian_ * weights[i] * weights[j] * func(u, i, j, element, equations, dg, args...) + dg::FDSBP, cache, args...; normalize = true) where {Func} + # TODO: FD. This is rather inefficient right now and allocates... + weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, 1, equations, dg, args...)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + volume_jacobian_ = volume_jacobian(element, mesh, cache) + for j in eachnode(dg), i in eachnode(dg) + integral += volume_jacobian_ * weights[i] * weights[j] * + func(u, i, j, element, equations, dg, args...) + end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume(mesh) - end + # Normalize with total volume + if normalize + integral = integral / total_volume(mesh) + end - return integral + return integral end function calc_error_norms(func, u, t, analyzer, mesh::TreeMesh{2}, equations, initial_condition, dg::FDSBP, cache, cache_analysis) - # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) - @unpack node_coordinates = cache.elements - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) - linf_error = copy(l2_error) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Calculate errors at each node - volume_jacobian_ = volume_jacobian(element, mesh, cache) - - for j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition( - get_node_coords(node_coordinates, equations, dg, i, j, element), t, equations) - diff = func(u_exact, equations) - func( - get_node_vars(u, equations, dg, i, j, element), equations) - l2_error += diff.^2 * (weights[i] * weights[j] * volume_jacobian_) - linf_error = @. max(linf_error, abs(diff)) + # TODO: FD. This is rather inefficient right now and allocates... + weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + @unpack node_coordinates = cache.elements + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1), equations)) + linf_error = copy(l2_error) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Calculate errors at each node + volume_jacobian_ = volume_jacobian(element, mesh, cache) + + for j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(node_coordinates, equations, dg, + i, j, element), t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u, equations, dg, i, j, element), equations) + l2_error += diff .^ 2 * (weights[i] * weights[j] * volume_jacobian_) + linf_error = @. max(linf_error, abs(diff)) + end end - end - # For L2 error, divide by total volume - total_volume_ = total_volume(mesh) - l2_error = @. 
sqrt(l2_error / total_volume_) + # For L2 error, divide by total volume + total_volume_ = total_volume(mesh) + l2_error = @. sqrt(l2_error / total_volume_) - return l2_error, linf_error + return l2_error, linf_error end - - end # @muladd diff --git a/src/solvers/fdsbp_tree/fdsbp_3d.jl b/src/solvers/fdsbp_tree/fdsbp_3d.jl index ed5f8b102a3..a4f69d3d481 100644 --- a/src/solvers/fdsbp_tree/fdsbp_3d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_3d.jl @@ -6,95 +6,96 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # 3D caches function create_cache(mesh::TreeMesh{3}, equations, volume_integral::VolumeIntegralStrongForm, dg, uEltype) + prototype = Array{SVector{nvariables(equations), uEltype}, ndims(mesh)}(undef, + ntuple(_ -> nnodes(dg), + ndims(mesh))...) + f_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - prototype = Array{SVector{nvariables(equations), uEltype}, ndims(mesh)}( - undef, ntuple(_ -> nnodes(dg), ndims(mesh))...) - f_threaded = [similar(prototype) for _ in 1:Threads.nthreads()] - - return (; f_threaded,) + return (; f_threaded) end function create_cache(mesh::TreeMesh{3}, equations, volume_integral::VolumeIntegralUpwind, dg, uEltype) + u_node = SVector{nvariables(equations), uEltype}(ntuple(_ -> zero(uEltype), + Val{nvariables(equations)}())) + f = StructArray([(u_node, u_node)]) + f_minus_plus_threaded = [similar(f, ntuple(_ -> nnodes(dg), ndims(mesh))...) + for _ in 1:Threads.nthreads()] + + f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[1]) + f_minus_threaded = [f_minus] + f_plus_threaded = [f_plus] + for i in 2:Threads.nthreads() + f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[i]) + push!(f_minus_threaded, f_minus) + push!(f_plus_threaded, f_plus) + end - u_node = SVector{nvariables(equations), uEltype}(ntuple(_ -> zero(uEltype), Val{nvariables(equations)}())) - f = StructArray([(u_node, u_node)]) - f_minus_plus_threaded = [similar(f, ntuple(_ -> nnodes(dg), ndims(mesh))...) for _ in 1:Threads.nthreads()] - - f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[1]) - f_minus_threaded = [f_minus] - f_plus_threaded = [f_plus] - for i in 2:Threads.nthreads() - f_minus, f_plus = StructArrays.components(f_minus_plus_threaded[i]) - push!(f_minus_threaded, f_minus) - push!(f_plus_threaded, f_plus) - end - - return (; f_minus_plus_threaded, f_minus_threaded, f_plus_threaded,) + return (; f_minus_plus_threaded, f_minus_threaded, f_plus_threaded) end - # 3D volume integral contributions for `VolumeIntegralStrongForm` function calc_volume_integral!(du, u, mesh::TreeMesh{3}, nonconservative_terms::False, equations, volume_integral::VolumeIntegralStrongForm, dg::FDSBP, cache) - D = dg.basis # SBP derivative operator - @unpack f_threaded = cache - - # SBP operators from SummationByPartsOperators.jl implement the basic interface - # of matrix-vector multiplication. Thus, we pass an "array of structures", - # packing all variables per node in an `SVector`. - if nvariables(equations) == 1 - # `reinterpret(reshape, ...)` removes the leading dimension only if more - # than one variable is used. 
- u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), - nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) - du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, du), - nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) - else - u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) - du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, du) - end - - # Use the tensor product structure to compute the discrete derivatives of - # the fluxes line-by-line and add them to `du` for each element. - @threaded for element in eachelement(dg, cache) - f_element = f_threaded[Threads.threadid()] - u_element = view(u_vectors, :, :, :, element) - - # x direction - @. f_element = flux(u_element, 1, equations) - for j in eachnode(dg), k in eachnode(dg) - mul!(view(du_vectors, :, j, k, element), D, view(f_element, :, j, k), - one(eltype(du)), one(eltype(du))) - end - - # y direction - @. f_element = flux(u_element, 2, equations) - for i in eachnode(dg), k in eachnode(dg) - mul!(view(du_vectors, i, :, k, element), D, view(f_element, i, :, k), - one(eltype(du)), one(eltype(du))) + D = dg.basis # SBP derivative operator + @unpack f_threaded = cache + + # SBP operators from SummationByPartsOperators.jl implement the basic interface + # of matrix-vector multiplication. Thus, we pass an "array of structures", + # packing all variables per node in an `SVector`. + if nvariables(equations) == 1 + # `reinterpret(reshape, ...)` removes the leading dimension only if more + # than one variable is used. + u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), + nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) + du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, + du), + nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) + else + u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) + du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, + du) end - # z direction - @. f_element = flux(u_element, 3, equations) - for i in eachnode(dg), j in eachnode(dg) - mul!(view(du_vectors, i, j, :, element), D, view(f_element, i, j, :), - one(eltype(du)), one(eltype(du))) + # Use the tensor product structure to compute the discrete derivatives of + # the fluxes line-by-line and add them to `du` for each element. + @threaded for element in eachelement(dg, cache) + f_element = f_threaded[Threads.threadid()] + u_element = view(u_vectors, :, :, :, element) + + # x direction + @. f_element = flux(u_element, 1, equations) + for j in eachnode(dg), k in eachnode(dg) + mul!(view(du_vectors, :, j, k, element), D, view(f_element, :, j, k), + one(eltype(du)), one(eltype(du))) + end + + # y direction + @. f_element = flux(u_element, 2, equations) + for i in eachnode(dg), k in eachnode(dg) + mul!(view(du_vectors, i, :, k, element), D, view(f_element, i, :, k), + one(eltype(du)), one(eltype(du))) + end + + # z direction + @. f_element = flux(u_element, 3, equations) + for i in eachnode(dg), j in eachnode(dg) + mul!(view(du_vectors, i, j, :, element), D, view(f_element, i, j, :), + one(eltype(du)), one(eltype(du))) + end end - end - return nothing + return nothing end - # 3D volume integral contributions for `VolumeIntegralUpwind`. # Note that the plus / minus notation of the operators does not refer to the # upwind / downwind directions of the fluxes. 
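
A note on the crosswise pairing used in these upwind volume terms: a flux
vector splitting decomposes the physical flux as f(u) = f^-(u) + f^+(u),
where f^+ carries right-traveling and f^- left-traveling information. The
left-biased operator D^- then differentiates f^+ and the right-biased D^+
differentiates f^-, so each part is differenced from its upwind side. The
following scalar-advection sketch is illustrative only and not part of the
patch; `splitting_advection` is a hypothetical stand-in for the splittings
shipped with Trixi.jl (e.g. `splitting_lax_friedrichs`).

    # Upwind splitting of the scalar advection flux f(u) = a * u:
    # f_plus contains only nonnegative, f_minus only nonpositive wave speeds.
    function splitting_advection(u, a)
        f_plus = 0.5 * (a + abs(a)) * u   # vanishes for a < 0
        f_minus = 0.5 * (a - abs(a)) * u  # vanishes for a > 0
        return f_minus, f_plus
    end

    # Crosswise pairing as in `calc_volume_integral!`: the derivative of
    # f_plus is formed with the left-biased D^- operator, that of f_minus
    # with the right-biased D^+ operator, mirroring the calls
    # `mul!(du, D_minus, f_plus, ...)` and `mul!(du, D_plus, f_minus, ...)`.
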
@@ -107,129 +108,135 @@ function calc_volume_integral!(du, u, nonconservative_terms::False, equations, volume_integral::VolumeIntegralUpwind, dg::FDSBP, cache) - # Assume that - # dg.basis isa SummationByPartsOperators.UpwindOperators - D_minus = dg.basis.minus # Upwind SBP D^- derivative operator - D_plus = dg.basis.plus # Upwind SBP D^+ derivative operator - @unpack f_minus_plus_threaded, f_minus_threaded, f_plus_threaded = cache - @unpack splitting = volume_integral - - # SBP operators from SummationByPartsOperators.jl implement the basic interface - # of matrix-vector multiplication. Thus, we pass an "array of structures", - # packing all variables per node in an `SVector`. - if nvariables(equations) == 1 - # `reinterpret(reshape, ...)` removes the leading dimension only if more - # than one variable is used. - u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), - nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) - du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, du), - nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) - else - u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) - du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, du) - end - - # Use the tensor product structure to compute the discrete derivatives of - # the fluxes line-by-line and add them to `du` for each element. - @threaded for element in eachelement(dg, cache) - # f_minus_plus_element wraps the storage provided by f_minus_element and - # f_plus_element such that we can use a single plain broadcasting below. - # f_minus_element and f_plus_element are updated in broadcasting calls - # of the form `@. f_minus_plus_element = ...`. - f_minus_plus_element = f_minus_plus_threaded[Threads.threadid()] - f_minus_element = f_minus_threaded[Threads.threadid()] - f_plus_element = f_plus_threaded[Threads.threadid()] - u_element = view(u_vectors, :, :, :, element) - - # x direction - @. f_minus_plus_element = splitting(u_element, 1, equations) - for j in eachnode(dg), k in eachnode(dg) - mul!(view(du_vectors, :, j, k, element), D_minus, view(f_plus_element, :, j, k), - one(eltype(du)), one(eltype(du))) - mul!(view(du_vectors, :, j, k, element), D_plus, view(f_minus_element, :, j, k), - one(eltype(du)), one(eltype(du))) + # Assume that + # dg.basis isa SummationByPartsOperators.UpwindOperators + D_minus = dg.basis.minus # Upwind SBP D^- derivative operator + D_plus = dg.basis.plus # Upwind SBP D^+ derivative operator + @unpack f_minus_plus_threaded, f_minus_threaded, f_plus_threaded = cache + @unpack splitting = volume_integral + + # SBP operators from SummationByPartsOperators.jl implement the basic interface + # of matrix-vector multiplication. Thus, we pass an "array of structures", + # packing all variables per node in an `SVector`. + if nvariables(equations) == 1 + # `reinterpret(reshape, ...)` removes the leading dimension only if more + # than one variable is used. + u_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(u)}, u), + nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) + du_vectors = reshape(reinterpret(SVector{nvariables(equations), eltype(du)}, + du), + nnodes(dg), nnodes(dg), nnodes(dg), nelements(dg, cache)) + else + u_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(u)}, u) + du_vectors = reinterpret(reshape, SVector{nvariables(equations), eltype(du)}, + du) end - # y direction - @. 
f_minus_plus_element = splitting(u_element, 2, equations) - for i in eachnode(dg), k in eachnode(dg) - mul!(view(du_vectors, i, :, k, element), D_minus, view(f_plus_element, i, :, k), - one(eltype(du)), one(eltype(du))) - mul!(view(du_vectors, i, :, k, element), D_plus, view(f_minus_element, i, :, k), - one(eltype(du)), one(eltype(du))) + # Use the tensor product structure to compute the discrete derivatives of + # the fluxes line-by-line and add them to `du` for each element. + @threaded for element in eachelement(dg, cache) + # f_minus_plus_element wraps the storage provided by f_minus_element and + # f_plus_element such that we can use a single plain broadcasting below. + # f_minus_element and f_plus_element are updated in broadcasting calls + # of the form `@. f_minus_plus_element = ...`. + f_minus_plus_element = f_minus_plus_threaded[Threads.threadid()] + f_minus_element = f_minus_threaded[Threads.threadid()] + f_plus_element = f_plus_threaded[Threads.threadid()] + u_element = view(u_vectors, :, :, :, element) + + # x direction + @. f_minus_plus_element = splitting(u_element, 1, equations) + for j in eachnode(dg), k in eachnode(dg) + mul!(view(du_vectors, :, j, k, element), D_minus, + view(f_plus_element, :, j, k), + one(eltype(du)), one(eltype(du))) + mul!(view(du_vectors, :, j, k, element), D_plus, + view(f_minus_element, :, j, k), + one(eltype(du)), one(eltype(du))) + end + + # y direction + @. f_minus_plus_element = splitting(u_element, 2, equations) + for i in eachnode(dg), k in eachnode(dg) + mul!(view(du_vectors, i, :, k, element), D_minus, + view(f_plus_element, i, :, k), + one(eltype(du)), one(eltype(du))) + mul!(view(du_vectors, i, :, k, element), D_plus, + view(f_minus_element, i, :, k), + one(eltype(du)), one(eltype(du))) + end + + # z direction + @. f_minus_plus_element = splitting(u_element, 3, equations) + for i in eachnode(dg), j in eachnode(dg) + mul!(view(du_vectors, i, j, :, element), D_minus, + view(f_plus_element, i, j, :), + one(eltype(du)), one(eltype(du))) + mul!(view(du_vectors, i, j, :, element), D_plus, + view(f_minus_element, i, j, :), + one(eltype(du)), one(eltype(du))) + end end - # z direction - @. 
f_minus_plus_element = splitting(u_element, 3, equations) - for i in eachnode(dg), j in eachnode(dg) - mul!(view(du_vectors, i, j, :, element), D_minus, view(f_plus_element, i, j, :), - one(eltype(du)), one(eltype(du))) - mul!(view(du_vectors, i, j, :, element), D_plus, view(f_minus_element, i, j, :), - one(eltype(du)), one(eltype(du))) - end - end - - return nothing + return nothing end - function calc_surface_integral!(du, u, mesh::TreeMesh{3}, equations, surface_integral::SurfaceIntegralStrongForm, dg::DG, cache) - inv_weight_left = inv(left_boundary_weight(dg.basis)) - inv_weight_right = inv(right_boundary_weight(dg.basis)) - @unpack surface_flux_values = cache.elements - - @threaded for element in eachelement(dg, cache) - for m in eachnode(dg), l in eachnode(dg) - # surface at -x - u_node = get_node_vars(u, equations, dg, 1, l, m, element) - f_node = flux(u_node, 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 1, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, 1, l, m, element) - - # surface at +x - u_node = get_node_vars(u, equations, dg, nnodes(dg), l, m, element) - f_node = flux(u_node, 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 2, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, nnodes(dg), l, m, element) - - # surface at -y - u_node = get_node_vars(u, equations, dg, l, 1, m, element) - f_node = flux(u_node, 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 3, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, l, 1, m, element) - - # surface at +y - u_node = get_node_vars(u, equations, dg, l, nnodes(dg), m, element) - f_node = flux(u_node, 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 4, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, l, nnodes(dg), m, element) - - # surface at -z - u_node = get_node_vars(u, equations, dg, l, m, 1, element) - f_node = flux(u_node, 3, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 5, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, l, m, 1, element) - - # surface at +z - u_node = get_node_vars(u, equations, dg, l, m, nnodes(dg), element) - f_node = flux(u_node, 3, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 6, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, l, m ,nnodes(dg), element) + inv_weight_left = inv(left_boundary_weight(dg.basis)) + inv_weight_right = inv(right_boundary_weight(dg.basis)) + @unpack surface_flux_values = cache.elements + + @threaded for element in eachelement(dg, cache) + for m in eachnode(dg), l in eachnode(dg) + # surface at -x + u_node = get_node_vars(u, equations, dg, 1, l, m, element) + f_node = flux(u_node, 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 1, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, 1, l, m, element) + + # surface at +x + u_node = get_node_vars(u, equations, dg, nnodes(dg), l, m, element) + f_node = flux(u_node, 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 2, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, nnodes(dg), l, m, element) + + # surface at -y + u_node = 
get_node_vars(u, equations, dg, l, 1, m, element) + f_node = flux(u_node, 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 3, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, l, 1, m, element) + + # surface at +y + u_node = get_node_vars(u, equations, dg, l, nnodes(dg), m, element) + f_node = flux(u_node, 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 4, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, l, nnodes(dg), m, element) + + # surface at -z + u_node = get_node_vars(u, equations, dg, l, m, 1, element) + f_node = flux(u_node, 3, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 5, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, l, m, 1, element) + + # surface at +z + u_node = get_node_vars(u, equations, dg, l, m, nnodes(dg), element) + f_node = flux(u_node, 3, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 6, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, l, m, nnodes(dg), element) + end end - end - return nothing + return nothing end - # Specialized interface flux computation because the upwind solver does # not require a standard numerical flux (Riemann solver). The flux splitting # already separates the solution information into right-traveling and @@ -240,43 +247,44 @@ function calc_interface_flux!(surface_flux_values, nonconservative_terms::False, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) - @unpack splitting = surface_integral - @unpack u, neighbor_ids, orientations = cache.interfaces - - @threaded for interface in eachinterface(dg, cache) - # Get neighboring elements - left_id = neighbor_ids[1, interface] - right_id = neighbor_ids[2, interface] - - # Determine interface direction with respect to elements: - # orientation = 1: left -> 2, right -> 1 - # orientation = 2: left -> 4, right -> 3 - # orientation = 3: left -> 6, right -> 5 - left_direction = 2 * orientations[interface] - right_direction = 2 * orientations[interface] - 1 - - for j in eachnode(dg), i in eachnode(dg) - # Pull the left and right solution data - u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, interface) - - # Compute the upwind coupling terms where right-traveling - # information comes from the left and left-traveling information - # comes from the right - flux_minus_rr = splitting(u_rr, Val{:minus}(), orientations[interface], equations) - flux_plus_ll = splitting(u_ll, Val{:plus}(), orientations[interface], equations) - - # Save the upwind coupling into the appropriate side of the elements - for v in eachvariable(equations) - surface_flux_values[v, i, j, left_direction, left_id] = flux_minus_rr[v] - surface_flux_values[v, i, j, right_direction, right_id] = flux_plus_ll[v] - end + @unpack splitting = surface_integral + @unpack u, neighbor_ids, orientations = cache.interfaces + + @threaded for interface in eachinterface(dg, cache) + # Get neighboring elements + left_id = neighbor_ids[1, interface] + right_id = neighbor_ids[2, interface] + + # Determine interface direction with respect to elements: + # orientation = 1: left -> 2, right -> 1 + # orientation = 2: left -> 4, right -> 3 + # orientation = 3: left -> 6, right -> 5 + left_direction = 2 * orientations[interface] + right_direction = 2 * orientations[interface] - 1 + + for j in eachnode(dg), i in eachnode(dg) 
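+            # (i, j) traverses the nodes of the two-dimensional interface face.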
+ # Pull the left and right solution data + u_ll, u_rr = get_surface_node_vars(u, equations, dg, i, j, interface) + + # Compute the upwind coupling terms where right-traveling + # information comes from the left and left-traveling information + # comes from the right + flux_minus_rr = splitting(u_rr, Val{:minus}(), orientations[interface], + equations) + flux_plus_ll = splitting(u_ll, Val{:plus}(), orientations[interface], + equations) + + # Save the upwind coupling into the appropriate side of the elements + for v in eachvariable(equations) + surface_flux_values[v, i, j, left_direction, left_id] = flux_minus_rr[v] + surface_flux_values[v, i, j, right_direction, right_id] = flux_plus_ll[v] + end + end end - end - return nothing + return nothing end - # Implementation of fully upwind SATs. The surface flux values are pre-computed # in the specialized `calc_interface_flux` routine. These SATs are still of # a strong form penalty type, except that the interior flux at a particular @@ -284,121 +292,119 @@ end function calc_surface_integral!(du, u, mesh::TreeMesh{3}, equations, surface_integral::SurfaceIntegralUpwind, dg::FDSBP, cache) - inv_weight_left = inv(left_boundary_weight(dg.basis)) - inv_weight_right = inv(right_boundary_weight(dg.basis)) - @unpack surface_flux_values = cache.elements - @unpack splitting = surface_integral - - - @threaded for element in eachelement(dg, cache) - for m in eachnode(dg), l in eachnode(dg) - # surface at -x - u_node = get_node_vars(u, equations, dg, 1, l, m, element) - f_node = splitting(u_node, Val{:plus}(), 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 1, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, 1, l, m, element) - - # surface at +x - u_node = get_node_vars(u, equations, dg, nnodes(dg), l, m, element) - f_node = splitting(u_node, Val{:minus}(), 1, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 2, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, nnodes(dg), l, m, element) - - # surface at -y - u_node = get_node_vars(u, equations, dg, l, 1, m, element) - f_node = splitting(u_node, Val{:plus}(), 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 3, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, l, 1, m, element) - - # surface at +y - u_node = get_node_vars(u, equations, dg, l, nnodes(dg), m, element) - f_node = splitting(u_node, Val{:minus}(), 2, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 4, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, l, nnodes(dg), m, element) - - # surface at -z - u_node = get_node_vars(u, equations, dg, l, m, 1, element) - f_node = splitting(u_node, Val{:plus}(), 3, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 5, element) - multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), - equations, dg, l, m, 1, element) - - # surface at +z - u_node = get_node_vars(u, equations, dg, l, m, nnodes(dg), element) - f_node = splitting(u_node, Val{:minus}(), 3, equations) - f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 6, element) - multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), - equations, dg, l, m, nnodes(dg), element) + inv_weight_left = inv(left_boundary_weight(dg.basis)) + inv_weight_right = inv(right_boundary_weight(dg.basis)) + @unpack 
surface_flux_values = cache.elements + @unpack splitting = surface_integral + + @threaded for element in eachelement(dg, cache) + for m in eachnode(dg), l in eachnode(dg) + # surface at -x + u_node = get_node_vars(u, equations, dg, 1, l, m, element) + f_node = splitting(u_node, Val{:plus}(), 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 1, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, 1, l, m, element) + + # surface at +x + u_node = get_node_vars(u, equations, dg, nnodes(dg), l, m, element) + f_node = splitting(u_node, Val{:minus}(), 1, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 2, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, nnodes(dg), l, m, element) + + # surface at -y + u_node = get_node_vars(u, equations, dg, l, 1, m, element) + f_node = splitting(u_node, Val{:plus}(), 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 3, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, l, 1, m, element) + + # surface at +y + u_node = get_node_vars(u, equations, dg, l, nnodes(dg), m, element) + f_node = splitting(u_node, Val{:minus}(), 2, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 4, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, l, nnodes(dg), m, element) + + # surface at -z + u_node = get_node_vars(u, equations, dg, l, m, 1, element) + f_node = splitting(u_node, Val{:plus}(), 3, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 5, element) + multiply_add_to_node_vars!(du, inv_weight_left, -(f_num - f_node), + equations, dg, l, m, 1, element) + + # surface at +z + u_node = get_node_vars(u, equations, dg, l, m, nnodes(dg), element) + f_node = splitting(u_node, Val{:minus}(), 3, equations) + f_num = get_node_vars(surface_flux_values, equations, dg, l, m, 6, element) + multiply_add_to_node_vars!(du, inv_weight_right, +(f_num - f_node), + equations, dg, l, m, nnodes(dg), element) + end end - end - return nothing + return nothing end - # AnalysisCallback function integrate_via_indices(func::Func, u, mesh::TreeMesh{3}, equations, - dg::FDSBP, cache, args...; normalize=true) where {Func} - # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) - - # Initialize integral with zeros of the right shape - integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) - - # Use quadrature to numerically integrate over entire domain - for element in eachelement(dg, cache) - volume_jacobian_ = volume_jacobian(element, mesh, cache) - for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) - integral += volume_jacobian_ * weights[i] * weights[j] * weights[k] * func(u, i, j, k, element, equations, dg, args...) + dg::FDSBP, cache, args...; normalize = true) where {Func} + # TODO: FD. This is rather inefficient right now and allocates... 
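+    # The 1D weights extracted next enter the 3D quadrature as the tensor
+    # product w_i * w_j * w_k.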
+ weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + + # Initialize integral with zeros of the right shape + integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) + + # Use quadrature to numerically integrate over entire domain + for element in eachelement(dg, cache) + volume_jacobian_ = volume_jacobian(element, mesh, cache) + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + integral += volume_jacobian_ * weights[i] * weights[j] * weights[k] * + func(u, i, j, k, element, equations, dg, args...) + end end - end - # Normalize with total volume - if normalize - integral = integral / total_volume(mesh) - end + # Normalize with total volume + if normalize + integral = integral / total_volume(mesh) + end - return integral + return integral end function calc_error_norms(func, u, t, analyzer, mesh::TreeMesh{3}, equations, initial_condition, dg::FDSBP, cache, cache_analysis) - # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) - @unpack node_coordinates = cache.elements - - # Set up data structures - l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) - linf_error = copy(l2_error) - - # Iterate over all elements for error calculations - for element in eachelement(dg, cache) - # Calculate errors at each node - volume_jacobian_ = volume_jacobian(element, mesh, cache) - - for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) - u_exact = initial_condition( - get_node_coords(node_coordinates, equations, dg, i, j, k, element), t, equations) - diff = func(u_exact, equations) - func( - get_node_vars(u, equations, dg, i, j, k, element), equations) - l2_error += diff.^2 * (weights[i] * weights[j] * weights[k] * volume_jacobian_) - linf_error = @. max(linf_error, abs(diff)) + # TODO: FD. This is rather inefficient right now and allocates... + weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + @unpack node_coordinates = cache.elements + + # Set up data structures + l2_error = zero(func(get_node_vars(u, equations, dg, 1, 1, 1, 1), equations)) + linf_error = copy(l2_error) + + # Iterate over all elements for error calculations + for element in eachelement(dg, cache) + # Calculate errors at each node + volume_jacobian_ = volume_jacobian(element, mesh, cache) + + for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer) + u_exact = initial_condition(get_node_coords(node_coordinates, equations, dg, + i, j, k, element), t, equations) + diff = func(u_exact, equations) - + func(get_node_vars(u, equations, dg, i, j, k, element), equations) + l2_error += diff .^ 2 * + (weights[i] * weights[j] * weights[k] * volume_jacobian_) + linf_error = @. max(linf_error, abs(diff)) + end end - end - # For L2 error, divide by total volume - total_volume_ = total_volume(mesh) - l2_error = @. sqrt(l2_error / total_volume_) + # For L2 error, divide by total volume + total_volume_ = total_volume(mesh) + l2_error = @. sqrt(l2_error / total_volume_) - return l2_error, linf_error + return l2_error, linf_error end - - end # @muladd diff --git a/src/solvers/solvers.jl b/src/solvers/solvers.jl index 465e051482c..a39f7cb1751 100644 --- a/src/solvers/solvers.jl +++ b/src/solvers/solvers.jl @@ -3,11 +3,11 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! 
format: noindent # define types for parabolic solvers include("solvers_parabolic.jl") include("dg.jl") include("dgmulti.jl") - end # @muladd diff --git a/src/solvers/solvers_parabolic.jl b/src/solvers/solvers_parabolic.jl index f253cdbd03d..ed095cd675f 100644 --- a/src/solvers/solvers_parabolic.jl +++ b/src/solvers/solvers_parabolic.jl @@ -25,7 +25,7 @@ the LDG solver is equivalent to [`ViscousFormulationBassiRebay1`](@ref) with an [DOI: 10.1137/S0036142997316712](https://doi.org/10.1137/S0036142997316712) """ struct ViscousFormulationLocalDG{P} - penalty_parameter::P + penalty_parameter::P end -default_parabolic_solver() = ViscousFormulationBassiRebay1() \ No newline at end of file +default_parabolic_solver() = ViscousFormulationBassiRebay1() diff --git a/src/time_integration/methods_2N.jl b/src/time_integration/methods_2N.jl index b6fbdd939c2..557e8272128 100644 --- a/src/time_integration/methods_2N.jl +++ b/src/time_integration/methods_2N.jl @@ -3,12 +3,11 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Abstract base type for time integration schemes of storage class `2N` abstract type SimpleAlgorithm2N end - """ CarpenterKennedy2N54() @@ -20,169 +19,176 @@ the low-storage explicit Runge-Kutta method of using the same interface as OrdinaryDiffEq.jl. """ struct CarpenterKennedy2N54 <: SimpleAlgorithm2N - a::SVector{5, Float64} - b::SVector{5, Float64} - c::SVector{5, Float64} - - function CarpenterKennedy2N54() - a = SVector(0.0, 567301805773.0 / 1357537059087.0,2404267990393.0 / 2016746695238.0, - 3550918686646.0 / 2091501179385.0, 1275806237668.0 / 842570457699.0) - b = SVector(1432997174477.0 / 9575080441755.0, 5161836677717.0 / 13612068292357.0, - 1720146321549.0 / 2090206949498.0, 3134564353537.0 / 4481467310338.0, - 2277821191437.0 / 14882151754819.0) - c = SVector(0.0, 1432997174477.0 / 9575080441755.0, 2526269341429.0 / 6820363962896.0, - 2006345519317.0 / 3224310063776.0, 2802321613138.0 / 2924317926251.0) - - new(a, b, c) - end + a::SVector{5, Float64} + b::SVector{5, Float64} + c::SVector{5, Float64} + + function CarpenterKennedy2N54() + a = SVector(0.0, 567301805773.0 / 1357537059087.0, + 2404267990393.0 / 2016746695238.0, + 3550918686646.0 / 2091501179385.0, 1275806237668.0 / 842570457699.0) + b = SVector(1432997174477.0 / 9575080441755.0, + 5161836677717.0 / 13612068292357.0, + 1720146321549.0 / 2090206949498.0, + 3134564353537.0 / 4481467310338.0, + 2277821191437.0 / 14882151754819.0) + c = SVector(0.0, 1432997174477.0 / 9575080441755.0, + 2526269341429.0 / 6820363962896.0, + 2006345519317.0 / 3224310063776.0, + 2802321613138.0 / 2924317926251.0) + + new(a, b, c) + end end - """ CarpenterKennedy2N43() Carpenter, Kennedy (1994) Third order 2N storage RK schemes with error control """ struct CarpenterKennedy2N43 <: SimpleAlgorithm2N - a::SVector{4, Float64} - b::SVector{4, Float64} - c::SVector{4, Float64} + a::SVector{4, Float64} + b::SVector{4, Float64} + c::SVector{4, Float64} - function CarpenterKennedy2N43() - a = SVector(0, 756391 / 934407, 36441873 / 15625000, 1953125 / 1085297) - b = SVector(8 / 141, 6627 / 2000, 609375 / 1085297, 198961 / 526383) - c = SVector(0, 8 / 141, 86 / 125, 1) + function CarpenterKennedy2N43() + a = SVector(0, 756391 / 934407, 36441873 / 15625000, 1953125 / 1085297) + b = SVector(8 / 141, 6627 / 2000, 609375 / 1085297, 198961 / 526383) + c = SVector(0, 8 / 141, 86 / 125, 1) - new(a, b, c) - end + new(a, b, c) + end end - # This struct is needed 
to fake https://github.com/SciML/OrdinaryDiffEq.jl/blob/0c2048a502101647ac35faabd80da8a5645beac7/src/integrators/type.jl#L1 mutable struct SimpleIntegrator2NOptions{Callback} - callback::Callback # callbacks; used in Trixi.jl - adaptive::Bool # whether the algorithm is adaptive; ignored - dtmax::Float64 # ignored - maxiters::Int # maximal number of time steps - tstops::Vector{Float64} # tstops from https://diffeq.sciml.ai/v6.8/basics/common_solver_opts/#Output-Control-1; ignored + callback::Callback # callbacks; used in Trixi.jl + adaptive::Bool # whether the algorithm is adaptive; ignored + dtmax::Float64 # ignored + maxiters::Int # maximal number of time steps + tstops::Vector{Float64} # tstops from https://diffeq.sciml.ai/v6.8/basics/common_solver_opts/#Output-Control-1; ignored end -function SimpleIntegrator2NOptions(callback, tspan; maxiters=typemax(Int), kwargs...) - SimpleIntegrator2NOptions{typeof(callback)}( - callback, false, Inf, maxiters, [last(tspan)]) +function SimpleIntegrator2NOptions(callback, tspan; maxiters = typemax(Int), kwargs...) + SimpleIntegrator2NOptions{typeof(callback)}(callback, false, Inf, maxiters, + [last(tspan)]) end # This struct is needed to fake https://github.com/SciML/OrdinaryDiffEq.jl/blob/0c2048a502101647ac35faabd80da8a5645beac7/src/integrators/type.jl#L77 # This implements the interface components described at # https://diffeq.sciml.ai/v6.8/basics/integrator/#Handing-Integrators-1 # which are used in Trixi.jl. -mutable struct SimpleIntegrator2N{RealT<:Real, uType, Params, Sol, F, Alg, SimpleIntegrator2NOptions} - u::uType # - du::uType - u_tmp::uType - t::RealT - dt::RealT # current time step - dtcache::RealT # ignored - iter::Int # current number of time steps (iteration) - p::Params # will be the semidiscretization from Trixi.jl - sol::Sol # faked - f::F - alg::Alg - opts::SimpleIntegrator2NOptions - finalstep::Bool # added for convenience +mutable struct SimpleIntegrator2N{RealT <: Real, uType, Params, Sol, F, Alg, + SimpleIntegrator2NOptions} + u::uType # + du::uType + u_tmp::uType + t::RealT + dt::RealT # current time step + dtcache::RealT # ignored + iter::Int # current number of time steps (iteration) + p::Params # will be the semidiscretization from Trixi.jl + sol::Sol # faked + f::F + alg::Alg + opts::SimpleIntegrator2NOptions + finalstep::Bool # added for convenience end # Forward integrator.stats.naccept to integrator.iter (see GitHub PR#771) function Base.getproperty(integrator::SimpleIntegrator2N, field::Symbol) - if field === :stats - return (naccept = getfield(integrator, :iter),) - end - # general fallback - return getfield(integrator, field) + if field === :stats + return (naccept = getfield(integrator, :iter),) + end + # general fallback + return getfield(integrator, field) end # Fakes `solve`: https://diffeq.sciml.ai/v6.8/basics/overview/#Solving-the-Problems-1 function solve(ode::ODEProblem, alg::T; - dt, callback=nothing, kwargs...) where {T<:SimpleAlgorithm2N} - u = copy(ode.u0) - du = similar(u) - u_tmp = similar(u) - t = first(ode.tspan) - iter = 0 - integrator = SimpleIntegrator2N(u, du, u_tmp, t, dt, zero(dt), iter, ode.p, - (prob=ode,), ode.f, alg, - SimpleIntegrator2NOptions(callback, ode.tspan; kwargs...), false) - - # initialize callbacks - if callback isa CallbackSet - for cb in callback.continuous_callbacks - error("unsupported") - end - for cb in callback.discrete_callbacks - cb.initialize(cb, integrator.u, integrator.t, integrator) + dt, callback = nothing, kwargs...) 
where {T <: SimpleAlgorithm2N} + u = copy(ode.u0) + du = similar(u) + u_tmp = similar(u) + t = first(ode.tspan) + iter = 0 + integrator = SimpleIntegrator2N(u, du, u_tmp, t, dt, zero(dt), iter, ode.p, + (prob = ode,), ode.f, alg, + SimpleIntegrator2NOptions(callback, ode.tspan; + kwargs...), false) + + # initialize callbacks + if callback isa CallbackSet + for cb in callback.continuous_callbacks + error("unsupported") + end + for cb in callback.discrete_callbacks + cb.initialize(cb, integrator.u, integrator.t, integrator) + end + elseif !isnothing(callback) + error("unsupported") end - elseif !isnothing(callback) - error("unsupported") - end - solve!(integrator) + solve!(integrator) end function solve!(integrator::SimpleIntegrator2N) - @unpack prob = integrator.sol - @unpack alg = integrator - t_end = last(prob.tspan) - callbacks = integrator.opts.callback - - integrator.finalstep = false - @trixi_timeit timer() "main loop" while !integrator.finalstep - if isnan(integrator.dt) - error("time step size `dt` is NaN") - end + @unpack prob = integrator.sol + @unpack alg = integrator + t_end = last(prob.tspan) + callbacks = integrator.opts.callback + + integrator.finalstep = false + @trixi_timeit timer() "main loop" while !integrator.finalstep + if isnan(integrator.dt) + error("time step size `dt` is NaN") + end - # if the next iteration would push the simulation beyond the end time, set dt accordingly - if integrator.t + integrator.dt > t_end || isapprox(integrator.t + integrator.dt, t_end) - integrator.dt = t_end - integrator.t - terminate!(integrator) - end + # if the next iteration would push the simulation beyond the end time, set dt accordingly + if integrator.t + integrator.dt > t_end || + isapprox(integrator.t + integrator.dt, t_end) + integrator.dt = t_end - integrator.t + terminate!(integrator) + end - # one time step - integrator.u_tmp .= 0 - for stage in eachindex(alg.c) - t_stage = integrator.t + integrator.dt * alg.c[stage] - integrator.f(integrator.du, integrator.u, prob.p, t_stage) - - a_stage = alg.a[stage] - b_stage_dt = alg.b[stage] * integrator.dt - @trixi_timeit timer() "Runge-Kutta step" begin - @threaded for i in eachindex(integrator.u) - integrator.u_tmp[i] = integrator.du[i] - integrator.u_tmp[i] * a_stage - integrator.u[i] += integrator.u_tmp[i] * b_stage_dt + # one time step + integrator.u_tmp .= 0 + for stage in eachindex(alg.c) + t_stage = integrator.t + integrator.dt * alg.c[stage] + integrator.f(integrator.du, integrator.u, prob.p, t_stage) + + a_stage = alg.a[stage] + b_stage_dt = alg.b[stage] * integrator.dt + @trixi_timeit timer() "Runge-Kutta step" begin + @threaded for i in eachindex(integrator.u) + integrator.u_tmp[i] = integrator.du[i] - + integrator.u_tmp[i] * a_stage + integrator.u[i] += integrator.u_tmp[i] * b_stage_dt + end + end end - end - end - integrator.iter += 1 - integrator.t += integrator.dt - - # handle callbacks - if callbacks isa CallbackSet - for cb in callbacks.discrete_callbacks - if cb.condition(integrator.u, integrator.t, integrator) - cb.affect!(integrator) + integrator.iter += 1 + integrator.t += integrator.dt + + # handle callbacks + if callbacks isa CallbackSet + for cb in callbacks.discrete_callbacks + if cb.condition(integrator.u, integrator.t, integrator) + cb.affect!(integrator) + end + end end - end - end - # respect maximum number of iterations - if integrator.iter >= integrator.opts.maxiters && !integrator.finalstep - @warn "Interrupted. Larger maxiters is needed." 
- terminate!(integrator) + # respect maximum number of iterations + if integrator.iter >= integrator.opts.maxiters && !integrator.finalstep + @warn "Interrupted. Larger maxiters is needed." + terminate!(integrator) + end end - end - return TimeIntegratorSolution((first(prob.tspan), integrator.t), - (prob.u0, integrator.u), - integrator.sol.prob) + return TimeIntegratorSolution((first(prob.tspan), integrator.t), + (prob.u0, integrator.u), + integrator.sol.prob) end # get a cache where the RHS can be stored @@ -194,21 +200,19 @@ u_modified!(integrator::SimpleIntegrator2N, ::Bool) = false # used by adaptive timestepping algorithms in DiffEq function set_proposed_dt!(integrator::SimpleIntegrator2N, dt) - integrator.dt = dt + integrator.dt = dt end # stop the time integration function terminate!(integrator::SimpleIntegrator2N) - integrator.finalstep = true - empty!(integrator.opts.tstops) + integrator.finalstep = true + empty!(integrator.opts.tstops) end # used for AMR function Base.resize!(integrator::SimpleIntegrator2N, new_size) - resize!(integrator.u, new_size) - resize!(integrator.du, new_size) - resize!(integrator.u_tmp, new_size) + resize!(integrator.u, new_size) + resize!(integrator.du, new_size) + resize!(integrator.u_tmp, new_size) end - - end # @muladd diff --git a/src/time_integration/methods_3Sstar.jl b/src/time_integration/methods_3Sstar.jl index 15299b9e197..03232c04122 100644 --- a/src/time_integration/methods_3Sstar.jl +++ b/src/time_integration/methods_3Sstar.jl @@ -3,12 +3,11 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # Abstract base type for time integration schemes of storage class `3S*` abstract type SimpleAlgorithm3Sstar end - """ HypDiffN3Erk3Sstar52() @@ -16,26 +15,37 @@ Five stage, second-order accurate explicit Runge-Kutta scheme with stability reg the hyperbolic diffusion equation with LLF flux and polynomials of degree polydeg=3. 
""" struct HypDiffN3Erk3Sstar52 <: SimpleAlgorithm3Sstar - gamma1::SVector{5, Float64} - gamma2::SVector{5, Float64} - gamma3::SVector{5, Float64} - beta::SVector{5, Float64} - delta::SVector{5, Float64} - c::SVector{5, Float64} - - function HypDiffN3Erk3Sstar52() - gamma1 = SVector(0.0000000000000000E+00, 5.2656474556752575E-01, 1.0385212774098265E+00, 3.6859755007388034E-01, -6.3350615190506088E-01) - gamma2 = SVector(1.0000000000000000E+00, 4.1892580153419307E-01, -2.7595818152587825E-02, 9.1271323651988631E-02, 6.8495995159465062E-01) - gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00, 4.1301005663300466E-01, -5.4537881202277507E-03) - beta = SVector(4.5158640252832094E-01, 7.5974836561844006E-01, 3.7561630338850771E-01, 2.9356700007428856E-02, 2.5205285143494666E-01) - delta = SVector(1.0000000000000000E+00, 1.3011720142005145E-01, 2.6579275844515687E-01, 9.9687218193685878E-01, 0.0000000000000000E+00) - c = SVector(0.0000000000000000E+00, 4.5158640252832094E-01, 1.0221535725056414E+00, 1.4280257701954349E+00, 7.1581334196229851E-01) - - new(gamma1, gamma2, gamma3, beta, delta, c) - end + gamma1::SVector{5, Float64} + gamma2::SVector{5, Float64} + gamma3::SVector{5, Float64} + beta::SVector{5, Float64} + delta::SVector{5, Float64} + c::SVector{5, Float64} + + function HypDiffN3Erk3Sstar52() + gamma1 = SVector(0.0000000000000000E+00, 5.2656474556752575E-01, + 1.0385212774098265E+00, 3.6859755007388034E-01, + -6.3350615190506088E-01) + gamma2 = SVector(1.0000000000000000E+00, 4.1892580153419307E-01, + -2.7595818152587825E-02, 9.1271323651988631E-02, + 6.8495995159465062E-01) + gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, + 0.0000000000000000E+00, 4.1301005663300466E-01, + -5.4537881202277507E-03) + beta = SVector(4.5158640252832094E-01, 7.5974836561844006E-01, + 3.7561630338850771E-01, 2.9356700007428856E-02, + 2.5205285143494666E-01) + delta = SVector(1.0000000000000000E+00, 1.3011720142005145E-01, + 2.6579275844515687E-01, 9.9687218193685878E-01, + 0.0000000000000000E+00) + c = SVector(0.0000000000000000E+00, 4.5158640252832094E-01, + 1.0221535725056414E+00, 1.4280257701954349E+00, + 7.1581334196229851E-01) + + new(gamma1, gamma2, gamma3, beta, delta, c) + end end - """ ParsaniKetchesonDeconinck3Sstar94() @@ -44,26 +54,49 @@ Parsani, Ketcheson, Deconinck (2013) [DOI: 10.1137/120885899](https://doi.org/10.1137/120885899) """ struct ParsaniKetchesonDeconinck3Sstar94 <: SimpleAlgorithm3Sstar - gamma1::SVector{9, Float64} - gamma2::SVector{9, Float64} - gamma3::SVector{9, Float64} - beta::SVector{9, Float64} - delta::SVector{9, Float64} - c::SVector{9, Float64} - - function ParsaniKetchesonDeconinck3Sstar94() - gamma1 = SVector(0.0000000000000000E+00, -4.6556413837561301E+00, -7.7202649689034453E-01, -4.0244202720632174E+00, -2.1296873883702272E-02, -2.4350219407769953E+00, 1.9856336960249132E-02, -2.8107894116913812E-01, 1.6894354373677900E-01) - gamma2 = SVector(1.0000000000000000E+00, 2.4992627683300688E+00, 5.8668202764174726E-01, 1.2051419816240785E+00, 3.4747937498564541E-01, 1.3213458736302766E+00, 3.1196363453264964E-01, 4.3514189245414447E-01, 2.3596980658341213E-01) - gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00, 7.6209857891449362E-01, -1.9811817832965520E-01, -6.2289587091629484E-01, -3.7522475499063573E-01, -3.3554373281046146E-01, -4.5609629702116454E-02) - beta = SVector(2.8363432481011769E-01, 9.7364980747486463E-01, 3.3823592364196498E-01, -3.5849518935750763E-01, 
-4.1139587569859462E-03, 1.4279689871485013E+00, 1.8084680519536503E-02, 1.6057708856060501E-01, 2.9522267863254809E-01) - delta = SVector(1.0000000000000000E+00, 1.2629238731608268E+00, 7.5749675232391733E-01, 5.1635907196195419E-01, -2.7463346616574083E-02, -4.3826743572318672E-01, 1.2735870231839268E+00, -6.2947382217730230E-01, 0.0000000000000000E+00) - c = SVector(0.0000000000000000E+00, 2.8363432481011769E-01, 5.4840742446661772E-01, 3.6872298094969475E-01, -6.8061183026103156E-01, 3.5185265855105619E-01, 1.6659419385562171E+00, 9.7152778807463247E-01, 9.0515694340066954E-01) - - new(gamma1, gamma2, gamma3, beta, delta, c) - end + gamma1::SVector{9, Float64} + gamma2::SVector{9, Float64} + gamma3::SVector{9, Float64} + beta::SVector{9, Float64} + delta::SVector{9, Float64} + c::SVector{9, Float64} + + function ParsaniKetchesonDeconinck3Sstar94() + gamma1 = SVector(0.0000000000000000E+00, -4.6556413837561301E+00, + -7.7202649689034453E-01, -4.0244202720632174E+00, + -2.1296873883702272E-02, -2.4350219407769953E+00, + 1.9856336960249132E-02, -2.8107894116913812E-01, + 1.6894354373677900E-01) + gamma2 = SVector(1.0000000000000000E+00, 2.4992627683300688E+00, + 5.8668202764174726E-01, 1.2051419816240785E+00, + 3.4747937498564541E-01, 1.3213458736302766E+00, + 3.1196363453264964E-01, 4.3514189245414447E-01, + 2.3596980658341213E-01) + gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, + 0.0000000000000000E+00, 7.6209857891449362E-01, + -1.9811817832965520E-01, -6.2289587091629484E-01, + -3.7522475499063573E-01, -3.3554373281046146E-01, + -4.5609629702116454E-02) + beta = SVector(2.8363432481011769E-01, 9.7364980747486463E-01, + 3.3823592364196498E-01, -3.5849518935750763E-01, + -4.1139587569859462E-03, 1.4279689871485013E+00, + 1.8084680519536503E-02, 1.6057708856060501E-01, + 2.9522267863254809E-01) + delta = SVector(1.0000000000000000E+00, 1.2629238731608268E+00, + 7.5749675232391733E-01, 5.1635907196195419E-01, + -2.7463346616574083E-02, -4.3826743572318672E-01, + 1.2735870231839268E+00, -6.2947382217730230E-01, + 0.0000000000000000E+00) + c = SVector(0.0000000000000000E+00, 2.8363432481011769E-01, + 5.4840742446661772E-01, 3.6872298094969475E-01, + -6.8061183026103156E-01, 3.5185265855105619E-01, + 1.6659419385562171E+00, 9.7152778807463247E-01, + 9.0515694340066954E-01) + + new(gamma1, gamma2, gamma3, beta, delta, c) + end end - """ ParsaniKetchesonDeconinck3Sstar32() @@ -72,182 +105,193 @@ Parsani, Ketcheson, Deconinck (2013) [DOI: 10.1137/120885899](https://doi.org/10.1137/120885899) """ struct ParsaniKetchesonDeconinck3Sstar32 <: SimpleAlgorithm3Sstar - gamma1::SVector{3, Float64} - gamma2::SVector{3, Float64} - gamma3::SVector{3, Float64} - beta::SVector{3, Float64} - delta::SVector{3, Float64} - c::SVector{3, Float64} - - function ParsaniKetchesonDeconinck3Sstar32() - gamma1 = SVector(0.0000000000000000E+00, -1.2664395576322218E-01, 1.1426980685848858E+00) - gamma2 = SVector(1.0000000000000000E+00, 6.5427782599406470E-01, -8.2869287683723744E-02) - gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, 0.0000000000000000E+00) - beta = SVector(7.2366074728360086E-01, 3.4217876502651023E-01, 3.6640216242653251E-01) - delta = SVector(1.0000000000000000E+00, 7.2196567116037724E-01, 0.0000000000000000E+00) - c = SVector(0.0000000000000000E+00, 7.2366074728360086E-01, 5.9236433182015646E-01) - - new(gamma1, gamma2, gamma3, beta, delta, c) - end + gamma1::SVector{3, Float64} + gamma2::SVector{3, Float64} + gamma3::SVector{3, Float64} + beta::SVector{3, Float64} 
+ delta::SVector{3, Float64} + c::SVector{3, Float64} + + function ParsaniKetchesonDeconinck3Sstar32() + gamma1 = SVector(0.0000000000000000E+00, -1.2664395576322218E-01, + 1.1426980685848858E+00) + gamma2 = SVector(1.0000000000000000E+00, 6.5427782599406470E-01, + -8.2869287683723744E-02) + gamma3 = SVector(0.0000000000000000E+00, 0.0000000000000000E+00, + 0.0000000000000000E+00) + beta = SVector(7.2366074728360086E-01, 3.4217876502651023E-01, + 3.6640216242653251E-01) + delta = SVector(1.0000000000000000E+00, 7.2196567116037724E-01, + 0.0000000000000000E+00) + c = SVector(0.0000000000000000E+00, 7.2366074728360086E-01, + 5.9236433182015646E-01) + + new(gamma1, gamma2, gamma3, beta, delta, c) + end end - mutable struct SimpleIntegrator3SstarOptions{Callback} - callback::Callback # callbacks; used in Trixi.jl - adaptive::Bool # whether the algorithm is adaptive; ignored - dtmax::Float64 # ignored - maxiters::Int # maximal number of time steps - tstops::Vector{Float64} # tstops from https://diffeq.sciml.ai/v6.8/basics/common_solver_opts/#Output-Control-1; ignored + callback::Callback # callbacks; used in Trixi.jl + adaptive::Bool # whether the algorithm is adaptive; ignored + dtmax::Float64 # ignored + maxiters::Int # maximal number of time steps + tstops::Vector{Float64} # tstops from https://diffeq.sciml.ai/v6.8/basics/common_solver_opts/#Output-Control-1; ignored end -function SimpleIntegrator3SstarOptions(callback, tspan; maxiters=typemax(Int), kwargs...) - SimpleIntegrator3SstarOptions{typeof(callback)}( - callback, false, Inf, maxiters, [last(tspan)]) +function SimpleIntegrator3SstarOptions(callback, tspan; maxiters = typemax(Int), + kwargs...) + SimpleIntegrator3SstarOptions{typeof(callback)}(callback, false, Inf, maxiters, + [last(tspan)]) end -mutable struct SimpleIntegrator3Sstar{RealT<:Real, uType, Params, Sol, F, Alg, SimpleIntegrator3SstarOptions} - u::uType # - du::uType - u_tmp1::uType - u_tmp2::uType - t::RealT - dt::RealT # current time step - dtcache::RealT # ignored - iter::Int # current number of time step (iteration) - p::Params # will be the semidiscretization from Trixi.jl - sol::Sol # faked - f::F - alg::Alg - opts::SimpleIntegrator3SstarOptions - finalstep::Bool # added for convenience +mutable struct SimpleIntegrator3Sstar{RealT <: Real, uType, Params, Sol, F, Alg, + SimpleIntegrator3SstarOptions} + u::uType # + du::uType + u_tmp1::uType + u_tmp2::uType + t::RealT + dt::RealT # current time step + dtcache::RealT # ignored + iter::Int # current number of time step (iteration) + p::Params # will be the semidiscretization from Trixi.jl + sol::Sol # faked + f::F + alg::Alg + opts::SimpleIntegrator3SstarOptions + finalstep::Bool # added for convenience end # Forward integrator.stats.naccept to integrator.iter (see GitHub PR#771) function Base.getproperty(integrator::SimpleIntegrator3Sstar, field::Symbol) - if field === :stats - return (naccept = getfield(integrator, :iter),) - end - # general fallback - return getfield(integrator, field) + if field === :stats + return (naccept = getfield(integrator, :iter),) + end + # general fallback + return getfield(integrator, field) end # Fakes `solve`: https://diffeq.sciml.ai/v6.8/basics/overview/#Solving-the-Problems-1 function solve(ode::ODEProblem, alg::T; - dt, callback=nothing, kwargs...) 
where {T<:SimpleAlgorithm3Sstar} - u = copy(ode.u0) - du = similar(u) - u_tmp1 = similar(u) - u_tmp2 = similar(u) - t = first(ode.tspan) - iter = 0 - integrator = SimpleIntegrator3Sstar(u, du, u_tmp1, u_tmp2, t, dt, zero(dt), iter, ode.p, - (prob=ode,), ode.f, alg, - SimpleIntegrator3SstarOptions(callback, ode.tspan; kwargs...), false) - - # initialize callbacks - if callback isa CallbackSet - for cb in callback.continuous_callbacks - error("unsupported") - end - for cb in callback.discrete_callbacks - cb.initialize(cb, integrator.u, integrator.t, integrator) + dt, callback = nothing, kwargs...) where {T <: SimpleAlgorithm3Sstar} + u = copy(ode.u0) + du = similar(u) + u_tmp1 = similar(u) + u_tmp2 = similar(u) + t = first(ode.tspan) + iter = 0 + integrator = SimpleIntegrator3Sstar(u, du, u_tmp1, u_tmp2, t, dt, zero(dt), iter, + ode.p, + (prob = ode,), ode.f, alg, + SimpleIntegrator3SstarOptions(callback, + ode.tspan; + kwargs...), false) + + # initialize callbacks + if callback isa CallbackSet + for cb in callback.continuous_callbacks + error("unsupported") + end + for cb in callback.discrete_callbacks + cb.initialize(cb, integrator.u, integrator.t, integrator) + end + elseif !isnothing(callback) + error("unsupported") end - elseif !isnothing(callback) - error("unsupported") - end - solve!(integrator) + solve!(integrator) end function solve!(integrator::SimpleIntegrator3Sstar) - @unpack prob = integrator.sol - @unpack alg = integrator - t_end = last(prob.tspan) - callbacks = integrator.opts.callback - - integrator.finalstep = false - @trixi_timeit timer() "main loop" while !integrator.finalstep - if isnan(integrator.dt) - error("time step size `dt` is NaN") - end + @unpack prob = integrator.sol + @unpack alg = integrator + t_end = last(prob.tspan) + callbacks = integrator.opts.callback + + integrator.finalstep = false + @trixi_timeit timer() "main loop" while !integrator.finalstep + if isnan(integrator.dt) + error("time step size `dt` is NaN") + end - # if the next iteration would push the simulation beyond the end time, set dt accordingly - if integrator.t + integrator.dt > t_end || isapprox(integrator.t + integrator.dt, t_end) - integrator.dt = t_end - integrator.t - terminate!(integrator) - end + # if the next iteration would push the simulation beyond the end time, set dt accordingly + if integrator.t + integrator.dt > t_end || + isapprox(integrator.t + integrator.dt, t_end) + integrator.dt = t_end - integrator.t + terminate!(integrator) + end - # one time step - integrator.u_tmp1 .= zero(eltype(integrator.u_tmp1)) - integrator.u_tmp2 .= integrator.u - for stage in eachindex(alg.c) - t_stage = integrator.t + integrator.dt * alg.c[stage] - prob.f(integrator.du, integrator.u, prob.p, t_stage) - - delta_stage = alg.delta[stage] - gamma1_stage = alg.gamma1[stage] - gamma2_stage = alg.gamma2[stage] - gamma3_stage = alg.gamma3[stage] - beta_stage_dt = alg.beta[stage] * integrator.dt - @trixi_timeit timer() "Runge-Kutta step" begin - @threaded for i in eachindex(integrator.u) - integrator.u_tmp1[i] += delta_stage * integrator.u[i] - integrator.u[i] = (gamma1_stage * integrator.u[i] + - gamma2_stage * integrator.u_tmp1[i] + - gamma3_stage * integrator.u_tmp2[i] + - beta_stage_dt * integrator.du[i]) + # one time step + integrator.u_tmp1 .= zero(eltype(integrator.u_tmp1)) + integrator.u_tmp2 .= integrator.u + for stage in eachindex(alg.c) + t_stage = integrator.t + integrator.dt * alg.c[stage] + prob.f(integrator.du, integrator.u, prob.p, t_stage) + + delta_stage = alg.delta[stage] + gamma1_stage 
= alg.gamma1[stage] + gamma2_stage = alg.gamma2[stage] + gamma3_stage = alg.gamma3[stage] + beta_stage_dt = alg.beta[stage] * integrator.dt + @trixi_timeit timer() "Runge-Kutta step" begin + @threaded for i in eachindex(integrator.u) + integrator.u_tmp1[i] += delta_stage * integrator.u[i] + integrator.u[i] = (gamma1_stage * integrator.u[i] + + gamma2_stage * integrator.u_tmp1[i] + + gamma3_stage * integrator.u_tmp2[i] + + beta_stage_dt * integrator.du[i]) + end + end end - end - end - integrator.iter += 1 - integrator.t += integrator.dt - - # handle callbacks - if callbacks isa CallbackSet - for cb in callbacks.discrete_callbacks - if cb.condition(integrator.u, integrator.t, integrator) - cb.affect!(integrator) + integrator.iter += 1 + integrator.t += integrator.dt + + # handle callbacks + if callbacks isa CallbackSet + for cb in callbacks.discrete_callbacks + if cb.condition(integrator.u, integrator.t, integrator) + cb.affect!(integrator) + end + end end - end - end - # respect maximum number of iterations - if integrator.iter >= integrator.opts.maxiters && !integrator.finalstep - @warn "Interrupted. Larger maxiters is needed." - terminate!(integrator) + # respect maximum number of iterations + if integrator.iter >= integrator.opts.maxiters && !integrator.finalstep + @warn "Interrupted. Larger maxiters is needed." + terminate!(integrator) + end end - end - return TimeIntegratorSolution((first(prob.tspan), integrator.t), - (prob.u0, integrator.u), - integrator.sol.prob) + return TimeIntegratorSolution((first(prob.tspan), integrator.t), + (prob.u0, integrator.u), + integrator.sol.prob) end # get a cache where the RHS can be stored get_du(integrator::SimpleIntegrator3Sstar) = integrator.du -get_tmp_cache(integrator::SimpleIntegrator3Sstar) = (integrator.u_tmp1, integrator.u_tmp2) +function get_tmp_cache(integrator::SimpleIntegrator3Sstar) + (integrator.u_tmp1, integrator.u_tmp2) +end # some algorithms from DiffEq like FSAL-ones need to be informed when a callback has modified u u_modified!(integrator::SimpleIntegrator3Sstar, ::Bool) = false # used by adaptive timestepping algorithms in DiffEq function set_proposed_dt!(integrator::SimpleIntegrator3Sstar, dt) - integrator.dt = dt + integrator.dt = dt end # stop the time integration function terminate!(integrator::SimpleIntegrator3Sstar) - integrator.finalstep = true - empty!(integrator.opts.tstops) + integrator.finalstep = true + empty!(integrator.opts.tstops) end # used for AMR function Base.resize!(integrator::SimpleIntegrator3Sstar, new_size) - resize!(integrator.u, new_size) - resize!(integrator.du, new_size) - resize!(integrator.u_tmp1, new_size) - resize!(integrator.u_tmp2, new_size) + resize!(integrator.u, new_size) + resize!(integrator.du, new_size) + resize!(integrator.u_tmp1, new_size) + resize!(integrator.u_tmp2, new_size) end - - end # @muladd diff --git a/src/time_integration/time_integration.jl b/src/time_integration/time_integration.jl index a661c0b25ee..539e00ff700 100644 --- a/src/time_integration/time_integration.jl +++ b/src/time_integration/time_integration.jl @@ -3,18 +3,16 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! 
format: noindent # Wrapper type for solutions from Trixi.jl's own time integrators, partially mimicking # SciMLBase.ODESolution struct TimeIntegratorSolution{tType, uType, P} - t::tType - u::uType - prob::P + t::tType + u::uType + prob::P end include("methods_2N.jl") include("methods_3Sstar.jl") - - end # @muladd diff --git a/src/visualization/recipes_plots.jl b/src/visualization/recipes_plots.jl index db621199947..d15f7e542e1 100644 --- a/src/visualization/recipes_plots.jl +++ b/src/visualization/recipes_plots.jl @@ -3,273 +3,276 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent # Visualize a single variable in a 2D plot (default: heatmap) # # Note: This is an experimental feature and may be changed in future releases without notice. RecipesBase.@recipe function f(pds::PlotDataSeries{<:AbstractPlotData{2}}) - @unpack plot_data, variable_id = pds - @unpack x, y, data, variable_names, orientation_x, orientation_y = plot_data - - # Set geometric properties - xlims --> (x[begin], x[end]) - ylims --> (y[begin], y[end]) - aspect_ratio --> :equal - - # Set annotation properties - legend --> :none - title --> variable_names[variable_id] - colorbar --> :true - xguide --> _get_guide(orientation_x) - yguide --> _get_guide(orientation_y) - - # Set series properties - seriestype --> :heatmap - - # Return data for plotting - x, y, data[variable_id] + @unpack plot_data, variable_id = pds + @unpack x, y, data, variable_names, orientation_x, orientation_y = plot_data + + # Set geometric properties + xlims --> (x[begin], x[end]) + ylims --> (y[begin], y[end]) + aspect_ratio --> :equal + + # Set annotation properties + legend --> :none + title --> variable_names[variable_id] + colorbar --> :true + xguide --> _get_guide(orientation_x) + yguide --> _get_guide(orientation_y) + + # Set series properties + seriestype --> :heatmap + + # Return data for plotting + x, y, data[variable_id] end # Visualize the mesh in a 2D plot # # Note: This is an experimental feature and may be changed in future releases without notice. RecipesBase.@recipe function f(pm::PlotMesh{<:AbstractPlotData{2}}) - @unpack plot_data = pm - @unpack x, y, mesh_vertices_x, mesh_vertices_y = plot_data - - # Set geometric and annotation properties - xlims --> (x[begin], x[end]) - ylims --> (y[begin], y[end]) - aspect_ratio --> :equal - legend --> :none - grid --> false - - # Set series properties - seriestype --> :path - linecolor --> :grey - linewidth --> 1 - - # Return data for plotting - mesh_vertices_x, mesh_vertices_y + @unpack plot_data = pm + @unpack x, y, mesh_vertices_x, mesh_vertices_y = plot_data + + # Set geometric and annotation properties + xlims --> (x[begin], x[end]) + ylims --> (y[begin], y[end]) + aspect_ratio --> :equal + legend --> :none + grid --> false + + # Set series properties + seriestype --> :path + linecolor --> :grey + linewidth --> 1 + + # Return data for plotting + mesh_vertices_x, mesh_vertices_y end - # Visualize the mesh in a 2D plot # # Note: This is an experimental feature and may be changed in future releases without notice. 
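Before the Cartesian `PlotMesh` specialization below, a minimal usage sketch may help show how the recipes in this file are driven in practice. It follows the `plot(pd[...])` / `plot!(getmesh(pd))` pattern documented later in this patch; the variable name `"rho"` and the solved 2D simulation result `sol` are illustrative assumptions, not part of the patch itself:

```julia
using Plots

pd = PlotData2D(sol)   # wrap the solution for plotting (constructor defined below)
plot(pd["rho"])        # PlotDataSeries recipe: heatmap of a single variable
plot!(getmesh(pd))     # PlotMesh recipe: overlay the mesh as grey grid lines
```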
-RecipesBase.@recipe function f(pm::PlotMesh{<:PlotData2DCartesian{<:Any, <:AbstractVector{<:AbstractVector}}}) - @unpack plot_data = pm - @unpack x, y, mesh_vertices_x, mesh_vertices_y = plot_data - - # Set geometric and annotation properties - xlims --> (minimum(x), maximum(x)) - ylims --> (minimum(y), maximum(y)) - aspect_ratio --> :equal - legend --> :none - grid --> false - - # Set series properties - seriestype --> :path - linecolor --> :grey - linewidth --> 1 - - # Return data for plotting - mesh_vertices_x, mesh_vertices_y +RecipesBase.@recipe function f(pm::PlotMesh{ + <:PlotData2DCartesian{<:Any, + <:AbstractVector{ + <:AbstractVector + }}}) + @unpack plot_data = pm + @unpack x, y, mesh_vertices_x, mesh_vertices_y = plot_data + + # Set geometric and annotation properties + xlims --> (minimum(x), maximum(x)) + ylims --> (minimum(y), maximum(y)) + aspect_ratio --> :equal + legend --> :none + grid --> false + + # Set series properties + seriestype --> :path + linecolor --> :grey + linewidth --> 1 + + # Return data for plotting + mesh_vertices_x, mesh_vertices_y end - # Plot all available variables at once for convenience # # Note: This is an experimental feature and may be changed in future releases without notice. RecipesBase.@recipe function f(pd::AbstractPlotData) - # Create layout that is as square as possible, when there are more than 3 subplots. - # This is done with a preference for more columns than rows if not. - - if length(pd) <= 3 - cols = length(pd) - rows = 1 - else - cols = ceil(Int, sqrt(length(pd))) - rows = ceil(Int, length(pd)/cols) - end - - layout := (rows, cols) - - # Plot all existing variables - for (i, (variable_name, series)) in enumerate(pd) - RecipesBase.@series begin - subplot := i - series + # Create layout that is as square as possible, when there are more than 3 subplots. + # This is done with a preference for more columns than rows if not. + + if length(pd) <= 3 + cols = length(pd) + rows = 1 + else + cols = ceil(Int, sqrt(length(pd))) + rows = ceil(Int, length(pd) / cols) + end + + layout := (rows, cols) + + # Plot all existing variables + for (i, (variable_name, series)) in enumerate(pd) + RecipesBase.@series begin + subplot := i + series + end end - end - - # Fill remaining subplots with empty plot - for i in (length(pd)+1):(rows*cols) - RecipesBase.@series begin - subplot := i - axis := false - ticks := false - legend := false - [], [] + + # Fill remaining subplots with empty plot + for i in (length(pd) + 1):(rows * cols) + RecipesBase.@series begin + subplot := i + axis := false + ticks := false + legend := false + [], [] + end end - end end # Plot a single variable. RecipesBase.@recipe function f(pds::PlotDataSeries{<:AbstractPlotData{1}}) - @unpack plot_data, variable_id = pds - @unpack x, data, variable_names, orientation_x = plot_data + @unpack plot_data, variable_id = pds + @unpack x, data, variable_names, orientation_x = plot_data - # Set geometric properties - xlims --> (x[begin], x[end]) + # Set geometric properties + xlims --> (x[begin], x[end]) - # Set annotation properties - legend --> :none - title --> variable_names[variable_id] - xguide --> _get_guide(orientation_x) + # Set annotation properties + legend --> :none + title --> variable_names[variable_id] + xguide --> _get_guide(orientation_x) - # Return data for plotting - x, data[:, variable_id] + # Return data for plotting + x, data[:, variable_id] end # Plot the mesh as vertical lines from a PlotMesh object. 
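One note on the `-->` syntax used throughout these recipes, including the vertical-line mesh recipe that follows: RecipesBase.jl treats `-->` as a default that user-supplied attributes override, whereas `:=` (as in the layout recipe above) forces a value. A small sketch, assuming an existing plot data object `pd` with a variable named `"rho"` (both illustrative):

```julia
# `seriestype --> :heatmap` in the recipe is only a default ...
plot(pd["rho"])                          # renders as a heatmap
plot(pd["rho"], seriestype = :contour)   # ... so a user attribute still wins
```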
RecipesBase.@recipe function f(pm::PlotMesh{<:AbstractPlotData{1}}) - @unpack plot_data = pm - @unpack x, mesh_vertices_x = plot_data + @unpack plot_data = pm + @unpack x, mesh_vertices_x = plot_data - # Set geometric and annotation properties - xlims --> (x[begin], x[end]) - legend --> :none + # Set geometric and annotation properties + xlims --> (x[begin], x[end]) + legend --> :none - # Set series properties - seriestype --> :vline - linecolor --> :grey - linewidth --> 1 + # Set series properties + seriestype --> :vline + linecolor --> :grey + linewidth --> 1 - # Return data for plotting - mesh_vertices_x + # Return data for plotting + mesh_vertices_x end - # Create a plot directly from a TrixiODESolution for convenience # The plot is created by a PlotData1D or PlotData2D object. # # Note: This is an experimental feature and may be changed in future releases without notice. RecipesBase.@recipe function f(sol::TrixiODESolution) - # Redirect everything to the recipes below - return sol.u[end], sol.prob.p + # Redirect everything to the recipes below + return sol.u[end], sol.prob.p end # Recipe for general semidiscretizations # Note: If you change the defaults values here, you need to also change them in the PlotData1D or PlotData2D # constructor. RecipesBase.@recipe function f(u, semi::AbstractSemidiscretization; - solution_variables=nothing) - if ndims(semi) == 1 - return PlotData1D(u, semi; solution_variables=solution_variables) - else - return PlotData2D(u, semi; solution_variables=solution_variables) - end + solution_variables = nothing) + if ndims(semi) == 1 + return PlotData1D(u, semi; solution_variables = solution_variables) + else + return PlotData2D(u, semi; solution_variables = solution_variables) + end end # Recipe specifically for TreeMesh-type solutions # Note: If you change the defaults values here, you need to also change them in the PlotData1D or PlotData2D # constructor. RecipesBase.@recipe function f(u, semi::SemidiscretizationHyperbolic{<:TreeMesh}; - solution_variables=nothing, - grid_lines=true, max_supported_level=11, nvisnodes=nothing, slice=:xy, - point=(0.0, 0.0, 0.0), curve=nothing) - # Create a PlotData1D or PlotData2D object depending on the dimension. - if ndims(semi) == 1 - return PlotData1D(u, semi; solution_variables, nvisnodes, slice, point, curve) - else - return PlotData2D(u, semi; - solution_variables, grid_lines, max_supported_level, - nvisnodes, slice, point) - end + solution_variables = nothing, + grid_lines = true, max_supported_level = 11, + nvisnodes = nothing, slice = :xy, + point = (0.0, 0.0, 0.0), curve = nothing) + # Create a PlotData1D or PlotData2D object depending on the dimension. 
+ if ndims(semi) == 1 + return PlotData1D(u, semi; solution_variables, nvisnodes, slice, point, curve) + else + return PlotData2D(u, semi; + solution_variables, grid_lines, max_supported_level, + nvisnodes, slice, point) + end end # Series recipe for PlotData2DTriangulated RecipesBase.@recipe function f(pds::PlotDataSeries{<:PlotData2DTriangulated}) + pd = pds.plot_data + @unpack variable_id = pds + @unpack x, y, data, t, variable_names = pd + + # extract specific solution field to plot + data_field = zeros(eltype(first(data)), size(data)) + for (i, data_i) in enumerate(data) + data_field[i] = data_i[variable_id] + end - pd = pds.plot_data - @unpack variable_id = pds - @unpack x, y, data, t, variable_names = pd - - # extract specific solution field to plot - data_field = zeros(eltype(first(data)), size(data)) - for (i, data_i) in enumerate(data) - data_field[i] = data_i[variable_id] - end - - legend --> false - aspect_ratio --> 1 - title --> pd.variable_names[variable_id] - xlims --> extrema(x) - ylims --> extrema(y) - xguide --> _get_guide(1) - yguide --> _get_guide(2) - seriestype --> :heatmap - colorbar --> :true - - return DGTriPseudocolor(global_plotting_triangulation_triplot((x, y), data_field, t)...) + legend --> false + aspect_ratio --> 1 + title --> pd.variable_names[variable_id] + xlims --> extrema(x) + ylims --> extrema(y) + xguide --> _get_guide(1) + yguide --> _get_guide(2) + seriestype --> :heatmap + colorbar --> :true + + return DGTriPseudocolor(global_plotting_triangulation_triplot((x, y), data_field, + t)...) end # Visualize a 2D mesh given an `PlotData2DTriangulated` object RecipesBase.@recipe function f(pm::PlotMesh{<:PlotData2DTriangulated}) - pd = pm.plot_data - @unpack x_face, y_face = pd - - # This line separates solution lines on each edge by NaNs to ensure that they are rendered - # separately. The coordinates `xf`, `yf` and the solution `sol_f`` are assumed to be a matrix - # whose columns correspond to different elements. We add NaN separators by appending a row of - # NaNs to this matrix. We also flatten (e.g., apply `vec` to) the result, as this speeds up - # plotting. - x_face, y_face = map(x->vec(vcat(x, fill(NaN, 1, size(x, 2)))), (x_face, y_face)) - - xlims --> extrema(x_face) - ylims --> extrema(y_face) - aspect_ratio --> :equal - legend --> :none - - # Set series properties - seriestype --> :path - linecolor --> :grey - linewidth --> 1 - - return x_face, y_face + pd = pm.plot_data + @unpack x_face, y_face = pd + + # This line separates solution lines on each edge by NaNs to ensure that they are rendered + # separately. The coordinates `xf`, `yf` and the solution `sol_f`` are assumed to be a matrix + # whose columns correspond to different elements. We add NaN separators by appending a row of + # NaNs to this matrix. We also flatten (e.g., apply `vec` to) the result, as this speeds up + # plotting. + x_face, y_face = map(x -> vec(vcat(x, fill(NaN, 1, size(x, 2)))), (x_face, y_face)) + + xlims --> extrema(x_face) + ylims --> extrema(y_face) + aspect_ratio --> :equal + legend --> :none + + # Set series properties + seriestype --> :path + linecolor --> :grey + linewidth --> 1 + + return x_face, y_face end # Visualizes a single scalar field. Intended for use with ScalarPlotData2D. # Example usage: `plot(ScalarPlotData2D(u, semi))`. RecipesBase.@recipe function f(pd::PlotData2DTriangulated{<:ScalarData}) - - @unpack x, y, data, t, variable_names = pd - - title_string = isnothing(variable_names) ? 
"" : variable_names - - legend --> false - aspect_ratio --> 1 - title --> title_string - xlims --> extrema(x) - ylims --> extrema(y) - xguide --> _get_guide(1) - yguide --> _get_guide(2) - seriestype --> :heatmap - colorbar --> :true - - # Since `data` is simply a ScalarData wrapper around the actual plot data, we pass in - # `data.data` instead. - return DGTriPseudocolor(global_plotting_triangulation_triplot((x, y), data.data, t)...) + @unpack x, y, data, t, variable_names = pd + + title_string = isnothing(variable_names) ? "" : variable_names + + legend --> false + aspect_ratio --> 1 + title --> title_string + xlims --> extrema(x) + ylims --> extrema(y) + xguide --> _get_guide(1) + yguide --> _get_guide(2) + seriestype --> :heatmap + colorbar --> :true + + # Since `data` is simply a ScalarData wrapper around the actual plot data, we pass in + # `data.data` instead. + return DGTriPseudocolor(global_plotting_triangulation_triplot((x, y), data.data, + t)...) end -RecipesBase.@recipe function f(cb::DiscreteCallback{<:Any, <:TimeSeriesCallback}, point_id::Integer) - return cb.affect!, point_id +RecipesBase.@recipe function f(cb::DiscreteCallback{<:Any, <:TimeSeriesCallback}, + point_id::Integer) + return cb.affect!, point_id end -RecipesBase.@recipe function f(time_series_callback::TimeSeriesCallback, point_id::Integer) - return PlotData1D(time_series_callback, point_id) +RecipesBase.@recipe function f(time_series_callback::TimeSeriesCallback, + point_id::Integer) + return PlotData1D(time_series_callback, point_id) end - - end # @muladd diff --git a/src/visualization/types.jl b/src/visualization/types.jl index 62cfe93038d..b294ce25607 100644 --- a/src/visualization/types.jl +++ b/src/visualization/types.jl @@ -4,16 +4,18 @@ # TimeIntegratorSolution. # # Note: This is an experimental feature and may be changed in future releases without notice. +#! format: off const TrixiODESolution = Union{ODESolution{T, N, uType, uType2, DType, tType, rateType, P} where {T, N, uType, uType2, DType, tType, rateType, P<:ODEProblem{uType_, tType_, isinplace, P_, F_} where {uType_, tType_, isinplace, P_<:AbstractSemidiscretization, F_}}, TimeIntegratorSolution} +#! format: on # By default, Julia/LLVM does not use fused multiply-add operations (FMAs). # Since these FMAs can increase the performance of many numerical algorithms, # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin - +#! format: noindent # This file holds plotting types which can be used for both Plots.jl and Makie.jl. @@ -28,12 +30,12 @@ Base.length(pd::AbstractPlotData) = length(pd.variable_names) Base.size(pd::AbstractPlotData) = (length(pd),) Base.keys(pd::AbstractPlotData) = tuple(pd.variable_names...) -function Base.iterate(pd::AbstractPlotData, state=1) - if state > length(pd) - return nothing - else - return (pd.variable_names[state] => pd[pd.variable_names[state]], state + 1) - end +function Base.iterate(pd::AbstractPlotData, state = 1) + if state > length(pd) + return nothing + else + return (pd.variable_names[state] => pd[pd.variable_names[state]], state + 1) + end end """ @@ -45,19 +47,17 @@ Extract a single variable `variable_name` from `pd` for plotting with `Plots.plo This is an experimental feature and may change in future releases. 
""" function Base.getindex(pd::AbstractPlotData, variable_name) - variable_id = findfirst(isequal(variable_name), pd.variable_names) + variable_id = findfirst(isequal(variable_name), pd.variable_names) - if isnothing(variable_id) - throw(KeyError(variable_name)) - end + if isnothing(variable_id) + throw(KeyError(variable_name)) + end - return PlotDataSeries(pd, variable_id) + return PlotDataSeries(pd, variable_id) end Base.eltype(pd::AbstractPlotData) = Pair{String, PlotDataSeries{typeof(pd)}} - - """ PlotData2D @@ -67,53 +67,55 @@ mesh. !!! warning "Experimental implementation" This is an experimental feature and may change in future releases. """ -struct PlotData2DCartesian{Coordinates, Data, VariableNames, Vertices} <: AbstractPlotData{2} - x::Coordinates - y::Coordinates - data::Data - variable_names::VariableNames - mesh_vertices_x::Vertices - mesh_vertices_y::Vertices - orientation_x::Int - orientation_y::Int +struct PlotData2DCartesian{Coordinates, Data, VariableNames, Vertices} <: + AbstractPlotData{2} + x::Coordinates + y::Coordinates + data::Data + variable_names::VariableNames + mesh_vertices_x::Vertices + mesh_vertices_y::Vertices + orientation_x::Int + orientation_y::Int end # Show only a truncated output for convenience (the full data does not make sense) function Base.show(io::IO, pd::PlotData2DCartesian) - @nospecialize pd # reduce precompilation time - - print(io, "PlotData2DCartesian{", - typeof(pd.x), ",", - typeof(pd.data), ",", - typeof(pd.variable_names), ",", - typeof(pd.mesh_vertices_x), - "}(, , , , , )") + @nospecialize pd # reduce precompilation time + + print(io, "PlotData2DCartesian{", + typeof(pd.x), ",", + typeof(pd.data), ",", + typeof(pd.variable_names), ",", + typeof(pd.mesh_vertices_x), + "}(, , , , , )") end - # holds plotting information for UnstructuredMesh2D and DGMulti-compatible meshes -struct PlotData2DTriangulated{DataType, NodeType, FaceNodeType, FaceDataType, VariableNames, PlottingTriangulation} <: AbstractPlotData{2} - x::NodeType # physical nodal coordinates, size (num_plotting_nodes x num_elements) - y::NodeType - data::DataType - t::PlottingTriangulation - x_face::FaceNodeType - y_face::FaceNodeType - face_data::FaceDataType - variable_names::VariableNames +struct PlotData2DTriangulated{DataType, NodeType, FaceNodeType, FaceDataType, + VariableNames, PlottingTriangulation} <: + AbstractPlotData{2} + x::NodeType # physical nodal coordinates, size (num_plotting_nodes x num_elements) + y::NodeType + data::DataType + t::PlottingTriangulation + x_face::FaceNodeType + y_face::FaceNodeType + face_data::FaceDataType + variable_names::VariableNames end # Show only a truncated output for convenience (the full data does not make sense) function Base.show(io::IO, pd::PlotData2DTriangulated) - @nospecialize pd # reduce precompilation time - - print(io, "PlotData2DTriangulated{", - typeof(pd.x), ", ", - typeof(pd.data), ", ", - typeof(pd.x_face), ", ", - typeof(pd.face_data), ", ", - typeof(pd.variable_names), - "}(, , , , , , , )") + @nospecialize pd # reduce precompilation time + + print(io, "PlotData2DTriangulated{", + typeof(pd.x), ", ", + typeof(pd.data), ", ", + typeof(pd.x_face), ", ", + typeof(pd.face_data), ", ", + typeof(pd.variable_names), + "}(, , , , , , , )") end """ @@ -126,49 +128,49 @@ mesh. This is an experimental feature and may change in future releases. 
""" struct PlotData1D{Coordinates, Data, VariableNames, Vertices} <: AbstractPlotData{1} - x::Coordinates - data::Data - variable_names::VariableNames - mesh_vertices_x::Vertices - orientation_x::Integer + x::Coordinates + data::Data + variable_names::VariableNames + mesh_vertices_x::Vertices + orientation_x::Integer end # Show only a truncated output for convenience (the full data does not make sense) function Base.show(io::IO, pd::PlotData1D) - print(io, "PlotData1D{", - typeof(pd.x), ",", - typeof(pd.data), ",", - typeof(pd.variable_names), ",", - typeof(pd.mesh_vertices_x), - "}(, , , )") + print(io, "PlotData1D{", + typeof(pd.x), ",", + typeof(pd.data), ",", + typeof(pd.variable_names), ",", + typeof(pd.mesh_vertices_x), + "}(, , , )") end # Auxiliary data structure for visualizing a single variable # # Note: This is an experimental feature and may be changed in future releases without notice. -struct PlotDataSeries{PD<:AbstractPlotData} - plot_data::PD - variable_id::Int +struct PlotDataSeries{PD <: AbstractPlotData} + plot_data::PD + variable_id::Int end # Show only a truncated output for convenience (the full data does not make sense) function Base.show(io::IO, pds::PlotDataSeries) - @nospecialize pds # reduce precompilation time + @nospecialize pds # reduce precompilation time - print(io, "PlotDataSeries{", typeof(pds.plot_data), "}(, ", - pds.variable_id, ")") + print(io, "PlotDataSeries{", typeof(pds.plot_data), "}(, ", + pds.variable_id, ")") end # Generic PlotMesh wrapper type. -struct PlotMesh{PD<:AbstractPlotData} - plot_data::PD +struct PlotMesh{PD <: AbstractPlotData} + plot_data::PD end # Show only a truncated output for convenience (the full data does not make sense) function Base.show(io::IO, pm::PlotMesh) - @nospecialize pm # reduce precompilation time + @nospecialize pm # reduce precompilation time - print(io, "PlotMesh{", typeof(pm.plot_data), "}()") + print(io, "PlotMesh{", typeof(pm.plot_data), "}()") end """ @@ -181,7 +183,6 @@ Extract grid lines from `pd` for plotting with `Plots.plot`. """ getmesh(pd::AbstractPlotData) = PlotMesh(pd) - """ PlotData2D(u, semi [or mesh, equations, solver, cache]; solution_variables=nothing, @@ -226,52 +227,67 @@ julia> plot(pd["scalar"]) # To plot only a single variable julia> plot!(getmesh(pd)) # To add grid lines to the plot ``` """ -PlotData2D(u_ode, semi; kwargs...) = PlotData2D(wrap_array_native(u_ode, semi), - mesh_equations_solver_cache(semi)...; - kwargs...) +function PlotData2D(u_ode, semi; kwargs...) + PlotData2D(wrap_array_native(u_ode, semi), + mesh_equations_solver_cache(semi)...; + kwargs...) +end # Redirect `PlotDataTriangulated2D` constructor. -PlotData2DTriangulated(u_ode, semi; kwargs...) = PlotData2DTriangulated(wrap_array_native(u_ode, semi), - mesh_equations_solver_cache(semi)...; - kwargs...) +function PlotData2DTriangulated(u_ode, semi; kwargs...) + PlotData2DTriangulated(wrap_array_native(u_ode, semi), + mesh_equations_solver_cache(semi)...; + kwargs...) +end # Create a PlotData2DCartesian object for TreeMeshes on default. -PlotData2D(u, mesh::TreeMesh, equations, solver, cache; kwargs...) = PlotData2DCartesian(u, mesh::TreeMesh, equations, solver, cache; kwargs...) +function PlotData2D(u, mesh::TreeMesh, equations, solver, cache; kwargs...) + PlotData2DCartesian(u, mesh::TreeMesh, equations, solver, cache; kwargs...) +end # Create a PlotData2DTriangulated object for any type of mesh other than the TreeMesh. -PlotData2D(u, mesh, equations, solver, cache; kwargs...) 
= PlotData2DTriangulated(u, mesh, equations, solver, cache; kwargs...) +function PlotData2D(u, mesh, equations, solver, cache; kwargs...) + PlotData2DTriangulated(u, mesh, equations, solver, cache; kwargs...) +end # Create a PlotData2DCartesian for a TreeMesh. function PlotData2DCartesian(u, mesh::TreeMesh, equations, solver, cache; - solution_variables=nothing, - grid_lines=true, max_supported_level=11, nvisnodes=nothing, - slice=:xy, point=(0.0, 0.0, 0.0)) - @assert ndims(mesh) in (2, 3) "unsupported number of dimensions $ndims (must be 2 or 3)" - solution_variables_ = digest_solution_variables(equations, solution_variables) - - # Extract mesh info - center_level_0 = mesh.tree.center_level_0 - length_level_0 = mesh.tree.length_level_0 - leaf_cell_ids = leaf_cells(mesh.tree) - coordinates = mesh.tree.coordinates[:, leaf_cell_ids] - levels = mesh.tree.levels[leaf_cell_ids] - - unstructured_data = get_unstructured_data(u, solution_variables_, mesh, equations, solver, cache) - x, y, data, mesh_vertices_x, mesh_vertices_y = get_data_2d(center_level_0, length_level_0, - leaf_cell_ids, coordinates, levels, - ndims(mesh), unstructured_data, - nnodes(solver), grid_lines, - max_supported_level, nvisnodes, - slice, point) - variable_names = SVector(varnames(solution_variables_, equations)) - - orientation_x, orientation_y = _get_orientations(mesh, slice) - - return PlotData2DCartesian(x, y, data, variable_names, mesh_vertices_x, mesh_vertices_y, - orientation_x, orientation_y) + solution_variables = nothing, + grid_lines = true, max_supported_level = 11, + nvisnodes = nothing, + slice = :xy, point = (0.0, 0.0, 0.0)) + @assert ndims(mesh) in (2, 3) "unsupported number of dimensions $ndims (must be 2 or 3)" + solution_variables_ = digest_solution_variables(equations, solution_variables) + + # Extract mesh info + center_level_0 = mesh.tree.center_level_0 + length_level_0 = mesh.tree.length_level_0 + leaf_cell_ids = leaf_cells(mesh.tree) + coordinates = mesh.tree.coordinates[:, leaf_cell_ids] + levels = mesh.tree.levels[leaf_cell_ids] + + unstructured_data = get_unstructured_data(u, solution_variables_, mesh, equations, + solver, cache) + x, y, data, mesh_vertices_x, mesh_vertices_y = get_data_2d(center_level_0, + length_level_0, + leaf_cell_ids, + coordinates, levels, + ndims(mesh), + unstructured_data, + nnodes(solver), + grid_lines, + max_supported_level, + nvisnodes, + slice, point) + variable_names = SVector(varnames(solution_variables_, equations)) + + orientation_x, orientation_y = _get_orientations(mesh, slice) + + return PlotData2DCartesian(x, y, data, variable_names, mesh_vertices_x, + mesh_vertices_y, + orientation_x, orientation_y) end - """ PlotData2D(sol; kwargs...) @@ -282,128 +298,143 @@ returns a `SciMLBase.ODESolution`) or Trixi.jl's own `solve!` (which returns a !!! warning "Experimental implementation" This is an experimental feature and may change in future releases. """ -PlotData2D(sol::TrixiODESolution; kwargs...) = PlotData2D(sol.u[end], sol.prob.p; kwargs...) +function PlotData2D(sol::TrixiODESolution; kwargs...) + PlotData2D(sol.u[end], sol.prob.p; kwargs...) +end # Also redirect when using PlotData2DTriangulate. -PlotData2DTriangulated(sol::TrixiODESolution; kwargs...) = PlotData2DTriangulated(sol.u[end], sol.prob.p; kwargs...) - +function PlotData2DTriangulated(sol::TrixiODESolution; kwargs...) + PlotData2DTriangulated(sol.u[end], sol.prob.p; kwargs...) +end # If `u` is an `Array{<:SVectors}` and not a `StructArray`, convert it to a `StructArray` first. 
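The conversion mentioned in the comment above can be pictured in isolation. The following toy sketch (all sizes and values are illustrative) mirrors what the constructor below does: it rebuilds an `Array{<:SVector}` as a `StructArray`, which stores one plain array per solution variable, the layout the plotting code expects:

```julia
using StaticArrays, StructArrays

u = [SVector(1.0, 2.0) for _ in 1:4, _ in 1:3]  # 4 plotting nodes × 3 elements
nvars = length(first(u))                        # 2 solution variables

# allocate one component array per variable, then copy the SVector entries over
u_struct = StructArray{eltype(u)}(ntuple(_ -> zeros(eltype(first(u)), size(u)),
                                         nvars))
for (i, u_i) in enumerate(u)
    u_struct[i] = u_i
end

StructArrays.components(u_struct)  # tuple of two 4×3 Float64 arrays
```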
function PlotData2D(u::Array{<:SVector, 2}, mesh, equations, dg::DGMulti, cache; - solution_variables=nothing, nvisnodes=2*nnodes(dg)) - nvars = length(first(u)) - u_structarray = StructArray{eltype(u)}(ntuple(_->zeros(eltype(first(u)), size(u)), nvars)) - for (i, u_i) in enumerate(u) - u_structarray[i] = u_i - end + solution_variables = nothing, nvisnodes = 2 * nnodes(dg)) + nvars = length(first(u)) + u_structarray = StructArray{eltype(u)}(ntuple(_ -> zeros(eltype(first(u)), size(u)), + nvars)) + for (i, u_i) in enumerate(u) + u_structarray[i] = u_i + end - # re-dispatch to PlotData2D with mesh, equations, dg, cache arguments - return PlotData2D(u_structarray, mesh, equations, dg, cache; - solution_variables=solution_variables, nvisnodes=nvisnodes) + # re-dispatch to PlotData2D with mesh, equations, dg, cache arguments + return PlotData2D(u_structarray, mesh, equations, dg, cache; + solution_variables = solution_variables, nvisnodes = nvisnodes) end # constructor which returns an `PlotData2DTriangulated` object. function PlotData2D(u::StructArray, mesh, equations, dg::DGMulti, cache; - solution_variables=nothing, nvisnodes=2*nnodes(dg)) - - rd = dg.basis - md = mesh.md - - # Vp = the interpolation matrix from nodal points to plotting points - @unpack Vp = rd - interpolate_to_plotting_points!(out, x) = mul!(out, Vp, x) + solution_variables = nothing, nvisnodes = 2 * nnodes(dg)) + rd = dg.basis + md = mesh.md - solution_variables_ = digest_solution_variables(equations, solution_variables) - variable_names = SVector(varnames(solution_variables_, equations)) + # Vp = the interpolation matrix from nodal points to plotting points + @unpack Vp = rd + interpolate_to_plotting_points!(out, x) = mul!(out, Vp, x) - if Vp isa UniformScaling - num_plotting_points = size(u, 1) - else - num_plotting_points = size(Vp, 1) - end - nvars = nvariables(equations) - uEltype = eltype(first(u)) - u_plot = StructArray{SVector{nvars, uEltype}}(ntuple(_->zeros(uEltype, num_plotting_points, md.num_elements), nvars)) + solution_variables_ = digest_solution_variables(equations, solution_variables) + variable_names = SVector(varnames(solution_variables_, equations)) - for e in eachelement(mesh, dg, cache) - # interpolate solution to plotting nodes element-by-element - StructArrays.foreachfield(interpolate_to_plotting_points!, view(u_plot, :, e), view(u, :, e)) - - # transform nodal values of the solution according to `solution_variables` - transform_to_solution_variables!(view(u_plot, :, e), solution_variables_, equations) - end + if Vp isa UniformScaling + num_plotting_points = size(u, 1) + else + num_plotting_points = size(Vp, 1) + end + nvars = nvariables(equations) + uEltype = eltype(first(u)) + u_plot = StructArray{SVector{nvars, uEltype}}(ntuple(_ -> zeros(uEltype, + num_plotting_points, + md.num_elements), + nvars)) + + for e in eachelement(mesh, dg, cache) + # interpolate solution to plotting nodes element-by-element + StructArrays.foreachfield(interpolate_to_plotting_points!, view(u_plot, :, e), + view(u, :, e)) + + # transform nodal values of the solution according to `solution_variables` + transform_to_solution_variables!(view(u_plot, :, e), solution_variables_, + equations) + end - # interpolate nodal coordinates to plotting points - x_plot, y_plot = map(x->Vp * x, md.xyz) # md.xyz is a tuple of arrays containing nodal coordinates + # interpolate nodal coordinates to plotting points + x_plot, y_plot = map(x -> Vp * x, md.xyz) # md.xyz is a tuple of arrays containing nodal coordinates - # construct a 
triangulation of the reference plotting nodes - t = reference_plotting_triangulation(rd.rstp) # rd.rstp = reference coordinates of plotting points + # construct a triangulation of the reference plotting nodes + t = reference_plotting_triangulation(rd.rstp) # rd.rstp = reference coordinates of plotting points - x_face, y_face, face_data = mesh_plotting_wireframe(u, mesh, equations, dg, cache; - nvisnodes=nvisnodes) + x_face, y_face, face_data = mesh_plotting_wireframe(u, mesh, equations, dg, cache; + nvisnodes = nvisnodes) - return PlotData2DTriangulated(x_plot, y_plot, u_plot, t, x_face, y_face, face_data, variable_names) + return PlotData2DTriangulated(x_plot, y_plot, u_plot, t, x_face, y_face, face_data, + variable_names) end # specializes the PlotData2D constructor to return an PlotData2DTriangulated for any type of mesh. function PlotData2DTriangulated(u, mesh, equations, dg::DGSEM, cache; - solution_variables=nothing, nvisnodes=2*polydeg(dg)) - - @assert ndims(mesh) == 2 "Input must be two-dimensional." - - n_nodes_2d = nnodes(dg)^ndims(mesh) - n_elements = nelements(dg, cache) - - # build nodes on reference element (seems to be the right ordering) - r, s = reference_node_coordinates_2d(dg) - - # reference plotting nodes - if nvisnodes == 0 || nvisnodes === nothing - nvisnodes = polydeg(dg) + 1 - end - plotting_interp_matrix = plotting_interpolation_matrix(dg; nvisnodes=nvisnodes) - - # create triangulation for plotting nodes - r_plot, s_plot = (x->plotting_interp_matrix*x).((r, s)) # interpolate dg nodes to plotting nodes - - # construct a triangulation of the plotting nodes - t = reference_plotting_triangulation((r_plot, s_plot)) - - # extract x,y coordinates and solutions on each element - uEltype = eltype(u) - nvars = nvariables(equations) - x = reshape(view(cache.elements.node_coordinates, 1, :, :, :), n_nodes_2d, n_elements) - y = reshape(view(cache.elements.node_coordinates, 2, :, :, :), n_nodes_2d, n_elements) - u_extracted = StructArray{SVector{nvars, uEltype}}(ntuple(_->similar(x, (n_nodes_2d, n_elements)), nvars)) - for element in eachelement(dg, cache) - sk = 1 - for j in eachnode(dg), i in eachnode(dg) - u_node = get_node_vars(u, equations, dg, i, j, element) - u_extracted[sk, element] = u_node - sk += 1 + solution_variables = nothing, + nvisnodes = 2 * polydeg(dg)) + @assert ndims(mesh)==2 "Input must be two-dimensional." 
+ + n_nodes_2d = nnodes(dg)^ndims(mesh) + n_elements = nelements(dg, cache) + + # build nodes on reference element (seems to be the right ordering) + r, s = reference_node_coordinates_2d(dg) + + # reference plotting nodes + if nvisnodes == 0 || nvisnodes === nothing + nvisnodes = polydeg(dg) + 1 + end + plotting_interp_matrix = plotting_interpolation_matrix(dg; nvisnodes = nvisnodes) + + # create triangulation for plotting nodes + r_plot, s_plot = (x -> plotting_interp_matrix * x).((r, s)) # interpolate dg nodes to plotting nodes + + # construct a triangulation of the plotting nodes + t = reference_plotting_triangulation((r_plot, s_plot)) + + # extract x,y coordinates and solutions on each element + uEltype = eltype(u) + nvars = nvariables(equations) + x = reshape(view(cache.elements.node_coordinates, 1, :, :, :), n_nodes_2d, + n_elements) + y = reshape(view(cache.elements.node_coordinates, 2, :, :, :), n_nodes_2d, + n_elements) + u_extracted = StructArray{SVector{nvars, uEltype}}(ntuple(_ -> similar(x, + (n_nodes_2d, + n_elements)), + nvars)) + for element in eachelement(dg, cache) + sk = 1 + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + u_extracted[sk, element] = u_node + sk += 1 + end end - end - # interpolate to volume plotting points - xplot, yplot = plotting_interp_matrix*x, plotting_interp_matrix*y - uplot = StructArray{SVector{nvars, uEltype}}(map(x->plotting_interp_matrix*x, - StructArrays.components(u_extracted))) + # interpolate to volume plotting points + xplot, yplot = plotting_interp_matrix * x, plotting_interp_matrix * y + uplot = StructArray{SVector{nvars, uEltype}}(map(x -> plotting_interp_matrix * x, + StructArrays.components(u_extracted))) - xfp, yfp, ufp = mesh_plotting_wireframe(u_extracted, mesh, equations, dg, cache; nvisnodes=nvisnodes) + xfp, yfp, ufp = mesh_plotting_wireframe(u_extracted, mesh, equations, dg, cache; + nvisnodes = nvisnodes) - # convert variables based on solution_variables mapping - solution_variables_ = digest_solution_variables(equations, solution_variables) - variable_names = SVector(varnames(solution_variables_, equations)) + # convert variables based on solution_variables mapping + solution_variables_ = digest_solution_variables(equations, solution_variables) + variable_names = SVector(varnames(solution_variables_, equations)) - transform_to_solution_variables!(uplot, solution_variables_, equations) - transform_to_solution_variables!(ufp, solution_variables_, equations) + transform_to_solution_variables!(uplot, solution_variables_, equations) + transform_to_solution_variables!(ufp, solution_variables_, equations) - return PlotData2DTriangulated(xplot, yplot, uplot, t, xfp, yfp, ufp, variable_names) + return PlotData2DTriangulated(xplot, yplot, uplot, t, xfp, yfp, ufp, variable_names) end # Wrapper struct to indicate that an array represents a scalar data field. Used only for dispatch. struct ScalarData{T} - data::T + data::T end """ @@ -412,77 +443,77 @@ end Returns an `PlotData2DTriangulated` object which is used to visualize a single scalar field. `u` should be an array whose entries correspond to values of the scalar field at nodal points. """ -ScalarPlotData2D(u, semi::AbstractSemidiscretization; kwargs...) = - ScalarPlotData2D(u, mesh_equations_solver_cache(semi)...; kwargs...) +function ScalarPlotData2D(u, semi::AbstractSemidiscretization; kwargs...) + ScalarPlotData2D(u, mesh_equations_solver_cache(semi)...; kwargs...) 
+end # Returns an `PlotData2DTriangulated` which is used to visualize a single scalar field function ScalarPlotData2D(u, mesh, equations, dg::DGMulti, cache; - variable_name=nothing, nvisnodes=2*nnodes(dg)) - - rd = dg.basis - md = mesh.md + variable_name = nothing, nvisnodes = 2 * nnodes(dg)) + rd = dg.basis + md = mesh.md - # Vp = the interpolation matrix from nodal points to plotting points - @unpack Vp = rd + # Vp = the interpolation matrix from nodal points to plotting points + @unpack Vp = rd - # interpolate nodal coordinates and solution field to plotting points - x_plot, y_plot = map(x->Vp * x, md.xyz) # md.xyz is a tuple of arrays containing nodal coordinates - u_plot = Vp * u + # interpolate nodal coordinates and solution field to plotting points + x_plot, y_plot = map(x -> Vp * x, md.xyz) # md.xyz is a tuple of arrays containing nodal coordinates + u_plot = Vp * u - # construct a triangulation of the reference plotting nodes - t = reference_plotting_triangulation(rd.rstp) # rd.rstp = reference coordinates of plotting points + # construct a triangulation of the reference plotting nodes + t = reference_plotting_triangulation(rd.rstp) # rd.rstp = reference coordinates of plotting points - # Ignore face data when plotting `ScalarPlotData2D`, since mesh lines can be plotted using - # existing functionality based on `PlotData2D(sol)`. - x_face, y_face, face_data = mesh_plotting_wireframe(ScalarData(u), mesh, equations, dg, cache; - nvisnodes=2*nnodes(dg)) + # Ignore face data when plotting `ScalarPlotData2D`, since mesh lines can be plotted using + # existing functionality based on `PlotData2D(sol)`. + x_face, y_face, face_data = mesh_plotting_wireframe(ScalarData(u), mesh, equations, + dg, cache; + nvisnodes = 2 * nnodes(dg)) - # wrap solution in ScalarData struct for recipe dispatch - return PlotData2DTriangulated(x_plot, y_plot, ScalarData(u_plot), t, - x_face, y_face, face_data, variable_name) + # wrap solution in ScalarData struct for recipe dispatch + return PlotData2DTriangulated(x_plot, y_plot, ScalarData(u_plot), t, + x_face, y_face, face_data, variable_name) end -function ScalarPlotData2D(u, mesh, equations, dg::DGSEM, cache; variable_name=nothing, nvisnodes=2*nnodes(dg)) +function ScalarPlotData2D(u, mesh, equations, dg::DGSEM, cache; variable_name = nothing, + nvisnodes = 2 * nnodes(dg)) + n_nodes_2d = nnodes(dg)^ndims(mesh) + n_elements = nelements(dg, cache) - n_nodes_2d = nnodes(dg)^ndims(mesh) - n_elements = nelements(dg, cache) + # build nodes on reference element (seems to be the right ordering) + r, s = reference_node_coordinates_2d(dg) - # build nodes on reference element (seems to be the right ordering) - r, s = reference_node_coordinates_2d(dg) - - # reference plotting nodes - if nvisnodes == 0 || nvisnodes === nothing - nvisnodes = polydeg(dg) + 1 - end - plotting_interp_matrix = plotting_interpolation_matrix(dg; nvisnodes=nvisnodes) - - # create triangulation for plotting nodes - r_plot, s_plot = (x->plotting_interp_matrix*x).((r, s)) # interpolate dg nodes to plotting nodes + # reference plotting nodes + if nvisnodes == 0 || nvisnodes === nothing + nvisnodes = polydeg(dg) + 1 + end + plotting_interp_matrix = plotting_interpolation_matrix(dg; nvisnodes = nvisnodes) - # construct a triangulation of the plotting nodes - t = reference_plotting_triangulation((r_plot, s_plot)) + # create triangulation for plotting nodes + r_plot, s_plot = (x -> plotting_interp_matrix * x).((r, s)) # interpolate dg nodes to plotting nodes - # extract x,y coordinates and reshape them 
into matrices of size (n_nodes_2d, n_elements) - x = view(cache.elements.node_coordinates, 1, :, :, :) - y = view(cache.elements.node_coordinates, 2, :, :, :) - x, y = reshape.((x, y), n_nodes_2d, n_elements) + # construct a triangulation of the plotting nodes + t = reference_plotting_triangulation((r_plot, s_plot)) - # interpolate to volume plotting points by multiplying each column by `plotting_interp_matrix` - x_plot, y_plot = plotting_interp_matrix * x, plotting_interp_matrix * y - u_plot = plotting_interp_matrix * reshape(u, size(x)) + # extract x,y coordinates and reshape them into matrices of size (n_nodes_2d, n_elements) + x = view(cache.elements.node_coordinates, 1, :, :, :) + y = view(cache.elements.node_coordinates, 2, :, :, :) + x, y = reshape.((x, y), n_nodes_2d, n_elements) - # Ignore face data when plotting `ScalarPlotData2D`, since mesh lines can be plotted using - # existing functionality based on `PlotData2D(sol)`. - x_face, y_face, face_data = mesh_plotting_wireframe(ScalarData(u), mesh, equations, dg, cache; - nvisnodes=2*nnodes(dg)) + # interpolate to volume plotting points by multiplying each column by `plotting_interp_matrix` + x_plot, y_plot = plotting_interp_matrix * x, plotting_interp_matrix * y + u_plot = plotting_interp_matrix * reshape(u, size(x)) + # Ignore face data when plotting `ScalarPlotData2D`, since mesh lines can be plotted using + # existing functionality based on `PlotData2D(sol)`. + x_face, y_face, face_data = mesh_plotting_wireframe(ScalarData(u), mesh, equations, + dg, cache; + nvisnodes = 2 * nnodes(dg)) - # wrap solution in ScalarData struct for recipe dispatch - return PlotData2DTriangulated(x_plot, y_plot, ScalarData(u_plot), t, - x_face, y_face, face_data, variable_name) + # wrap solution in ScalarData struct for recipe dispatch + return PlotData2DTriangulated(x_plot, y_plot, ScalarData(u_plot), t, + x_face, y_face, face_data, variable_name) end - """ PlotData1D(u, semi [or mesh, equations, solver, cache]; solution_variables=nothing, nvisnodes=nothing) @@ -510,133 +541,149 @@ which define the curve. When using `curve` any other input from `slice` or `poin !!! warning "Experimental implementation" This is an experimental feature and may change in future releases. """ -PlotData1D(u_ode, semi; kwargs...) = PlotData1D(wrap_array_native(u_ode, semi), - mesh_equations_solver_cache(semi)...; - kwargs...) +function PlotData1D(u_ode, semi; kwargs...) + PlotData1D(wrap_array_native(u_ode, semi), + mesh_equations_solver_cache(semi)...; + kwargs...) +end function PlotData1D(u, mesh::TreeMesh, equations, solver, cache; - solution_variables=nothing, nvisnodes=nothing, - slice=:x, point=(0.0, 0.0, 0.0), curve=nothing) - - solution_variables_ = digest_solution_variables(equations, solution_variables) - variable_names = SVector(varnames(solution_variables_, equations)) - - original_nodes = cache.elements.node_coordinates - unstructured_data = get_unstructured_data(u, solution_variables_, mesh, equations, solver, cache) - - orientation_x = 0 # Set 'orientation' to zero on default. 
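# A hypothetical usage sketch for the `TreeMesh` constructor above; `semi` and
# `u_ode` are placeholders for objects produced by a 2D Trixi.jl elixir:
#
#   pd = PlotData1D(u_ode, semi; slice = :y, point = (0.5, 0.0, 0.0))
#   plot(pd)  # one line per variable, via the Plots.jl recipes
#
# Passing `curve` (a matrix of point coordinates along a polyline) instead of
# `slice`/`point` extracts the data along that curve, as documented above.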
- - if ndims(mesh) == 1 - x, data, mesh_vertices_x = get_data_1d(original_nodes, unstructured_data, nvisnodes) - orientation_x = 1 - - # Special care is required for first-order FV approximations since the nodes are the - # cell centers and do not contain the boundaries - n_nodes = size(unstructured_data, 1) - if n_nodes == 1 - n_visnodes = length(x) ÷ nelements(solver, cache) - if n_visnodes != 2 - throw(ArgumentError("This number of visualization nodes is currently not supported for finite volume approximations.")) - end - left_boundary = mesh.tree.center_level_0[1] - mesh.tree.length_level_0 / 2 - dx_2 = zero(left_boundary) - for i in 1:div(length(x), 2) - # Adjust plot nodes so that they are at the boundaries of each element - dx_2 = x[2 * i - 1] - left_boundary - x[2 * i - 1] -= dx_2 - x[2 * i ] += dx_2 - left_boundary = left_boundary+ 2 * dx_2 - - # Adjust mesh plot nodes - mesh_vertices_x[i] -= dx_2 - end - mesh_vertices_x[end] += dx_2 - end - elseif ndims(mesh) == 2 - if curve !== nothing - x, data, mesh_vertices_x = unstructured_2d_to_1d_curve(original_nodes, unstructured_data, nvisnodes, curve, mesh, solver, cache) - else - x, data, mesh_vertices_x = unstructured_2d_to_1d(original_nodes, unstructured_data, nvisnodes, slice, point) + solution_variables = nothing, nvisnodes = nothing, + slice = :x, point = (0.0, 0.0, 0.0), curve = nothing) + solution_variables_ = digest_solution_variables(equations, solution_variables) + variable_names = SVector(varnames(solution_variables_, equations)) + + original_nodes = cache.elements.node_coordinates + unstructured_data = get_unstructured_data(u, solution_variables_, mesh, equations, + solver, cache) + + orientation_x = 0 # Set 'orientation' to zero on default. + + if ndims(mesh) == 1 + x, data, mesh_vertices_x = get_data_1d(original_nodes, unstructured_data, + nvisnodes) + orientation_x = 1 + + # Special care is required for first-order FV approximations since the nodes are the + # cell centers and do not contain the boundaries + n_nodes = size(unstructured_data, 1) + if n_nodes == 1 + n_visnodes = length(x) ÷ nelements(solver, cache) + if n_visnodes != 2 + throw(ArgumentError("This number of visualization nodes is currently not supported for finite volume approximations.")) + end + left_boundary = mesh.tree.center_level_0[1] - mesh.tree.length_level_0 / 2 + dx_2 = zero(left_boundary) + for i in 1:div(length(x), 2) + # Adjust plot nodes so that they are at the boundaries of each element + dx_2 = x[2 * i - 1] - left_boundary + x[2 * i - 1] -= dx_2 + x[2 * i] += dx_2 + left_boundary = left_boundary + 2 * dx_2 + + # Adjust mesh plot nodes + mesh_vertices_x[i] -= dx_2 + end + mesh_vertices_x[end] += dx_2 + end + elseif ndims(mesh) == 2 + if curve !== nothing + x, data, mesh_vertices_x = unstructured_2d_to_1d_curve(original_nodes, + unstructured_data, + nvisnodes, curve, + mesh, solver, cache) + else + x, data, mesh_vertices_x = unstructured_2d_to_1d(original_nodes, + unstructured_data, + nvisnodes, slice, point) + end + else # ndims(mesh) == 3 + if curve !== nothing + x, data, mesh_vertices_x = unstructured_3d_to_1d_curve(original_nodes, + unstructured_data, + nvisnodes, curve, + mesh, solver, cache) + else + x, data, mesh_vertices_x = unstructured_3d_to_1d(original_nodes, + unstructured_data, + nvisnodes, slice, point) + end end - else # ndims(mesh) == 3 - if curve !== nothing - x, data, mesh_vertices_x = unstructured_3d_to_1d_curve(original_nodes, unstructured_data, nvisnodes, curve, mesh, solver, cache) - else - x, data, mesh_vertices_x = 
unstructured_3d_to_1d(original_nodes, unstructured_data, nvisnodes, slice, point) - end - end - return PlotData1D(x, data, variable_names, mesh_vertices_x, - orientation_x) + return PlotData1D(x, data, variable_names, mesh_vertices_x, + orientation_x) end function PlotData1D(u, mesh, equations, solver, cache; - solution_variables=nothing, nvisnodes=nothing, - slice=:x, point=(0.0, 0.0, 0.0), curve=nothing) - - solution_variables_ = digest_solution_variables(equations, solution_variables) - variable_names = SVector(varnames(solution_variables_, equations)) - - original_nodes = cache.elements.node_coordinates - unstructured_data = get_unstructured_data(u, solution_variables_, mesh, equations, solver, cache) - - orientation_x = 0 # Set 'orientation' to zero on default. - - if ndims(mesh) == 1 - x, data, mesh_vertices_x = get_data_1d(original_nodes, unstructured_data, nvisnodes) - orientation_x = 1 - elseif ndims(mesh) == 2 - # Create a 'PlotData2DTriangulated' object so a triangulation can be used when extracting relevant data. - pd = PlotData2DTriangulated(u, mesh, equations, solver, cache; solution_variables, nvisnodes) - x, data, mesh_vertices_x = unstructured_2d_to_1d_curve(pd, curve, slice, point, nvisnodes) - else # ndims(mesh) == 3 - # Extract the information required to create a PlotData1D object. - x, data, mesh_vertices_x = unstructured_3d_to_1d_curve(original_nodes, u, curve, slice, point, nvisnodes) - end + solution_variables = nothing, nvisnodes = nothing, + slice = :x, point = (0.0, 0.0, 0.0), curve = nothing) + solution_variables_ = digest_solution_variables(equations, solution_variables) + variable_names = SVector(varnames(solution_variables_, equations)) + + original_nodes = cache.elements.node_coordinates + unstructured_data = get_unstructured_data(u, solution_variables_, mesh, equations, + solver, cache) + + orientation_x = 0 # Set 'orientation' to zero on default. + + if ndims(mesh) == 1 + x, data, mesh_vertices_x = get_data_1d(original_nodes, unstructured_data, + nvisnodes) + orientation_x = 1 + elseif ndims(mesh) == 2 + # Create a 'PlotData2DTriangulated' object so a triangulation can be used when extracting relevant data. + pd = PlotData2DTriangulated(u, mesh, equations, solver, cache; + solution_variables, nvisnodes) + x, data, mesh_vertices_x = unstructured_2d_to_1d_curve(pd, curve, slice, point, + nvisnodes) + else # ndims(mesh) == 3 + # Extract the information required to create a PlotData1D object. + x, data, mesh_vertices_x = unstructured_3d_to_1d_curve(original_nodes, u, curve, + slice, point, nvisnodes) + end - return PlotData1D(x, data, variable_names, mesh_vertices_x, - orientation_x) + return PlotData1D(x, data, variable_names, mesh_vertices_x, + orientation_x) end # Specializes the `PlotData1D` constructor for one-dimensional `DGMulti` solvers. function PlotData1D(u, mesh, equations, dg::DGMulti{1}, cache; - solution_variables=nothing) - - solution_variables_ = digest_solution_variables(equations, solution_variables) - variable_names = SVector(varnames(solution_variables_, equations)) - - orientation_x = 0 # Set 'orientation' to zero on default. 
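# The `DGMulti{1}` method below appends one NaN row per element before
# flattening; a minimal sketch with made-up numbers (3 plotting points,
# 2 elements) shows the resulting layout. Plots.jl starts a new line segment
# at each NaN, so neighboring elements stay visually disconnected.
x_elements = [0.0 1.0; 0.5 1.5; 1.0 2.0]  # 3 plotting points × 2 elements
x_polyline = vec(vcat(x_elements, fill(NaN, 1, size(x_elements, 2))))
@assert isequal(x_polyline, [0.0, 0.5, 1.0, NaN, 1.0, 1.5, 2.0, NaN])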
- - if u isa StructArray - # Convert conserved variables to the given `solution_variables` and set up - # plotting coordinates - # This uses a "structure of arrays" - data = map(x -> vcat(dg.basis.Vp * x, fill(NaN, 1, size(u, 2))), - StructArrays.components(solution_variables_.(u, equations))) - x = vcat(dg.basis.Vp * mesh.md.x, fill(NaN, 1, size(u, 2))) - - # Here, we ensure that `DGMulti` visualization uses the same data layout and format - # as `TreeMesh`. This enables us to reuse existing plot recipes. In particular, - # `hcat(data...)` creates a matrix of size `num_plotting_points` by `nvariables(equations)`, - # with data on different elements separated by `NaNs`. - x_plot = vec(x) - data_plot = hcat(vec.(data)...) - else - # Convert conserved variables to the given `solution_variables` and set up - # plotting coordinates - # This uses an "array of structures" - data_tmp = dg.basis.Vp * solution_variables_.(u, equations) - data = vcat(data_tmp, fill(NaN * zero(eltype(data_tmp)), 1, size(u, 2))) - x = vcat(dg.basis.Vp * mesh.md.x, fill(NaN, 1, size(u, 2))) - - # Same as above - we create `data_plot` as array of size `num_plotting_points` - # by "number of plotting variables". - x_plot = vec(x) - data_plot = permutedims(reinterpret(reshape, eltype(eltype(data)), vec(data)), - (2, 1)) - end - - return PlotData1D(x_plot, data_plot, variable_names, mesh.md.VX, orientation_x) + solution_variables = nothing) + solution_variables_ = digest_solution_variables(equations, solution_variables) + variable_names = SVector(varnames(solution_variables_, equations)) + + orientation_x = 0 # Set 'orientation' to zero on default. + + if u isa StructArray + # Convert conserved variables to the given `solution_variables` and set up + # plotting coordinates + # This uses a "structure of arrays" + data = map(x -> vcat(dg.basis.Vp * x, fill(NaN, 1, size(u, 2))), + StructArrays.components(solution_variables_.(u, equations))) + x = vcat(dg.basis.Vp * mesh.md.x, fill(NaN, 1, size(u, 2))) + + # Here, we ensure that `DGMulti` visualization uses the same data layout and format + # as `TreeMesh`. This enables us to reuse existing plot recipes. In particular, + # `hcat(data...)` creates a matrix of size `num_plotting_points` by `nvariables(equations)`, + # with data on different elements separated by `NaNs`. + x_plot = vec(x) + data_plot = hcat(vec.(data)...) + else + # Convert conserved variables to the given `solution_variables` and set up + # plotting coordinates + # This uses an "array of structures" + data_tmp = dg.basis.Vp * solution_variables_.(u, equations) + data = vcat(data_tmp, fill(NaN * zero(eltype(data_tmp)), 1, size(u, 2))) + x = vcat(dg.basis.Vp * mesh.md.x, fill(NaN, 1, size(u, 2))) + + # Same as above - we create `data_plot` as array of size `num_plotting_points` + # by "number of plotting variables". + x_plot = vec(x) + data_plot = permutedims(reinterpret(reshape, eltype(eltype(data)), vec(data)), + (2, 1)) + end + + return PlotData1D(x_plot, data_plot, variable_names, mesh.md.VX, orientation_x) end """ @@ -649,26 +696,27 @@ Create a `PlotData1D` object from a solution object created by either `OrdinaryD !!! warning "Experimental implementation" This is an experimental feature and may change in future releases. """ -PlotData1D(sol::TrixiODESolution; kwargs...) = PlotData1D(sol.u[end], sol.prob.p; kwargs...) +function PlotData1D(sol::TrixiODESolution; kwargs...) + PlotData1D(sol.u[end], sol.prob.p; kwargs...) 
+end function PlotData1D(time_series_callback::TimeSeriesCallback, point_id::Integer) - @unpack time, variable_names, point_data = time_series_callback + @unpack time, variable_names, point_data = time_series_callback - n_solution_variables = length(variable_names) - data = Matrix{Float64}(undef, length(time), n_solution_variables) - reshaped = reshape(point_data[point_id], n_solution_variables, length(time)) - for v in 1:n_solution_variables - @views data[:, v] = reshaped[v, :] - end + n_solution_variables = length(variable_names) + data = Matrix{Float64}(undef, length(time), n_solution_variables) + reshaped = reshape(point_data[point_id], n_solution_variables, length(time)) + for v in 1:n_solution_variables + @views data[:, v] = reshaped[v, :] + end - mesh_vertices_x = Vector{Float64}(undef, 0) + mesh_vertices_x = Vector{Float64}(undef, 0) - return PlotData1D(time, data, SVector(variable_names), mesh_vertices_x, 0) + return PlotData1D(time, data, SVector(variable_names), mesh_vertices_x, 0) end -function PlotData1D(cb::DiscreteCallback{<:Any, <:TimeSeriesCallback}, point_id::Integer) - return PlotData1D(cb.affect!, point_id) +function PlotData1D(cb::DiscreteCallback{<:Any, <:TimeSeriesCallback}, + point_id::Integer) + return PlotData1D(cb.affect!, point_id) end - - end # @muladd diff --git a/src/visualization/utilities.jl b/src/visualization/utilities.jl index ba589073b92..05457395ac0 100644 --- a/src/visualization/utilities.jl +++ b/src/visualization/utilities.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent @inline num_faces(elem::Tri) = 3 @inline num_faces(elem::Quad) = 4 @@ -13,7 +14,7 @@ # using the [Shoelace_formula](https://en.wikipedia.org/wiki/Shoelace_formula). function compute_triangle_area(tri) A, B, C = tri - return 0.5 * (A[1] * (B[2] - C[2]) + B[1] * (C[2]-A[2]) + C[1] * (A[2] - B[2])) + return 0.5 * (A[1] * (B[2] - C[2]) + B[1] * (C[2] - A[2]) + C[1] * (A[2] - B[2])) end # reference_plotting_triangulation(reference_plotting_coordinates) @@ -26,32 +27,33 @@ end # triangulation of the plotting points, with zero-volume triangles removed. # # For example, r[t[1, i]] returns the first reference coordinate of the 1st point on the ith triangle. 
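# `compute_triangle_area` above doubles as the sliver filter for the
# triangulation constructed below: degenerate (collinear) triangles have
# (near-)zero signed area. A quick sanity check with illustrative vertices:
@assert compute_triangle_area(((0.0, 0.0), (1.0, 0.0), (0.0, 1.0))) ≈ 0.5
@assert abs(compute_triangle_area(((0.0, 0.0), (1.0, 0.0), (2.0, 0.0)))) < 50 * eps()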
-function reference_plotting_triangulation(reference_plotting_coordinates, tol=50*eps()) - # on-the-fly triangulation of plotting nodes on the reference element - tri_in = Triangulate.TriangulateIO() - tri_in.pointlist = permutedims(hcat(reference_plotting_coordinates...)) - tri_out, _ = Triangulate.triangulate("Q", tri_in) - triangles = tri_out.trianglelist - - # filter out sliver triangles - has_volume = fill(true, size(triangles, 2)) - for i in axes(triangles, 2) - ids = @view triangles[:, i] - x_points = @view tri_out.pointlist[1, ids] - y_points = @view tri_out.pointlist[2, ids] - area = compute_triangle_area(zip(x_points, y_points)) - if abs(area) < tol - has_volume[i] = false - end - end - return permutedims(triangles[:, findall(has_volume)]) +function reference_plotting_triangulation(reference_plotting_coordinates, + tol = 50 * eps()) + # on-the-fly triangulation of plotting nodes on the reference element + tri_in = Triangulate.TriangulateIO() + tri_in.pointlist = permutedims(hcat(reference_plotting_coordinates...)) + tri_out, _ = Triangulate.triangulate("Q", tri_in) + triangles = tri_out.trianglelist + + # filter out sliver triangles + has_volume = fill(true, size(triangles, 2)) + for i in axes(triangles, 2) + ids = @view triangles[:, i] + x_points = @view tri_out.pointlist[1, ids] + y_points = @view tri_out.pointlist[2, ids] + area = compute_triangle_area(zip(x_points, y_points)) + if abs(area) < tol + has_volume[i] = false + end + end + return permutedims(triangles[:, findall(has_volume)]) end # This function is used to avoid type instabilities when calling `digest_solution_variables`. function transform_to_solution_variables!(u, solution_variables, equations) - for (i, u_i) in enumerate(u) - u[i] = solution_variables(u_i, equations) - end + for (i, u_i) in enumerate(u) + u[i] = solution_variables(u_i, equations) + end end # global_plotting_triangulation_triplot(u_plot, rst_plot, xyz_plot) @@ -64,174 +66,196 @@ end # - u_plot = matrix of size (Nplot, K) representing solution to plot. 
# - t = triangulation of reference plotting points function global_plotting_triangulation_triplot(xyz_plot, u_plot, t) - - @assert size(first(xyz_plot), 1) == size(u_plot, 1) "Row dimension of u_plot does not match row dimension of xyz_plot" - - # build discontinuous data on plotting triangular mesh - num_plotting_points, num_elements = size(u_plot) - num_reference_plotting_triangles = size(t, 1) - num_plotting_elements_total = num_reference_plotting_triangles * num_elements - - # each column of `tp` corresponds to a vertex of a plotting triangle - tp = zeros(Int32, 3, num_plotting_elements_total) - zp = similar(tp, eltype(u_plot)) - for e = 1:num_elements - for i = 1:num_reference_plotting_triangles - tp[:, i + (e-1)*num_reference_plotting_triangles] .= @views t[i, :] .+ (e-1) * num_plotting_points - zp[:, i + (e-1)*num_reference_plotting_triangles] .= @views u_plot[t[i, :], e] + @assert size(first(xyz_plot), 1)==size(u_plot, 1) "Row dimension of u_plot does not match row dimension of xyz_plot" + + # build discontinuous data on plotting triangular mesh + num_plotting_points, num_elements = size(u_plot) + num_reference_plotting_triangles = size(t, 1) + num_plotting_elements_total = num_reference_plotting_triangles * num_elements + + # each column of `tp` corresponds to a vertex of a plotting triangle + tp = zeros(Int32, 3, num_plotting_elements_total) + zp = similar(tp, eltype(u_plot)) + for e in 1:num_elements + for i in 1:num_reference_plotting_triangles + tp[:, i + (e - 1) * num_reference_plotting_triangles] .= @views t[i, :] .+ + (e - 1) * + num_plotting_points + zp[:, i + (e - 1) * num_reference_plotting_triangles] .= @views u_plot[t[i, + :], + e] + end end - end - return vec.(xyz_plot)..., zp, tp + return vec.(xyz_plot)..., zp, tp end -function get_face_node_indices(r, s, dg::DGSEM, tol=100*eps()) - face_1 = findall(@. abs(s+1) < tol) - face_2 = findall(@. abs(r-1) < tol) - face_3 = findall(@. abs(s-1) < tol) - face_4 = findall(@. abs(r+1) < tol) - Fmask = hcat(face_1, face_2, face_3, face_4) - return Fmask +function get_face_node_indices(r, s, dg::DGSEM, tol = 100 * eps()) + face_1 = findall(@. abs(s + 1) < tol) + face_2 = findall(@. abs(r - 1) < tol) + face_3 = findall(@. abs(s - 1) < tol) + face_4 = findall(@. abs(r + 1) < tol) + Fmask = hcat(face_1, face_2, face_3, face_4) + return Fmask end # dispatch on semi -mesh_plotting_wireframe(u, semi) = mesh_plotting_wireframe(u, mesh_equations_solver_cache(semi)...) +function mesh_plotting_wireframe(u, semi) + mesh_plotting_wireframe(u, mesh_equations_solver_cache(semi)...) +end # mesh_plotting_wireframe(u, mesh, equations, dg::DGMulti, cache; num_plotting_pts=25) # # Generates data for plotting a mesh wireframe given StartUpDG data types. # Returns (plotting_coordinates_x, plotting_coordinates_y, nothing) for a 2D mesh wireframe. 
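# The face upsampling below hinges on a pair of 1D Vandermonde matrices; a
# condensed, standalone sketch (assuming StartUpDG.jl is available, with
# illustrative choices N = 3 and 10 plotting points per face):
using StartUpDG
N = 3
face_nodes = StartUpDG.nodes(Line(), N)
vandermonde_1D = StartUpDG.vandermonde(Line(), N, face_nodes)
rplot = LinRange(-1, 1, 10)
Vp1D = StartUpDG.vandermonde(Line(), N, rplot) / vandermonde_1D
@assert Vp1D * face_nodes ≈ collect(rplot)  # linear data is reproduced exactly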
function mesh_plotting_wireframe(u::StructArray, mesh, equations, dg::DGMulti, cache; - nvisnodes=2*nnodes(dg)) - @unpack md = mesh - rd = dg.basis - - # Construct 1D plotting interpolation matrix `Vp1D` for a single face - @unpack N, Fmask = rd - num_face_points = length(Fmask) ÷ num_faces(rd.element_type) - vandermonde_matrix_1D = StartUpDG.vandermonde(Line(), N, StartUpDG.nodes(Line(), num_face_points - 1)) - rplot = LinRange(-1, 1, nvisnodes) - Vp1D = StartUpDG.vandermonde(Line(), N, rplot) / vandermonde_matrix_1D - - num_faces_total = num_faces(rd.element_type) * md.num_elements - xf, yf = map(x->reshape(view(x, Fmask, :), num_face_points, num_faces_total), md.xyz) - uf = similar(u, size(xf)) - apply_to_each_field((out, x)->out .= reshape(view(x, Fmask, :), num_face_points, num_faces_total), uf, u) - - num_face_plotting_points = size(Vp1D, 1) - x_mesh, y_mesh = ntuple(_->zeros(num_face_plotting_points, num_faces_total), 2) - u_mesh = similar(u, (num_face_plotting_points, num_faces_total)) - for f in 1:num_faces_total - mul!(view(x_mesh, :, f), Vp1D, view(xf, :, f)) - mul!(view(y_mesh, :, f), Vp1D, view(yf, :, f)) - apply_to_each_field(mul_by!(Vp1D), view(u_mesh, :, f), view(uf, :, f)) - end - - return x_mesh, y_mesh, u_mesh -end - -function mesh_plotting_wireframe(u::StructArray, mesh, equations, dg::DGSEM, cache; nvisnodes=2*nnodes(dg)) - - # build nodes on reference element (seems to be the right ordering) - r, s = reference_node_coordinates_2d(dg) - - # extract node coordinates - uEltype = eltype(first(u)) - nvars = nvariables(equations) - n_nodes_2d = nnodes(dg)^ndims(mesh) - n_elements = nelements(dg, cache) - x = reshape(view(cache.elements.node_coordinates, 1, :, :, :), n_nodes_2d, n_elements) - y = reshape(view(cache.elements.node_coordinates, 2, :, :, :), n_nodes_2d, n_elements) - - # extract indices of local face nodes for wireframe plotting - Fmask = get_face_node_indices(r, s, dg) - plotting_interp_matrix1D = face_plotting_interpolation_matrix(dg; nvisnodes=nvisnodes) - - # These 5 lines extract the face values on each element from the arrays x,y,sol_to_plot. - # The resulting arrays are then reshaped so that xf, yf, sol_f are Matrix types of size - # (Number of face plotting nodes) x (Number of faces). 
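# Both wireframe methods treat multi-variable solutions field-by-field via
# StructArrays.jl; a toy sketch (StructArrays.jl and StaticArrays.jl assumed,
# all values made up) of the component-wise mapping pattern used here:
using StaticArrays, StructArrays
u_toy = StructArray{SVector{2, Float64}}((rand(3, 2), rand(3, 2)))
doubled = StructArray{SVector{2, Float64}}(map(x -> 2 .* x,
                                               StructArrays.components(u_toy)))
@assert doubled[1, 1] == 2 * u_toy[1, 1]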
- function face_first_reshape(x, num_nodes_1D, num_nodes, num_elements) - num_reference_faces = 2 * ndims(mesh) - xf = view(reshape(x, num_nodes, num_elements), vec(Fmask), :) - return reshape(xf, num_nodes_1D, num_elements * num_reference_faces) - end - reshape_and_interpolate(x) = plotting_interp_matrix1D * face_first_reshape(x, nnodes(dg), n_nodes_2d, n_elements) - xfp, yfp = map(reshape_and_interpolate, (x, y)) - ufp = StructArray{SVector{nvars, uEltype}}(map(reshape_and_interpolate, StructArrays.components(u))) - - return xfp, yfp, ufp -end + nvisnodes = 2 * nnodes(dg)) + @unpack md = mesh + rd = dg.basis + + # Construct 1D plotting interpolation matrix `Vp1D` for a single face + @unpack N, Fmask = rd + num_face_points = length(Fmask) ÷ num_faces(rd.element_type) + vandermonde_matrix_1D = StartUpDG.vandermonde(Line(), N, + StartUpDG.nodes(Line(), + num_face_points - 1)) + rplot = LinRange(-1, 1, nvisnodes) + Vp1D = StartUpDG.vandermonde(Line(), N, rplot) / vandermonde_matrix_1D + + num_faces_total = num_faces(rd.element_type) * md.num_elements + xf, yf = map(x -> reshape(view(x, Fmask, :), num_face_points, num_faces_total), + md.xyz) + uf = similar(u, size(xf)) + apply_to_each_field((out, x) -> out .= reshape(view(x, Fmask, :), num_face_points, + num_faces_total), uf, u) + + num_face_plotting_points = size(Vp1D, 1) + x_mesh, y_mesh = ntuple(_ -> zeros(num_face_plotting_points, num_faces_total), 2) + u_mesh = similar(u, (num_face_plotting_points, num_faces_total)) + for f in 1:num_faces_total + mul!(view(x_mesh, :, f), Vp1D, view(xf, :, f)) + mul!(view(y_mesh, :, f), Vp1D, view(yf, :, f)) + apply_to_each_field(mul_by!(Vp1D), view(u_mesh, :, f), view(uf, :, f)) + end -function mesh_plotting_wireframe(u::ScalarData, mesh, equations, dg::DGSEM, cache; nvisnodes=2*nnodes(dg)) - - # build nodes on reference element (seems to be the right ordering) - r, s = reference_node_coordinates_2d(dg) - - # extract node coordinates - n_nodes_2d = nnodes(dg)^ndims(mesh) - n_elements = nelements(dg, cache) - x = reshape(view(cache.elements.node_coordinates, 1, :, :, :), n_nodes_2d, n_elements) - y = reshape(view(cache.elements.node_coordinates, 2, :, :, :), n_nodes_2d, n_elements) - - # extract indices of local face nodes for wireframe plotting - Fmask = get_face_node_indices(r, s, dg) - plotting_interp_matrix1D = face_plotting_interpolation_matrix(dg; nvisnodes=nvisnodes) - - # These 5 lines extract the face values on each element from the arrays x,y,sol_to_plot. - # The resulting arrays are then reshaped so that xf, yf, sol_f are Matrix types of size - # (Number of face plotting nodes) x (Number of faces). 
- function face_first_reshape(x, num_nodes_1D, num_nodes, num_elements) - num_reference_faces = 2 * ndims(mesh) - xf = view(reshape(x, num_nodes, num_elements), vec(Fmask), :) - return reshape(xf, num_nodes_1D, num_elements * num_reference_faces) - end - reshape_and_interpolate(x) = plotting_interp_matrix1D * face_first_reshape(x, nnodes(dg), n_nodes_2d, n_elements) - xfp, yfp, ufp = map(reshape_and_interpolate, (x, y, u.data)) - - return xfp, yfp, ufp + return x_mesh, y_mesh, u_mesh end -function mesh_plotting_wireframe(u::ScalarData, mesh, equations, dg::DGMulti, cache; nvisnodes=2*nnodes(dg)) - - @unpack md = mesh - rd = dg.basis - - # Construct 1D plotting interpolation matrix `Vp1D` for a single face - @unpack N, Fmask = rd - vandermonde_matrix_1D = StartUpDG.vandermonde(Line(), N, StartUpDG.nodes(Line(), N)) - rplot = LinRange(-1, 1, nvisnodes) - Vp1D = StartUpDG.vandermonde(Line(), N, rplot) / vandermonde_matrix_1D +function mesh_plotting_wireframe(u::StructArray, mesh, equations, dg::DGSEM, cache; + nvisnodes = 2 * nnodes(dg)) + + # build nodes on reference element (seems to be the right ordering) + r, s = reference_node_coordinates_2d(dg) + + # extract node coordinates + uEltype = eltype(first(u)) + nvars = nvariables(equations) + n_nodes_2d = nnodes(dg)^ndims(mesh) + n_elements = nelements(dg, cache) + x = reshape(view(cache.elements.node_coordinates, 1, :, :, :), n_nodes_2d, + n_elements) + y = reshape(view(cache.elements.node_coordinates, 2, :, :, :), n_nodes_2d, + n_elements) + + # extract indices of local face nodes for wireframe plotting + Fmask = get_face_node_indices(r, s, dg) + plotting_interp_matrix1D = face_plotting_interpolation_matrix(dg; + nvisnodes = nvisnodes) + + # These 5 lines extract the face values on each element from the arrays x,y,sol_to_plot. + # The resulting arrays are then reshaped so that xf, yf, sol_f are Matrix types of size + # (Number of face plotting nodes) x (Number of faces). 
+ function face_first_reshape(x, num_nodes_1D, num_nodes, num_elements) + num_reference_faces = 2 * ndims(mesh) + xf = view(reshape(x, num_nodes, num_elements), vec(Fmask), :) + return reshape(xf, num_nodes_1D, num_elements * num_reference_faces) + end + function reshape_and_interpolate(x) + plotting_interp_matrix1D * + face_first_reshape(x, nnodes(dg), n_nodes_2d, n_elements) + end + xfp, yfp = map(reshape_and_interpolate, (x, y)) + ufp = StructArray{SVector{nvars, uEltype}}(map(reshape_and_interpolate, + StructArrays.components(u))) - num_face_points = N+1 - num_faces_total = num_faces(rd.element_type) * md.num_elements - xf, yf, uf = map(x->reshape(view(x, Fmask, :), num_face_points, num_faces_total), (md.xyz..., u.data)) + return xfp, yfp, ufp +end - num_face_plotting_points = size(Vp1D, 1) - x_mesh, y_mesh = ntuple(_->zeros(num_face_plotting_points, num_faces_total), 2) - u_mesh = similar(u.data, (num_face_plotting_points, num_faces_total)) - for f in 1:num_faces_total - mul!(view(x_mesh, :, f), Vp1D, view(xf, :, f)) - mul!(view(y_mesh, :, f), Vp1D, view(yf, :, f)) - mul!(view(u_mesh, :, f), Vp1D, view(uf, :, f)) - end +function mesh_plotting_wireframe(u::ScalarData, mesh, equations, dg::DGSEM, cache; + nvisnodes = 2 * nnodes(dg)) + + # build nodes on reference element (seems to be the right ordering) + r, s = reference_node_coordinates_2d(dg) + + # extract node coordinates + n_nodes_2d = nnodes(dg)^ndims(mesh) + n_elements = nelements(dg, cache) + x = reshape(view(cache.elements.node_coordinates, 1, :, :, :), n_nodes_2d, + n_elements) + y = reshape(view(cache.elements.node_coordinates, 2, :, :, :), n_nodes_2d, + n_elements) + + # extract indices of local face nodes for wireframe plotting + Fmask = get_face_node_indices(r, s, dg) + plotting_interp_matrix1D = face_plotting_interpolation_matrix(dg; + nvisnodes = nvisnodes) + + # These 5 lines extract the face values on each element from the arrays x,y,sol_to_plot. + # The resulting arrays are then reshaped so that xf, yf, sol_f are Matrix types of size + # (Number of face plotting nodes) x (Number of faces). 
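# A toy version of the face extraction below, assuming a 4-node element,
# 2 elements, and a made-up `Fmask` with one node per face; the view/reshape
# pair yields a (nodes per face) × (total faces) matrix:
x_toy = reshape(collect(1.0:8.0), 4, 2)  # 4 nodes × 2 elements
Fmask_toy = [1 2 4 3]                    # node index of each of the 4 faces
xf_toy = view(reshape(x_toy, 4, 2), vec(Fmask_toy), :)
@assert reshape(xf_toy, 1, 2 * 4) == [1.0 2.0 4.0 3.0 5.0 6.0 8.0 7.0]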
+ function face_first_reshape(x, num_nodes_1D, num_nodes, num_elements) + num_reference_faces = 2 * ndims(mesh) + xf = view(reshape(x, num_nodes, num_elements), vec(Fmask), :) + return reshape(xf, num_nodes_1D, num_elements * num_reference_faces) + end + function reshape_and_interpolate(x) + plotting_interp_matrix1D * + face_first_reshape(x, nnodes(dg), n_nodes_2d, n_elements) + end + xfp, yfp, ufp = map(reshape_and_interpolate, (x, y, u.data)) - return x_mesh, y_mesh, u_mesh + return xfp, yfp, ufp end +function mesh_plotting_wireframe(u::ScalarData, mesh, equations, dg::DGMulti, cache; + nvisnodes = 2 * nnodes(dg)) + @unpack md = mesh + rd = dg.basis + + # Construct 1D plotting interpolation matrix `Vp1D` for a single face + @unpack N, Fmask = rd + vandermonde_matrix_1D = StartUpDG.vandermonde(Line(), N, StartUpDG.nodes(Line(), N)) + rplot = LinRange(-1, 1, nvisnodes) + Vp1D = StartUpDG.vandermonde(Line(), N, rplot) / vandermonde_matrix_1D + + num_face_points = N + 1 + num_faces_total = num_faces(rd.element_type) * md.num_elements + xf, yf, uf = map(x -> reshape(view(x, Fmask, :), num_face_points, num_faces_total), + (md.xyz..., u.data)) + + num_face_plotting_points = size(Vp1D, 1) + x_mesh, y_mesh = ntuple(_ -> zeros(num_face_plotting_points, num_faces_total), 2) + u_mesh = similar(u.data, (num_face_plotting_points, num_faces_total)) + for f in 1:num_faces_total + mul!(view(x_mesh, :, f), Vp1D, view(xf, :, f)) + mul!(view(y_mesh, :, f), Vp1D, view(yf, :, f)) + mul!(view(u_mesh, :, f), Vp1D, view(uf, :, f)) + end + return x_mesh, y_mesh, u_mesh +end # These methods are used internally to set the default value of the solution variables: # - If a `cons2prim` for the given `equations` exists, use it # - Otherwise, use `cons2cons`, which is defined for all systems of equations digest_solution_variables(equations, solution_variables) = solution_variables function digest_solution_variables(equations, solution_variables::Nothing) - if hasmethod(cons2prim, Tuple{AbstractVector, typeof(equations)}) - return cons2prim - else - return cons2cons - end + if hasmethod(cons2prim, Tuple{AbstractVector, typeof(equations)}) + return cons2prim + else + return cons2cons + end end - """ adapt_to_mesh_level!(u_ode, semi, level) adapt_to_mesh_level!(sol::Trixi.TrixiODESolution, level) @@ -240,21 +264,23 @@ Like [`adapt_to_mesh_level`](@ref), but modifies the solution and parts of the semidiscretization (mesh and caches) in place. 
""" function adapt_to_mesh_level!(u_ode, semi, level) - # Create AMR callback with controller that refines everything towards a single level - amr_controller = ControllerThreeLevel(semi, IndicatorMax(semi, variable=first), base_level=level) - amr_callback = AMRCallback(semi, amr_controller, interval=0) + # Create AMR callback with controller that refines everything towards a single level + amr_controller = ControllerThreeLevel(semi, IndicatorMax(semi, variable = first), + base_level = level) + amr_callback = AMRCallback(semi, amr_controller, interval = 0) - # Adapt mesh until it does not change anymore - has_changed = amr_callback.affect!(u_ode, semi, 0.0, 0) - while has_changed + # Adapt mesh until it does not change anymore has_changed = amr_callback.affect!(u_ode, semi, 0.0, 0) - end + while has_changed + has_changed = amr_callback.affect!(u_ode, semi, 0.0, 0) + end - return u_ode, semi + return u_ode, semi end -adapt_to_mesh_level!(sol::TrixiODESolution, level) = adapt_to_mesh_level!(sol.u[end], sol.prob.p, level) - +function adapt_to_mesh_level!(sol::TrixiODESolution, level) + adapt_to_mesh_level!(sol.u[end], sol.prob.p, level) +end """ adapt_to_mesh_level(u_ode, semi, level) @@ -270,15 +296,16 @@ extracted as needed. See also: [`adapt_to_mesh_level!`](@ref) """ function adapt_to_mesh_level(u_ode, semi, level) - # Create new semidiscretization with copy of the current mesh - mesh, _, _, _ = mesh_equations_solver_cache(semi) - new_semi = remake(semi, mesh=deepcopy(mesh)) + # Create new semidiscretization with copy of the current mesh + mesh, _, _, _ = mesh_equations_solver_cache(semi) + new_semi = remake(semi, mesh = deepcopy(mesh)) - return adapt_to_mesh_level!(deepcopy(u_ode), new_semi, level) + return adapt_to_mesh_level!(deepcopy(u_ode), new_semi, level) end -adapt_to_mesh_level(sol::TrixiODESolution, level) = adapt_to_mesh_level(sol.u[end], sol.prob.p, level) - +function adapt_to_mesh_level(sol::TrixiODESolution, level) + adapt_to_mesh_level(sol.u[end], sol.prob.p, level) +end # Extract data from a 2D/3D DG solution and prepare it for visualization as a heatmap/contour plot. # @@ -291,69 +318,74 @@ adapt_to_mesh_level(sol::TrixiODESolution, level) = adapt_to_mesh_level(sol.u[en # # Note: This is a low-level function that is not considered as part of Trixi.jl's interface and may # thus be changed in future releases. 
-function get_data_2d(center_level_0, length_level_0, leaf_cells, coordinates, levels, ndims, - unstructured_data, n_nodes, - grid_lines=false, max_supported_level=11, nvisnodes=nothing, - slice=:xy, point=(0.0, 0.0, 0.0)) - # Determine resolution for data interpolation - max_level = maximum(levels) - if max_level > max_supported_level - error("Maximum refinement level $max_level is higher than " * - "maximum supported level $max_supported_level") - end - max_available_nodes_per_finest_element = 2^(max_supported_level - max_level) - if nvisnodes === nothing - max_nvisnodes = 2 * n_nodes - elseif nvisnodes == 0 - max_nvisnodes = n_nodes - else - max_nvisnodes = nvisnodes - end - nvisnodes_at_max_level = min(max_available_nodes_per_finest_element, max_nvisnodes) - resolution = nvisnodes_at_max_level * 2^max_level - nvisnodes_per_level = [2^(max_level - level)*nvisnodes_at_max_level for level in 0:max_level] - # nvisnodes_per_level is an array (accessed by "level + 1" to accommodate - # level-0-cell) that contains the number of visualization nodes for any - # refinement level to visualize on an equidistant grid - - if ndims == 3 - (unstructured_data, coordinates, levels, +function get_data_2d(center_level_0, length_level_0, leaf_cells, coordinates, levels, + ndims, unstructured_data, n_nodes, + grid_lines = false, max_supported_level = 11, nvisnodes = nothing, + slice = :xy, point = (0.0, 0.0, 0.0)) + # Determine resolution for data interpolation + max_level = maximum(levels) + if max_level > max_supported_level + error("Maximum refinement level $max_level is higher than " * + "maximum supported level $max_supported_level") + end + max_available_nodes_per_finest_element = 2^(max_supported_level - max_level) + if nvisnodes === nothing + max_nvisnodes = 2 * n_nodes + elseif nvisnodes == 0 + max_nvisnodes = n_nodes + else + max_nvisnodes = nvisnodes + end + nvisnodes_at_max_level = min(max_available_nodes_per_finest_element, max_nvisnodes) + resolution = nvisnodes_at_max_level * 2^max_level + nvisnodes_per_level = [2^(max_level - level) * nvisnodes_at_max_level + for level in 0:max_level] + # nvisnodes_per_level is an array (accessed by "level + 1" to accommodate + # level-0-cell) that contains the number of visualization nodes for any + # refinement level to visualize on an equidistant grid + + if ndims == 3 + (unstructured_data, coordinates, levels, center_level_0) = unstructured_3d_to_2d(unstructured_data, - coordinates, levels, length_level_0, center_level_0, slice, - point) - end - - # Normalize element coordinates: move center to (0, 0) and domain size to [-1, 1]² - n_elements = length(levels) - normalized_coordinates = similar(coordinates) - for element_id in 1:n_elements - @views normalized_coordinates[:, element_id] .= ( - (coordinates[:, element_id] .- center_level_0) ./ (length_level_0 / 2 )) - end - - # Interpolate unstructured DG data to structured data - (structured_data = - unstructured2structured(unstructured_data, normalized_coordinates, - levels, resolution, nvisnodes_per_level)) - - # Interpolate cell-centered values to node-centered values - node_centered_data = cell2node(structured_data) - - # Determine axis coordinates for contour plot - xs = collect(range(-1, 1, length=resolution+1)) .* length_level_0/2 .+ center_level_0[1] - ys = collect(range(-1, 1, length=resolution+1)) .* length_level_0/2 .+ center_level_0[2] - - # Determine element vertices to plot grid lines - if grid_lines - mesh_vertices_x, mesh_vertices_y = calc_vertices(coordinates, levels, length_level_0) - else 
- mesh_vertices_x = Vector{Float64}(undef, 0) - mesh_vertices_y = Vector{Float64}(undef, 0) - end - - return xs, ys, node_centered_data, mesh_vertices_x, mesh_vertices_y -end + coordinates, levels, length_level_0, + center_level_0, slice, + point) + end + + # Normalize element coordinates: move center to (0, 0) and domain size to [-1, 1]² + n_elements = length(levels) + normalized_coordinates = similar(coordinates) + for element_id in 1:n_elements + @views normalized_coordinates[:, element_id] .= ((coordinates[:, element_id] .- + center_level_0) ./ + (length_level_0 / 2)) + end + + # Interpolate unstructured DG data to structured data + (structured_data = unstructured2structured(unstructured_data, + normalized_coordinates, + levels, resolution, nvisnodes_per_level)) + + # Interpolate cell-centered values to node-centered values + node_centered_data = cell2node(structured_data) + # Determine axis coordinates for contour plot + xs = collect(range(-1, 1, length = resolution + 1)) .* length_level_0 / 2 .+ + center_level_0[1] + ys = collect(range(-1, 1, length = resolution + 1)) .* length_level_0 / 2 .+ + center_level_0[2] + + # Determine element vertices to plot grid lines + if grid_lines + mesh_vertices_x, mesh_vertices_y = calc_vertices(coordinates, levels, + length_level_0) + else + mesh_vertices_x = Vector{Float64}(undef, 0) + mesh_vertices_y = Vector{Float64}(undef, 0) + end + + return xs, ys, node_centered_data, mesh_vertices_x, mesh_vertices_y +end # Extract data from a 1D DG solution and prepare it for visualization as a line plot. # This returns a tuple with @@ -363,43 +395,49 @@ end # Note: This is a low-level function that is not considered as part of Trixi's interface and may # thus be changed in future releases. function get_data_1d(original_nodes, unstructured_data, nvisnodes) - # Get the dimensions of u; where n_vars is the number of variables, n_nodes the number of nodal values per element and n_elements the total number of elements. - n_nodes, n_elements, n_vars = size(unstructured_data) - - # Set the amount of nodes visualized according to nvisnodes. - if nvisnodes === nothing - max_nvisnodes = 2 * n_nodes - elseif nvisnodes == 0 - max_nvisnodes = n_nodes - else - @assert nvisnodes >= 2 "nvisnodes must be zero or >= 2" - max_nvisnodes = nvisnodes - end - - interpolated_nodes = Array{eltype(original_nodes), 2}(undef, max_nvisnodes, n_elements) - interpolated_data = Array{eltype(unstructured_data), 3}(undef, max_nvisnodes, n_elements, n_vars) - - for j in 1:n_elements - # Interpolate on an equidistant grid. - interpolated_nodes[:, j] .= range(original_nodes[1,1,j], original_nodes[1,end,j], length = max_nvisnodes) - end - - nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes) - nodes_out = collect(range(-1, 1, length = max_nvisnodes)) - - # Calculate vandermonde matrix for interpolation. - vandermonde = polynomial_interpolation_matrix(nodes_in, nodes_out) - - # Iterate over all variables. - for v in 1:n_vars - # Interpolate data for each element. - for element in 1:n_elements - multiply_scalar_dimensionwise!(@view(interpolated_data[:, element, v]), - vandermonde, @view(unstructured_data[:, element, v])) + # Get the dimensions of u; where n_vars is the number of variables, n_nodes the number of nodal values per element and n_elements the total number of elements. + n_nodes, n_elements, n_vars = size(unstructured_data) + + # Set the amount of nodes visualized according to nvisnodes. 
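# The branches below implement the following convention; a hypothetical helper
# (name and structure are illustrative only) condenses it: `nothing` requests
# twice the solver nodes, `0` the solver nodes themselves, and any other value
# is used directly (and must be at least 2).
function resolve_nvisnodes(nvisnodes, n_nodes)
    nvisnodes === nothing ? 2 * n_nodes : (nvisnodes == 0 ? n_nodes : nvisnodes)
end
@assert resolve_nvisnodes(nothing, 4) == 8
@assert resolve_nvisnodes(0, 4) == 4
@assert resolve_nvisnodes(6, 4) == 6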
+ if nvisnodes === nothing + max_nvisnodes = 2 * n_nodes + elseif nvisnodes == 0 + max_nvisnodes = n_nodes + else + @assert nvisnodes>=2 "nvisnodes must be zero or >= 2" + max_nvisnodes = nvisnodes + end + + interpolated_nodes = Array{eltype(original_nodes), 2}(undef, max_nvisnodes, + n_elements) + interpolated_data = Array{eltype(unstructured_data), 3}(undef, max_nvisnodes, + n_elements, n_vars) + + for j in 1:n_elements + # Interpolate on an equidistant grid. + interpolated_nodes[:, j] .= range(original_nodes[1, 1, j], + original_nodes[1, end, j], + length = max_nvisnodes) + end + + nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes) + nodes_out = collect(range(-1, 1, length = max_nvisnodes)) + + # Calculate vandermonde matrix for interpolation. + vandermonde = polynomial_interpolation_matrix(nodes_in, nodes_out) + + # Iterate over all variables. + for v in 1:n_vars + # Interpolate data for each element. + for element in 1:n_elements + multiply_scalar_dimensionwise!(@view(interpolated_data[:, element, v]), + vandermonde, + @view(unstructured_data[:, element, v])) + end end - end - # Return results after data is reshaped - return vec(interpolated_nodes), reshape(interpolated_data, :, n_vars), vcat(original_nodes[1, 1, :], original_nodes[1, end, end]) + # Return results after data is reshaped + return vec(interpolated_nodes), reshape(interpolated_data, :, n_vars), + vcat(original_nodes[1, 1, :], original_nodes[1, end, end]) end # Change order of dimensions (variables are now last) and convert data to `solution_variables` @@ -407,40 +445,41 @@ end # Note: This is a low-level function that is not considered as part of Trixi.jl's interface and may # thus be changed in future releases. function get_unstructured_data(u, solution_variables, mesh, equations, solver, cache) + if solution_variables === cons2cons + raw_data = u + n_vars = size(raw_data, 1) + else + # FIXME: Remove this comment once the implementation following it has been verified + # Reinterpret the solution array as an array of conservative variables, + # compute the solution variables via broadcasting, and reinterpret the + # result as a plain array of floating point numbers + # raw_data = Array(reinterpret(eltype(u), + # solution_variables.(reinterpret(SVector{nvariables(equations),eltype(u)}, u), + # Ref(equations)))) + # n_vars = size(raw_data, 1) + n_vars_in = nvariables(equations) + n_vars = length(solution_variables(get_node_vars(u, equations, solver), + equations)) + raw_data = Array{eltype(u)}(undef, n_vars, Base.tail(size(u))...) + reshaped_u = reshape(u, n_vars_in, :) + reshaped_r = reshape(raw_data, n_vars, :) + for idx in axes(reshaped_u, 2) + reshaped_r[:, idx] = solution_variables(get_node_vars(reshaped_u, equations, + solver, idx), + equations) + end + end - if solution_variables === cons2cons - raw_data = u - n_vars = size(raw_data, 1) - else - # FIXME: Remove this comment once the implementation following it has been verified - # Reinterpret the solution array as an array of conservative variables, - # compute the solution variables via broadcasting, and reinterpret the - # result as a plain array of floating point numbers - # raw_data = Array(reinterpret(eltype(u), - # solution_variables.(reinterpret(SVector{nvariables(equations),eltype(u)}, u), - # Ref(equations)))) - # n_vars = size(raw_data, 1) - n_vars_in = nvariables(equations) - n_vars = length(solution_variables(get_node_vars(u, equations, solver), equations)) - raw_data = Array{eltype(u)}(undef, n_vars, Base.tail(size(u))...) 
- reshaped_u = reshape(u, n_vars_in, :) - reshaped_r = reshape(raw_data, n_vars, :) - for idx in axes(reshaped_u, 2) - reshaped_r[:, idx] = solution_variables(get_node_vars(reshaped_u, equations, solver, idx), equations) - end - end - - unstructured_data = Array{eltype(raw_data)}(undef, - ntuple((d) -> nnodes(solver), ndims(equations))..., - nelements(solver, cache), n_vars) - for variable in 1:n_vars - @views unstructured_data[.., :, variable] .= raw_data[variable, .., :] - end - - return unstructured_data -end - + unstructured_data = Array{eltype(raw_data)}(undef, + ntuple((d) -> nnodes(solver), + ndims(equations))..., + nelements(solver, cache), n_vars) + for variable in 1:n_vars + @views unstructured_data[.., :, variable] .= raw_data[variable, .., :] + end + return unstructured_data +end # Convert cell-centered values to node-centered values by averaging over all # four neighbors and making use of the periodicity of the solution @@ -448,53 +487,51 @@ end # Note: This is a low-level function that is not considered as part of Trixi.jl's interface and may # thus be changed in future releases. function cell2node(cell_centered_data) - # Create temporary data structure to make the averaging algorithm as simple - # as possible (by using a ghost layer) - tmp = similar(first(cell_centered_data), size(first(cell_centered_data)) .+ (2, 2)) - - # Create output data structure - resolution_in, _ = size(first(cell_centered_data)) - resolution_out = resolution_in + 1 - node_centered_data = [Matrix{Float64}(undef, resolution_out, resolution_out) - for _ in 1:length(cell_centered_data)] - - - for (cell_data, node_data) in zip(cell_centered_data, node_centered_data) - # Fill center with original data - tmp[2:end-1, 2:end-1] .= cell_data - - # Fill sides with opposite data (periodic domain) - # x-direction - tmp[1, 2:end-1] .= cell_data[end, :] - tmp[end, 2:end-1] .= cell_data[1, :] - # y-direction - tmp[2:end-1, 1, ] .= cell_data[:, end] - tmp[2:end-1, end] .= cell_data[:, 1, ] - # Corners - tmp[1, 1, ] = cell_data[end, end] - tmp[end, 1, ] = cell_data[1, end] - tmp[1, end] = cell_data[end, 1, ] - tmp[end, end] = cell_data[1, 1, ] - - # Obtain node-centered value by averaging over neighboring cell-centered values - for j in 1:resolution_out - for i in 1:resolution_out - node_data[i, j] = (tmp[i, j, ] + - tmp[i+1, j, ] + - tmp[i, j+1] + - tmp[i+1, j+1]) / 4 - end - end - end - - # Transpose - for (index, data) in enumerate(node_centered_data) - node_centered_data[index] = permutedims(data) - end - - return node_centered_data -end + # Create temporary data structure to make the averaging algorithm as simple + # as possible (by using a ghost layer) + tmp = similar(first(cell_centered_data), size(first(cell_centered_data)) .+ (2, 2)) + + # Create output data structure + resolution_in, _ = size(first(cell_centered_data)) + resolution_out = resolution_in + 1 + node_centered_data = [Matrix{Float64}(undef, resolution_out, resolution_out) + for _ in 1:length(cell_centered_data)] + + for (cell_data, node_data) in zip(cell_centered_data, node_centered_data) + # Fill center with original data + tmp[2:(end - 1), 2:(end - 1)] .= cell_data + + # Fill sides with opposite data (periodic domain) + # x-direction + tmp[1, 2:(end - 1)] .= cell_data[end, :] + tmp[end, 2:(end - 1)] .= cell_data[1, :] + # y-direction + tmp[2:(end - 1), 1] .= cell_data[:, end] + tmp[2:(end - 1), end] .= cell_data[:, 1] + # Corners + tmp[1, 1] = cell_data[end, end] + tmp[end, 1] = cell_data[1, end] + tmp[1, end] = cell_data[end, 1] + tmp[end, 
end] = cell_data[1, 1] + + # Obtain node-centered value by averaging over neighboring cell-centered values + for j in 1:resolution_out + for i in 1:resolution_out + node_data[i, j] = (tmp[i, j] + + tmp[i + 1, j] + + tmp[i, j + 1] + + tmp[i + 1, j + 1]) / 4 + end + end + end + + # Transpose + for (index, data) in enumerate(node_centered_data) + node_centered_data[index] = permutedims(data) + end + return node_centered_data +end # Convert 3d unstructured data to 2d data. # Additional to the new unstructured data updated coordinates, levels and @@ -505,572 +542,627 @@ end function unstructured_3d_to_2d(unstructured_data, coordinates, levels, length_level_0, center_level_0, slice, point) - if slice === :yz - slice_dimension = 1 - other_dimensions = [2, 3] - elseif slice === :xz - slice_dimension = 2 - other_dimensions = [1, 3] - elseif slice === :xy - slice_dimension = 3 - other_dimensions = [1, 2] - else - error("illegal dimension '$slice', supported dimensions are :yz, :xz, and :xy") - end - - # Limits of domain in slice dimension - lower_limit = center_level_0[slice_dimension] - length_level_0 / 2 - upper_limit = center_level_0[slice_dimension] + length_level_0 / 2 - - @assert length(point) >= 3 "Point must be three-dimensional." - if point[slice_dimension] < lower_limit || point[slice_dimension] > upper_limit - error(string("Slice plane is outside of domain.", - " point[$slice_dimension]=$(point[slice_dimension]) must be between $lower_limit and $upper_limit")) - end - - # Extract data shape information - n_nodes_in, _, _, n_elements, n_variables = size(unstructured_data) - - # Get node coordinates for DG locations on reference element - nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in) - - # New unstructured data has one dimension less. - # The redundant element ids are removed later. - @views new_unstructured_data = similar(unstructured_data[1, ..]) - - # Declare new empty arrays to fill in new coordinates and levels - new_coordinates = Array{Float64}(undef, 2, n_elements) - new_levels = Array{eltype(levels)}(undef, n_elements) - - # Counter for new element ids - new_id = 0 - - # Save vandermonde matrices in a Dict to prevent redundant generation - vandermonde_to_2d = Dict() - - # Permute dimensions such that the slice dimension is always the - # third dimension of the array. Below we can always interpolate in the - # third dimension. - if slice === :yz - unstructured_data = permutedims(unstructured_data, [2, 3, 1, 4, 5]) - elseif slice === :xz - unstructured_data = permutedims(unstructured_data, [1, 3, 2, 4, 5]) - end - - for element_id in 1:n_elements - # Distance from center to border of this element (half the length) - element_length = length_level_0 / 2^levels[element_id] - min_coordinate = coordinates[:, element_id] .- element_length / 2 - max_coordinate = coordinates[:, element_id] .+ element_length / 2 - - # Check if slice plane and current element intersect. - # The first check uses a "greater but not equal" to only match one cell if the - # slice plane lies between two cells. - # The second check is needed if the slice plane is at the upper border of - # the domain due to this. 
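# The slicing below maps the slice coordinate into [-1, 1] on each intersected
# element before interpolating the polynomial data there; with made-up numbers
# (element spanning [2, 4] in the slice dimension, slice plane at 3.5):
element_length_toy = 2.0
min_coordinate_toy = 2.0
point_slice_toy = 3.5
normalized_intercept = (point_slice_toy - min_coordinate_toy) /
                       element_length_toy * 2 - 1
@assert normalized_intercept == 0.5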
- if !((min_coordinate[slice_dimension] <= point[slice_dimension] && - max_coordinate[slice_dimension] > point[slice_dimension]) || - (point[slice_dimension] == upper_limit && - max_coordinate[slice_dimension] == upper_limit)) - # Continue for loop if they don't intersect - continue - end - - # This element is of interest - new_id += 1 - - # Add element to new coordinates and levels - new_coordinates[:, new_id] = coordinates[other_dimensions, element_id] - new_levels[new_id] = levels[element_id] - - # Construct vandermonde matrix (or load from Dict if possible) - normalized_intercept = - (point[slice_dimension] - min_coordinate[slice_dimension]) / - element_length * 2 - 1 - - if haskey(vandermonde_to_2d, normalized_intercept) - vandermonde = vandermonde_to_2d[normalized_intercept] + if slice === :yz + slice_dimension = 1 + other_dimensions = [2, 3] + elseif slice === :xz + slice_dimension = 2 + other_dimensions = [1, 3] + elseif slice === :xy + slice_dimension = 3 + other_dimensions = [1, 2] else - # Generate vandermonde matrix to interpolate values at nodes_in to one value - vandermonde = polynomial_interpolation_matrix(nodes_in, [normalized_intercept]) - vandermonde_to_2d[normalized_intercept] = vandermonde + error("illegal dimension '$slice', supported dimensions are :yz, :xz, and :xy") + end + + # Limits of domain in slice dimension + lower_limit = center_level_0[slice_dimension] - length_level_0 / 2 + upper_limit = center_level_0[slice_dimension] + length_level_0 / 2 + + @assert length(point)>=3 "Point must be three-dimensional." + if point[slice_dimension] < lower_limit || point[slice_dimension] > upper_limit + error(string("Slice plane is outside of domain.", + " point[$slice_dimension]=$(point[slice_dimension]) must be between $lower_limit and $upper_limit")) + end + + # Extract data shape information + n_nodes_in, _, _, n_elements, n_variables = size(unstructured_data) + + # Get node coordinates for DG locations on reference element + nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in) + + # New unstructured data has one dimension less. + # The redundant element ids are removed later. + @views new_unstructured_data = similar(unstructured_data[1, ..]) + + # Declare new empty arrays to fill in new coordinates and levels + new_coordinates = Array{Float64}(undef, 2, n_elements) + new_levels = Array{eltype(levels)}(undef, n_elements) + + # Counter for new element ids + new_id = 0 + + # Save vandermonde matrices in a Dict to prevent redundant generation + vandermonde_to_2d = Dict() + + # Permute dimensions such that the slice dimension is always the + # third dimension of the array. Below we can always interpolate in the + # third dimension. + if slice === :yz + unstructured_data = permutedims(unstructured_data, [2, 3, 1, 4, 5]) + elseif slice === :xz + unstructured_data = permutedims(unstructured_data, [1, 3, 2, 4, 5]) end - # 1D interpolation to specified slice plane - # We permuted the dimensions above such that now the dimension in which - # we will interpolate is always the third one. - for i in 1:n_nodes_in - for ii in 1:n_nodes_in - # Interpolate in the third dimension - data = unstructured_data[i, ii, :, element_id, :] + for element_id in 1:n_elements + # Distance from center to border of this element (half the length) + element_length = length_level_0 / 2^levels[element_id] + min_coordinate = coordinates[:, element_id] .- element_length / 2 + max_coordinate = coordinates[:, element_id] .+ element_length / 2 + + # Check if slice plane and current element intersect. 
+ # The first check uses a "greater but not equal" to only match one cell if the + # slice plane lies between two cells. + # The second check is needed if the slice plane is at the upper border of + # the domain due to this. + if !((min_coordinate[slice_dimension] <= point[slice_dimension] && + max_coordinate[slice_dimension] > point[slice_dimension]) || + (point[slice_dimension] == upper_limit && + max_coordinate[slice_dimension] == upper_limit)) + # Continue for loop if they don't intersect + continue + end + + # This element is of interest + new_id += 1 + + # Add element to new coordinates and levels + new_coordinates[:, new_id] = coordinates[other_dimensions, element_id] + new_levels[new_id] = levels[element_id] + + # Construct vandermonde matrix (or load from Dict if possible) + normalized_intercept = (point[slice_dimension] - + min_coordinate[slice_dimension]) / + element_length * 2 - 1 + + if haskey(vandermonde_to_2d, normalized_intercept) + vandermonde = vandermonde_to_2d[normalized_intercept] + else + # Generate vandermonde matrix to interpolate values at nodes_in to one value + vandermonde = polynomial_interpolation_matrix(nodes_in, + [normalized_intercept]) + vandermonde_to_2d[normalized_intercept] = vandermonde + end - value = multiply_dimensionwise(vandermonde, permutedims(data)) - new_unstructured_data[i, ii, new_id, :] = value[:, 1] - end + # 1D interpolation to specified slice plane + # We permuted the dimensions above such that now the dimension in which + # we will interpolate is always the third one. + for i in 1:n_nodes_in + for ii in 1:n_nodes_in + # Interpolate in the third dimension + data = unstructured_data[i, ii, :, element_id, :] + + value = multiply_dimensionwise(vandermonde, permutedims(data)) + new_unstructured_data[i, ii, new_id, :] = value[:, 1] + end + end end - end - # Remove redundant element ids - unstructured_data = new_unstructured_data[:, :, 1:new_id, :] - new_coordinates = new_coordinates[:, 1:new_id] - new_levels = new_levels[1:new_id] + # Remove redundant element ids + unstructured_data = new_unstructured_data[:, :, 1:new_id, :] + new_coordinates = new_coordinates[:, 1:new_id] + new_levels = new_levels[1:new_id] - center_level_0 = center_level_0[other_dimensions] + center_level_0 = center_level_0[other_dimensions] - return unstructured_data, new_coordinates, new_levels, center_level_0 + return unstructured_data, new_coordinates, new_levels, center_level_0 end # Convert 2d unstructured data to 1d slice and interpolate them. -function unstructured_2d_to_1d(original_nodes, unstructured_data, nvisnodes, slice, point) - - if slice === :x - slice_dimension = 2 - other_dimension = 1 - elseif slice === :y - slice_dimension = 1 - other_dimension = 2 - else - error("illegal dimension '$slice', supported dimensions are :x and :y") - end - - # Set up data structures to store new 1D data. - @views new_unstructured_data = similar(unstructured_data[1, ..]) - @views new_nodes = similar(original_nodes[1, 1, ..]) - - n_nodes_in, _, n_elements, n_variables = size(unstructured_data) - nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in) - - # Test if point lies in the domain. - lower_limit = original_nodes[1, 1, 1, 1] - upper_limit = original_nodes[1, n_nodes_in, n_nodes_in, n_elements] - - @assert length(point) >= 2 "Point must be two-dimensional." - if point[slice_dimension] < lower_limit || point[slice_dimension] > upper_limit - error(string("Slice axis is outside of domain. 
", - " point[$slice_dimension]=$(point[slice_dimension]) must be between $lower_limit and $upper_limit")) - end - - # Count the amount of new elements. - new_id = 0 - - # Permute dimensions so that the slice dimension is always in the correct place for later use. - if slice === :y - original_nodes = permutedims(original_nodes, [1, 3, 2, 4]) - unstructured_data = permutedims(unstructured_data, [2, 1, 3, 4]) - end - - # Iterate over all elements to find the ones that lie on the slice axis. - for element_id in 1:n_elements - min_coordinate = original_nodes[:, 1, 1, element_id] - max_coordinate = original_nodes[:, n_nodes_in, n_nodes_in, element_id] - element_length = max_coordinate - min_coordinate - - # Test if the element is on the slice axis. If not just continue with the next element. - if !((min_coordinate[slice_dimension] <= point[slice_dimension] && - max_coordinate[slice_dimension] > point[slice_dimension]) || - (point[slice_dimension] == upper_limit && max_coordinate[slice_dimension] == upper_limit)) - - continue - end - - new_id += 1 - - # Construct vandermonde matrix for interpolation of each 2D element to a 1D element. - normalized_intercept = - (point[slice_dimension] - min_coordinate[slice_dimension]) / - element_length[1] * 2 - 1 - vandermonde = polynomial_interpolation_matrix(nodes_in, normalized_intercept) - - # Interpolate to each node of new 1D element. - for v in 1:n_variables - for node in 1:n_nodes_in - new_unstructured_data[node, new_id, v] = (vandermonde*unstructured_data[node, :, element_id, v])[1] - end +function unstructured_2d_to_1d(original_nodes, unstructured_data, nvisnodes, slice, + point) + if slice === :x + slice_dimension = 2 + other_dimension = 1 + elseif slice === :y + slice_dimension = 1 + other_dimension = 2 + else + error("illegal dimension '$slice', supported dimensions are :x and :y") end - new_nodes[:, new_id] = original_nodes[other_dimension, :, 1, element_id] - end + # Set up data structures to store new 1D data. + @views new_unstructured_data = similar(unstructured_data[1, ..]) + @views new_nodes = similar(original_nodes[1, 1, ..]) - return get_data_1d(reshape(new_nodes[:, 1:new_id], 1, n_nodes_in, new_id), new_unstructured_data[:, 1:new_id, :], nvisnodes) -end + n_nodes_in, _, n_elements, n_variables = size(unstructured_data) + nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in) -# Calculate the arc length of a curve given by ndims x npoints point coordinates (piece-wise linear approximation) -function calc_arc_length(coordinates) - n_points = size(coordinates)[2] - arc_length = zeros(n_points) - for i in 1:n_points-1 - arc_length[i+1] = arc_length[i] + sqrt(sum((coordinates[:,i]-coordinates[:,i+1]).^2)) - end - return arc_length -end + # Test if point lies in the domain. + lower_limit = original_nodes[1, 1, 1, 1] + upper_limit = original_nodes[1, n_nodes_in, n_nodes_in, n_elements] -# Convert 2d unstructured data to 1d data at given curve. -function unstructured_2d_to_1d_curve(original_nodes, unstructured_data, nvisnodes, curve, mesh, solver, cache) + @assert length(point)>=2 "Point must be two-dimensional." + if point[slice_dimension] < lower_limit || point[slice_dimension] > upper_limit + error(string("Slice axis is outside of domain. ", + " point[$slice_dimension]=$(point[slice_dimension]) must be between $lower_limit and $upper_limit")) + end - n_points_curve = size(curve)[2] - n_nodes, _, n_elements, n_variables = size(unstructured_data) - nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes) + # Count the amount of new elements. 
+ new_id = 0 - # Check if input is correct. - min = original_nodes[:, 1, 1, 1] - max = max_coordinate = original_nodes[:, n_nodes, n_nodes, n_elements] - @assert size(curve) == (2, size(curve)[2]) "Coordinates along curve must be 2xn dimensional." - for element in 1:n_points_curve - @assert (prod(vcat(curve[:, n_points_curve] .>= min, curve[:, n_points_curve] - .<= max))) "Some coordinates from `curve` are outside of the domain.." - end + # Permute dimensions so that the slice dimension is always in the correct place for later use. + if slice === :y + original_nodes = permutedims(original_nodes, [1, 3, 2, 4]) + unstructured_data = permutedims(unstructured_data, [2, 1, 3, 4]) + end - # Set nodes according to the length of the curve. - arc_length = calc_arc_length(curve) + # Iterate over all elements to find the ones that lie on the slice axis. + for element_id in 1:n_elements + min_coordinate = original_nodes[:, 1, 1, element_id] + max_coordinate = original_nodes[:, n_nodes_in, n_nodes_in, element_id] + element_length = max_coordinate - min_coordinate + + # Test if the element is on the slice axis. If not just continue with the next element. + if !((min_coordinate[slice_dimension] <= point[slice_dimension] && + max_coordinate[slice_dimension] > point[slice_dimension]) || + (point[slice_dimension] == upper_limit && + max_coordinate[slice_dimension] == upper_limit)) + continue + end - # Setup data structures. - data_on_curve = Array{Float64}(undef, n_points_curve, n_variables) - temp_data = Array{Float64}(undef, n_nodes, n_points_curve, n_variables) + new_id += 1 + + # Construct vandermonde matrix for interpolation of each 2D element to a 1D element. + normalized_intercept = (point[slice_dimension] - + min_coordinate[slice_dimension]) / + element_length[1] * 2 - 1 + vandermonde = polynomial_interpolation_matrix(nodes_in, normalized_intercept) + + # Interpolate to each node of new 1D element. + for v in 1:n_variables + for node in 1:n_nodes_in + new_unstructured_data[node, new_id, v] = (vandermonde * unstructured_data[node, + :, + element_id, + v])[1] + end + end - # For each coordinate find the corresponding element with its id. - element_ids = get_elements_by_coordinates(curve, mesh, solver, cache) + new_nodes[:, new_id] = original_nodes[other_dimension, :, 1, element_id] + end - # Iterate over all found elements. - for element in 1:n_points_curve + return get_data_1d(reshape(new_nodes[:, 1:new_id], 1, n_nodes_in, new_id), + new_unstructured_data[:, 1:new_id, :], nvisnodes) +end - min_coordinate = original_nodes[:, 1, 1, element_ids[element]] - max_coordinate = original_nodes[:, n_nodes, n_nodes, element_ids[element]] - element_length = max_coordinate - min_coordinate +# Calculate the arc length of a curve given by ndims x npoints point coordinates (piece-wise linear approximation) +function calc_arc_length(coordinates) + n_points = size(coordinates)[2] + arc_length = zeros(n_points) + for i in 1:(n_points - 1) + arc_length[i + 1] = arc_length[i] + + sqrt(sum((coordinates[:, i] - coordinates[:, i + 1]) .^ 2)) + end + return arc_length +end - normalized_coordinates = (curve[:, element] - min_coordinate)/element_length[1]*2 .-1 +# Convert 2d unstructured data to 1d data at given curve. +function unstructured_2d_to_1d_curve(original_nodes, unstructured_data, nvisnodes, + curve, mesh, solver, cache) + n_points_curve = size(curve)[2] + n_nodes, _, n_elements, n_variables = size(unstructured_data) + nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes) + + # Check if input is correct. 
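+    # (For reference, `curve` is expected to be a 2 x n matrix of point coordinates.
+    # A minimal hand-made example for a domain containing the unit square would be
+    # curve = vcat(range(0.0, 1.0, length = 100)', range(0.0, 1.0, length = 100)'),
+    # i.e. a diagonal sampled at 100 points; the checks below reject any other
+    # shape and any point outside the domain.)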
+    min = original_nodes[:, 1, 1, 1]
+    max = max_coordinate = original_nodes[:, n_nodes, n_nodes, n_elements]
+    @assert size(curve)==(2, size(curve)[2]) "Coordinates along curve must be 2xn dimensional."
+    for element in 1:n_points_curve
+        @assert (prod(vcat(curve[:, element] .>= min,
+                           curve[:, element]
+                           .<=
+                           max))) "Some coordinates from `curve` are outside of the domain."
+    end
+
+    # Set nodes according to the length of the curve.
+    arc_length = calc_arc_length(curve)
+
+    # Setup data structures.
+    data_on_curve = Array{Float64}(undef, n_points_curve, n_variables)
+    temp_data = Array{Float64}(undef, n_nodes, n_points_curve, n_variables)
+
+    # For each coordinate find the corresponding element with its id.
+    element_ids = get_elements_by_coordinates(curve, mesh, solver, cache)
+
+    # Iterate over all found elements.
+    for element in 1:n_points_curve
+        min_coordinate = original_nodes[:, 1, 1, element_ids[element]]
+        max_coordinate = original_nodes[:, n_nodes, n_nodes, element_ids[element]]
+        element_length = max_coordinate - min_coordinate
+
+        normalized_coordinates = (curve[:, element] - min_coordinate) /
+                                 element_length[1] * 2 .- 1
+
+        # Interpolate to a single point in each element.
+        vandermonde_x = polynomial_interpolation_matrix(nodes_in,
+                                                        normalized_coordinates[1])
+        vandermonde_y = polynomial_interpolation_matrix(nodes_in,
+                                                        normalized_coordinates[2])
+        for v in 1:n_variables
+            for i in 1:n_nodes
+                temp_data[i, element, v] = (vandermonde_y * unstructured_data[i, :,
+                                                                              element_ids[element],
+                                                                              v])[1]
+            end
+            data_on_curve[element, v] = (vandermonde_x * temp_data[:, element, v])[]
+        end
+    end
+
+    return arc_length, data_on_curve, nothing
end

# Convert a PlotData2DTriangulate object to 1d data along a given curve.
function unstructured_2d_to_1d_curve(pd, input_curve, slice, point, nvisnodes)
-  # If no curve is defined, create a axis curve.
-  if input_curve === nothing
-    input_curve = axis_curve(pd.x, pd.y, nothing, slice, point, nvisnodes)
-  end
+    # If no curve is defined, create an axis curve.
+    if input_curve === nothing
+        input_curve = axis_curve(pd.x, pd.y, nothing, slice, point, nvisnodes)
+    end

-  @assert size(input_curve, 1) == 2 "Input 'curve' must be 2xn dimensional."
+    @assert size(input_curve, 1)==2 "Input 'curve' must be 2xn dimensional."

-  # For each coordinate find the corresponding triangle with its ids.
-  ids_by_coordinates = get_ids_by_coordinates(input_curve, pd)
-  found_coordinates = ids_by_coordinates[:, 1] .!= nothing
+    # For each coordinate find the corresponding triangle with its ids.
+    ids_by_coordinates = get_ids_by_coordinates(input_curve, pd)
+    found_coordinates = ids_by_coordinates[:, 1] .!= nothing

-  @assert found_coordinates != zeros(size(input_curve, 2)) "No points of 'curve' are inside of the solutions domain."
+    @assert found_coordinates!=zeros(size(input_curve, 2)) "No points of 'curve' are inside of the solution's domain."

-  # These hold the ids of the elements and triangles the points of the curve sit in.
-  element_ids = @view ids_by_coordinates[found_coordinates, 1]
-  triangle_ids = @view ids_by_coordinates[found_coordinates, 2]
+    # These hold the ids of the elements and triangles the points of the curve sit in.
+    element_ids = @view ids_by_coordinates[found_coordinates, 1]
+    triangle_ids = @view ids_by_coordinates[found_coordinates, 2]

-  # Shorten the curve, so that it contains only point that were found.
-  curve = @view input_curve[:, found_coordinates]
+    # Shorten the curve, so that it contains only points that were found.
+    curve = @view input_curve[:, found_coordinates]

-  n_variables = length(pd.data[1, 1])
-  n_points_curve = size(curve, 2)
+    n_variables = length(pd.data[1, 1])
+    n_points_curve = size(curve, 2)

-  # Set nodes according to the length of the curve.
-  arc_length = calc_arc_length(curve)
+    # Set nodes according to the length of the curve.
+    arc_length = calc_arc_length(curve)

-  # Setup data structures.
-  data_on_curve = Array{Float64}(undef, n_points_curve, n_variables)
+    # Setup data structures.
+    data_on_curve = Array{Float64}(undef, n_points_curve, n_variables)

-  # Iterate over all points on the curve.
-  for point in 1:n_points_curve
-    element = @view element_ids[point]
-    triangle = @view pd.t[triangle_ids[point], :]
-    for v in 1:n_variables
-      # Get the x and y coordinates of the corners of given triangle.
-      x_coordinates_triangle = SVector{3}(pd.x[triangle, element])
-      y_coordinates_triangle = SVector{3}(pd.y[triangle, element])
+    # Iterate over all points on the curve.
+    for point in 1:n_points_curve
+        element = @view element_ids[point]
+        triangle = @view pd.t[triangle_ids[point], :]
+        for v in 1:n_variables
+            # Get the x and y coordinates of the corners of given triangle.
+            x_coordinates_triangle = SVector{3}(pd.x[triangle, element])
+            y_coordinates_triangle = SVector{3}(pd.y[triangle, element])

-      # Extract solutions values in corners of the triangle.
-      values_triangle = SVector{3}(getindex.(view(pd.data, triangle, element), v))
+            # Extract solution values in corners of the triangle.
+            values_triangle = SVector{3}(getindex.(view(pd.data, triangle, element), v))

-      # Linear interpolation in each triangle to the points on the curve.
-      data_on_curve[point, v] = triangle_interpolation(x_coordinates_triangle, y_coordinates_triangle, values_triangle, curve[:, point])
+            # Linear interpolation in each triangle to the points on the curve.
+            data_on_curve[point, v] = triangle_interpolation(x_coordinates_triangle,
+                                                             y_coordinates_triangle,
+                                                             values_triangle,
+                                                             curve[:, point])
+        end
    end

-  return arc_length, data_on_curve, nothing
+    return arc_length, data_on_curve, nothing
end

# Convert 3d unstructured data to 1d data at given curve.
-function unstructured_3d_to_1d_curve(original_nodes, unstructured_data, nvisnodes, curve, mesh, solver, cache)
-
-  n_points_curve = size(curve)[2]
-  n_nodes, _, _, n_elements, n_variables = size(unstructured_data)
-  nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes)
-
-  # Check if input is correct.
-  min = original_nodes[:, 1, 1, 1, 1]
-  max = max_coordinate = original_nodes[:, n_nodes, n_nodes, n_nodes, n_elements]
-  @assert size(curve) == (3, n_points_curve) "Coordinates along curve must be 3xn dimensional."
-  for element in 1:n_points_curve
-    @assert (prod(vcat(curve[:, n_points_curve] .>= min, curve[:, n_points_curve]
-      .<= max))) "Some coordinates from `curve` are outside of the domain.."
-  end
-
-  # Set nodes according to the length of the curve.
-  arc_length = calc_arc_length(curve)
-
-  # Setup data structures.
-  data_on_curve = Array{Float64}(undef, n_points_curve, n_variables)
-  temp_data = Array{Float64}(undef, n_nodes, n_nodes+1, n_points_curve, n_variables)
-
-  # For each coordinate find the corresponding element with its id.
-  element_ids = get_elements_by_coordinates(curve, mesh, solver, cache)
-
-  # Iterate over all found elements.
-  for element in 1:n_points_curve
-
-    min_coordinate = original_nodes[:, 1, 1, 1, element_ids[element]]
-    max_coordinate = original_nodes[:, n_nodes, n_nodes, n_nodes, element_ids[element]]
-    element_length = max_coordinate - min_coordinate
-
-    normalized_coordinates = (curve[:, element] - min_coordinate)/element_length[1]*2 .-1
+function unstructured_3d_to_1d_curve(original_nodes, unstructured_data, nvisnodes,
+                                     curve, mesh, solver, cache)
+    n_points_curve = size(curve)[2]
+    n_nodes, _, _, n_elements, n_variables = size(unstructured_data)
+    nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes)
+
+    # Check if input is correct.
+    min = original_nodes[:, 1, 1, 1, 1]
+    max = max_coordinate = original_nodes[:, n_nodes, n_nodes, n_nodes, n_elements]
+    @assert size(curve)==(3, n_points_curve) "Coordinates along curve must be 3xn dimensional."
+    for element in 1:n_points_curve
+        @assert (prod(vcat(curve[:, element] .>= min,
+                           curve[:, element]
+                           .<=
+                           max))) "Some coordinates from `curve` are outside of the domain."
+    end
+
+    # Set nodes according to the length of the curve.
+    arc_length = calc_arc_length(curve)
+
+    # Setup data structures.
+    data_on_curve = Array{Float64}(undef, n_points_curve, n_variables)
+    temp_data = Array{Float64}(undef, n_nodes, n_nodes + 1, n_points_curve, n_variables)
+
+    # For each coordinate find the corresponding element with its id.
+    element_ids = get_elements_by_coordinates(curve, mesh, solver, cache)
+
+    # Iterate over all found elements.
+    for element in 1:n_points_curve
+        min_coordinate = original_nodes[:, 1, 1, 1, element_ids[element]]
+        max_coordinate = original_nodes[:, n_nodes, n_nodes, n_nodes,
+                                        element_ids[element]]
+        element_length = max_coordinate - min_coordinate

+        normalized_coordinates = (curve[:, element] - min_coordinate) /
+                                 element_length[1] * 2 .- 1
+
+        # Interpolate to a single point in each element.
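+        # (For intuition: each 1D operator below is the Lagrange basis on `nodes_in`
+        # evaluated at a single target coordinate. For the nodes [-1, 0, 1] and the
+        # target 0.5, for example, it is the row [-0.125 0.75 0.375]; contracting it
+        # with nodal values evaluates the interpolating polynomial at 0.5.)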
+        vandermonde_x = polynomial_interpolation_matrix(nodes_in,
+                                                        normalized_coordinates[1])
+        vandermonde_y = polynomial_interpolation_matrix(nodes_in,
+                                                        normalized_coordinates[2])
+        vandermonde_z = polynomial_interpolation_matrix(nodes_in,
+                                                        normalized_coordinates[3])
+        for v in 1:n_variables
+            for i in 1:n_nodes
+                for ii in 1:n_nodes
+                    temp_data[i, ii, element, v] = (vandermonde_z * unstructured_data[i,
+                                                                                      ii,
+                                                                                      :,
+                                                                                      element_ids[element],
+                                                                                      v])[1]
+                end
+                temp_data[i, n_nodes + 1, element, v] = (vandermonde_y * temp_data[i,
+                                                                                   1:n_nodes,
+                                                                                   element,
+                                                                                   v])[1]
+            end
+            data_on_curve[element, v] = (vandermonde_x * temp_data[:, n_nodes + 1,
+                                                                   element, v])[1]
+        end
+    end

+    return arc_length, data_on_curve, nothing
end

# Convert 3d unstructured data from a general mesh to 1d data at given curve.
function unstructured_3d_to_1d_curve(nodes, data, curve, slice, point, nvisnodes)
-  # If no curve is defined, create a axis curve.
-  if curve === nothing
-    curve = axis_curve(nodes[1,:,:,:,:], nodes[2,:,:,:,:], nodes[3,:,:,:,:], slice, point, nvisnodes)
-  end
+    # If no curve is defined, create an axis curve.
+    if curve === nothing
+        curve = axis_curve(nodes[1, :, :, :, :], nodes[2, :, :, :, :],
+                           nodes[3, :, :, :, :], slice, point, nvisnodes)
+    end

-  # Set up data structure.
-  n_points_curve = size(curve, 2)
-  n_variables = size(data, 1)
-  data_on_curve = Array{Float64}(undef, n_points_curve, n_variables)
+    # Set up data structure.
+    n_points_curve = size(curve, 2)
+    n_variables = size(data, 1)
+    data_on_curve = Array{Float64}(undef, n_points_curve, n_variables)

-  # Iterate over every point on the curve and determine the solutions value at given point.
-  for i in 1:n_points_curve
-    @views data_on_curve[i, :] .= get_value_at_point(curve[:,i], nodes, data)
-  end
+    # Iterate over every point on the curve and determine the solution's value at the given point.
+    for i in 1:n_points_curve
+        @views data_on_curve[i, :] .= get_value_at_point(curve[:, i], nodes, data)
+    end

-  mesh_vertices_x = nothing
+    mesh_vertices_x = nothing

-  return calc_arc_length(curve), data_on_curve, mesh_vertices_x
+    return calc_arc_length(curve), data_on_curve, mesh_vertices_x
end

# Check if the first 'amount'-many points can still form a valid tetrahedron.
-function is_valid_tetrahedron(amount, coordinates; tol=10^-4)
-  a = coordinates[:,1]; b = coordinates[:,2]; c = coordinates[:,3]; d = coordinates[:,4];
-  if amount == 2 # If two points are the same, then no tetrahedron can be formed.
-    return !(isapprox(a, b; atol=tol))
-  elseif amount == 3 # Check if three points are on the same line.
-    return !on_the_same_line(a, b, c; tol=tol)
-  elseif amount == 4 # Check if four points form a tetrahedron.
-    A = hcat(coordinates[1, :], coordinates[2, :], coordinates[3, :], SVector(1, 1, 1, 1))
-    return !isapprox(det(A), 0; atol=tol)
-  else # With one point a tetrahedron can always be formed.
-    return true
-  end
+function is_valid_tetrahedron(amount, coordinates; tol = 10^-4)
+    a = coordinates[:, 1]
+    b = coordinates[:, 2]
+    c = coordinates[:, 3]
+    d = coordinates[:, 4]
+    if amount == 2 # If two points are the same, then no tetrahedron can be formed.
+        return !(isapprox(a, b; atol = tol))
+    elseif amount == 3 # Check if three points are on the same line.
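+        # (For example, the hand-picked points (0, 0, 0), (1, 1, 1) and (2, 2, 2)
+        # are collinear and would therefore be rejected here.)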
+        return !on_the_same_line(a, b, c; tol = tol)
+    elseif amount == 4 # Check if four points form a tetrahedron.
+        A = hcat(coordinates[1, :], coordinates[2, :], coordinates[3, :],
+                 SVector(1, 1, 1, 1))
+        return !isapprox(det(A), 0; atol = tol)
+    else # With one point a tetrahedron can always be formed.
+        return true
+    end
end

# Check if three given 3D-points are on the same line.
-function on_the_same_line(a, b, c; tol=10^-4)
-  # Calculate the intersection of the a-b-axis at x=0.
-  if b[1] == 0
-    intersect_a_b = b
-  else
-    intersect_a_b = a - b.*(a[1]/b[1])
-  end
-  # Calculate the intersection of the a-c-axis at x=0.
-  if c[1] == 0
-    intersect_a_c = c
-  else
-    intersect_a_c = a - c.*(a[1]/c[1])
-  end
-  return isapprox(intersect_a_b, intersect_a_c; atol=tol)
+function on_the_same_line(a, b, c; tol = 10^-4)
+    # Calculate the intersection of the a-b-axis at x=0.
+    if b[1] == 0
+        intersect_a_b = b
+    else
+        intersect_a_b = a - b .* (a[1] / b[1])
+    end
+    # Calculate the intersection of the a-c-axis at x=0.
+    if c[1] == 0
+        intersect_a_c = c
+    else
+        intersect_a_c = a - c .* (a[1] / c[1])
+    end
+    return isapprox(intersect_a_b, intersect_a_c; atol = tol)
end

# Interpolate from four corners of a tetrahedron to a single point.
-function tetrahedron_interpolation(x_coordinates_in, y_coordinates_in, z_coordinates_in, values_in, coordinate_out)
-  A = hcat(x_coordinates_in, y_coordinates_in, z_coordinates_in, SVector(1, 1, 1, 1))
-  c = A \ values_in
-  return c[1] * coordinate_out[1] + c[2] * coordinate_out[2] + c[3] * coordinate_out[3] + c[4]
+function tetrahedron_interpolation(x_coordinates_in, y_coordinates_in, z_coordinates_in,
+                                   values_in, coordinate_out)
+    A = hcat(x_coordinates_in, y_coordinates_in, z_coordinates_in, SVector(1, 1, 1, 1))
+    c = A \ values_in
+    return c[1] * coordinate_out[1] + c[2] * coordinate_out[2] +
+           c[3] * coordinate_out[3] + c[4]
end

# Calculate the distances from every entry in `nodes` to the given point.
function distances_from_single_point(nodes, point)
-  _, n_nodes, _, _, n_elements = size(nodes)
-  shifted_data = nodes.-point
-  distances = zeros(n_nodes, n_nodes, n_nodes, n_elements)
-
-  # Iterate over every entry.
-  for element in 1:n_elements
-    for x in 1:n_nodes
-      for y in 1:n_nodes
-        for z in 1:n_nodes
-          distances[x,y,z,element] = norm(shifted_data[:,x,y,z,element])
-        end
-      end
-    end
-  end
-  return distances
+    _, n_nodes, _, _, n_elements = size(nodes)
+    shifted_data = nodes .- point
+    distances = zeros(n_nodes, n_nodes, n_nodes, n_elements)
+
+    # Iterate over every entry.
+    for element in 1:n_elements
+        for x in 1:n_nodes
+            for y in 1:n_nodes
+                for z in 1:n_nodes
+                    distances[x, y, z, element] = norm(shifted_data[:, x, y, z,
+                                                                    element])
+                end
+            end
+        end
+    end
+    return distances
end

# Interpolate the data on given nodes to a single value at given point.
function get_value_at_point(point, nodes, data)
-  # Set up data structures.
+ n_variables, n_x_nodes, n_y_nodes, n_z_nodes, _ = size(data) + distances = distances_from_single_point(nodes, point) + maximum_distance = maximum(distances) + + coordinates_tetrahedron = Array{Float64, 2}(undef, 3, 4) + value_tetrahedron = Array{Float64}(undef, n_variables, 4) + + index = argmin(distances) - coordinates_tetrahedron = Array{Float64, 2}(undef, 3, 4) - value_tetrahedron = Array{Float64}(undef, n_variables, 4) + # If the point sits exactly on a node, no interpolation is needed. + if nodes[:, index[1], index[2], index[3], index[4]] == point + return data[1, index[1], index[2], index[3], index[4]] + end - index = argmin(distances) + @views coordinates_tetrahedron[:, 1] = nodes[:, index[1], index[2], index[3], + index[4]] + @views value_tetrahedron[:, 1] = data[:, index[1], index[2], index[3], index[4]] + + # Restrict the interpolation to the closest element only. + closest_element = index[4] + @views element_distances = distances[:, :, :, closest_element] + + # Find a tetrahedron, which is given by four corners, to interpolate from. + for i in 1:4 + # Iterate until a valid tetrahedron is found. + while true + index = argmin(element_distances) + element_distances[index[1], index[2], index[3]] = maximum_distance + + @views coordinates_tetrahedron[:, i] = nodes[:, index[1], index[2], + index[3], closest_element] + @views value_tetrahedron[:, i] = data[:, index[1], index[2], index[3], + closest_element] + + # Look for another point if current tetrahedron is not valid. + if is_valid_tetrahedron(i, coordinates_tetrahedron) + break + end + end + end - # If the point sits exactly on a node, no interpolation is needed. - if nodes[:, index[1], index[2], index[3], index[4]] == point - return data[1, index[1], index[2], index[3], index[4]] - end + # Interpolate from tetrahedron to given point. + value_at_point = Array{Float64}(undef, n_variables) + for v in 1:n_variables + value_at_point[v] = tetrahedron_interpolation(coordinates_tetrahedron[1, :], + coordinates_tetrahedron[2, :], + coordinates_tetrahedron[3, :], + value_tetrahedron[v, :], point) + end - @views coordinates_tetrahedron[:,1] = nodes[:, index[1], index[2], index[3], index[4]] - @views value_tetrahedron[:, 1] = data[:, index[1], index[2], index[3], index[4]] + return value_at_point +end - # Restrict the interpolation to the closest element only. - closest_element = index[4] - @views element_distances = distances[:,:,:,closest_element] +# Convert 3d unstructured data to 1d slice and interpolate them. +function unstructured_3d_to_1d(original_nodes, unstructured_data, nvisnodes, slice, + point) + if slice === :x + slice_dimension = 1 + other_dimensions = [2, 3] + elseif slice === :y + slice_dimension = 2 + other_dimensions = [1, 3] + elseif slice === :z + slice_dimension = 3 + other_dimensions = [1, 2] + else + error("illegal dimension '$slice', supported dimensions are :x, :y and :z") + end - # Find a tetrahedron, which is given by four corners, to interpolate from. - for i in 1:4 - # Iterate until a valid tetrahedron is found. - while true - index = argmin(element_distances) - element_distances[index[1], index[2], index[3]] = maximum_distance + # Set up data structures to store new 1D data. 
+    @views new_unstructured_data = similar(unstructured_data[1, 1, ..])
+    @views temp_unstructured_data = similar(unstructured_data[1, ..])
+    @views new_nodes = similar(original_nodes[1, 1, 1, ..])
+
+    n_nodes_in, _, _, n_elements, n_variables = size(unstructured_data)
+    nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in)
+
+    # Test if point lies in the domain.
+    lower_limit = original_nodes[1, 1, 1, 1, 1]
+    upper_limit = original_nodes[1, n_nodes_in, n_nodes_in, n_nodes_in, n_elements]
+
+    @assert length(point)>=3 "Point must be three-dimensional."
+    if prod(point[other_dimensions] .< lower_limit) ||
+       prod(point[other_dimensions] .> upper_limit)
+        error(string("Slice axis is outside of domain. ",
+                     " point[$other_dimensions]=$(point[other_dimensions]) must be between $lower_limit and $upper_limit"))
    end
+
+    # Count the amount of new elements.
+    new_id = 0
+
+    # Permute dimensions so that the slice dimensions are always in the correct places for later use.
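+    # (A minimal sketch of the effect: for slice === :x, permutedims with [2, 3, 1, 4, 5]
+    # reorders the node axes of the data from (x, y, z) to (y, z, x), so the two axes
+    # that get interpolated away always come first and the retained slice axis is
+    # always third; the nodes array is permuted analogously behind its coordinate axis.)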
- if slice === :x - original_nodes = permutedims(original_nodes, [1, 3, 4, 2, 5]) - unstructured_data = permutedims(unstructured_data, [2, 3, 1, 4, 5]) - elseif slice === :y - original_nodes = permutedims(original_nodes, [1, 2, 4, 3, 5]) - unstructured_data = permutedims(unstructured_data, [1, 3, 2, 4, 5]) - end - - # Iterate over all elements to find the ones that lie on the slice axis. - for element_id in 1:n_elements - min_coordinate = original_nodes[:, 1, 1, 1, element_id] - max_coordinate = original_nodes[:, n_nodes_in, n_nodes_in, n_nodes_in, element_id] - element_length = max_coordinate - min_coordinate - - # Test if the element is on the slice axis. If not just continue with the next element. - if !((prod(min_coordinate[other_dimensions] .<= point[other_dimensions]) && - prod(max_coordinate[other_dimensions] .> point[other_dimensions])) || - (point[other_dimensions] == upper_limit && prod(max_coordinate[other_dimensions] .== upper_limit))) - - continue - end - - new_id += 1 - - # Construct vandermonde matrix for interpolation of each 2D element to a 1D element. - normalized_intercept = - (point[other_dimensions] .- min_coordinate[other_dimensions]) / - element_length[1] * 2 .- 1 - vandermonde_i = polynomial_interpolation_matrix(nodes_in, normalized_intercept[1]) - vandermonde_ii = polynomial_interpolation_matrix(nodes_in, normalized_intercept[2]) - - # Interpolate to each node of new 1D element. - for v in 1:n_variables - for i in 1:n_nodes_in - for ii in 1:n_nodes_in - temp_unstructured_data[i, ii, new_id, v] = (vandermonde_ii*unstructured_data[ii, :, i, element_id, v])[1] + # Iterate over all elements to find the ones that lie on the slice axis. + for element_id in 1:n_elements + min_coordinate = original_nodes[:, 1, 1, 1, element_id] + max_coordinate = original_nodes[:, n_nodes_in, n_nodes_in, n_nodes_in, + element_id] + element_length = max_coordinate - min_coordinate + + # Test if the element is on the slice axis. If not just continue with the next element. + if !((prod(min_coordinate[other_dimensions] .<= point[other_dimensions]) && + prod(max_coordinate[other_dimensions] .> point[other_dimensions])) || + (point[other_dimensions] == upper_limit && + prod(max_coordinate[other_dimensions] .== upper_limit))) + continue end - new_unstructured_data[i, new_id, v] = (vandermonde_i*temp_unstructured_data[i, :, new_id, v])[1] - end - end - new_nodes[:, new_id] = original_nodes[slice_dimension, 1, 1, :, element_id] - end + new_id += 1 + + # Construct vandermonde matrix for interpolation of each 2D element to a 1D element. + normalized_intercept = (point[other_dimensions] .- + min_coordinate[other_dimensions]) / + element_length[1] * 2 .- 1 + vandermonde_i = polynomial_interpolation_matrix(nodes_in, + normalized_intercept[1]) + vandermonde_ii = polynomial_interpolation_matrix(nodes_in, + normalized_intercept[2]) + + # Interpolate to each node of new 1D element. 
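+        # (The 2D-to-1D reduction happens as two 1D contractions: the inner loop
+        # collapses one in-plane direction with vandermonde_ii at
+        # normalized_intercept[2], then vandermonde_i collapses the remaining one at
+        # normalized_intercept[1], leaving a single line of n_nodes_in values.)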
+ for v in 1:n_variables + for i in 1:n_nodes_in + for ii in 1:n_nodes_in + temp_unstructured_data[i, ii, new_id, v] = (vandermonde_ii * unstructured_data[ii, + :, + i, + element_id, + v])[1] + end + new_unstructured_data[i, new_id, v] = (vandermonde_i * temp_unstructured_data[i, + :, + new_id, + v])[1] + end + end + + new_nodes[:, new_id] = original_nodes[slice_dimension, 1, 1, :, element_id] + end - return get_data_1d(reshape(new_nodes[:, 1:new_id], 1, n_nodes_in, new_id), new_unstructured_data[:, 1:new_id, :], nvisnodes) + return get_data_1d(reshape(new_nodes[:, 1:new_id], 1, n_nodes_in, new_id), + new_unstructured_data[:, 1:new_id, :], nvisnodes) end # Interpolate unstructured DG data to structured data (cell-centered) @@ -1082,264 +1174,270 @@ end # thus be changed in future releases. function unstructured2structured(unstructured_data, normalized_coordinates, levels, resolution, nvisnodes_per_level) - # Extract data shape information - n_nodes_in, _, n_elements, n_variables = size(unstructured_data) - - # Get node coordinates for DG locations on reference element - nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in) - - # Calculate interpolation vandermonde matrices for each level - max_level = length(nvisnodes_per_level) - 1 - vandermonde_per_level = [] - for l in 0:max_level - n_nodes_out = nvisnodes_per_level[l + 1] - dx = 2 / n_nodes_out - nodes_out = collect(range(-1 + dx/2, 1 - dx/2, length=n_nodes_out)) - push!(vandermonde_per_level, polynomial_interpolation_matrix(nodes_in, nodes_out)) - end - - # For each element, calculate index position at which to insert data in global data structure - lower_left_index = element2index(normalized_coordinates, levels, resolution, nvisnodes_per_level) - - # Create output data structure - structured = [Matrix{Float64}(undef, resolution, resolution) for _ in 1:n_variables] + # Extract data shape information + n_nodes_in, _, n_elements, n_variables = size(unstructured_data) + + # Get node coordinates for DG locations on reference element + nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in) + + # Calculate interpolation vandermonde matrices for each level + max_level = length(nvisnodes_per_level) - 1 + vandermonde_per_level = [] + for l in 0:max_level + n_nodes_out = nvisnodes_per_level[l + 1] + dx = 2 / n_nodes_out + nodes_out = collect(range(-1 + dx / 2, 1 - dx / 2, length = n_nodes_out)) + push!(vandermonde_per_level, + polynomial_interpolation_matrix(nodes_in, nodes_out)) + end - # For each variable, interpolate element data and store to global data structure - for v in 1:n_variables - # Reshape data array for use in multiply_dimensionwise function - reshaped_data = reshape(unstructured_data[:, :, :, v], 1, n_nodes_in, n_nodes_in, n_elements) + # For each element, calculate index position at which to insert data in global data structure + lower_left_index = element2index(normalized_coordinates, levels, resolution, + nvisnodes_per_level) - for element_id in 1:n_elements - # Extract level for convenience - level = levels[element_id] + # Create output data structure + structured = [Matrix{Float64}(undef, resolution, resolution) for _ in 1:n_variables] - # Determine target indices - n_nodes_out = nvisnodes_per_level[level + 1] - first = lower_left_index[:, element_id] - last = first .+ (n_nodes_out - 1) - - # Interpolate data - vandermonde = vandermonde_per_level[level + 1] - structured[v][first[1]:last[1], first[2]:last[2]] .= ( - reshape(multiply_dimensionwise(vandermonde, reshaped_data[:, :, :, element_id]), - n_nodes_out, n_nodes_out)) + 
# For each variable, interpolate element data and store to global data structure + for v in 1:n_variables + # Reshape data array for use in multiply_dimensionwise function + reshaped_data = reshape(unstructured_data[:, :, :, v], 1, n_nodes_in, + n_nodes_in, n_elements) + + for element_id in 1:n_elements + # Extract level for convenience + level = levels[element_id] + + # Determine target indices + n_nodes_out = nvisnodes_per_level[level + 1] + first = lower_left_index[:, element_id] + last = first .+ (n_nodes_out - 1) + + # Interpolate data + vandermonde = vandermonde_per_level[level + 1] + structured[v][first[1]:last[1], first[2]:last[2]] .= (reshape(multiply_dimensionwise(vandermonde, + reshaped_data[:, + :, + :, + element_id]), + n_nodes_out, + n_nodes_out)) + end end - end - return structured + return structured end - # For a given normalized element coordinate, return the index of its lower left # contribution to the global data structure # # Note: This is a low-level function that is not considered as part of Trixi.jl's interface and may # thus be changed in future releases. function element2index(normalized_coordinates, levels, resolution, nvisnodes_per_level) - @assert size(normalized_coordinates, 1) == 2 "only works in 2D" - - n_elements = length(levels) - - # First, determine lower left coordinate for all cells - dx = 2 / resolution - ndim = 2 - lower_left_coordinate = Array{Float64}(undef, ndim, n_elements) - for element_id in 1:n_elements - nvisnodes = nvisnodes_per_level[levels[element_id] + 1] - lower_left_coordinate[1, element_id] = ( - normalized_coordinates[1, element_id] - (nvisnodes - 1)/2 * dx) - lower_left_coordinate[2, element_id] = ( - normalized_coordinates[2, element_id] - (nvisnodes - 1)/2 * dx) - end - - # Then, convert coordinate to global index - indices = coordinate2index(lower_left_coordinate, resolution) - - return indices -end + @assert size(normalized_coordinates, 1)==2 "only works in 2D" + n_elements = length(levels) + + # First, determine lower left coordinate for all cells + dx = 2 / resolution + ndim = 2 + lower_left_coordinate = Array{Float64}(undef, ndim, n_elements) + for element_id in 1:n_elements + nvisnodes = nvisnodes_per_level[levels[element_id] + 1] + lower_left_coordinate[1, element_id] = (normalized_coordinates[1, element_id] - + (nvisnodes - 1) / 2 * dx) + lower_left_coordinate[2, element_id] = (normalized_coordinates[2, element_id] - + (nvisnodes - 1) / 2 * dx) + end + + # Then, convert coordinate to global index + indices = coordinate2index(lower_left_coordinate, resolution) + + return indices +end # Find 2D array index for a 2-tuple of normalized, cell-centered coordinates (i.e., in [-1,1]) # # Note: This is a low-level function that is not considered as part of Trixi.jl's interface and may # thus be changed in future releases. 
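# A minimal usage sketch with hand-picked values: for `resolution = 4` the cell
# centers are -0.75, -0.25, 0.25, 0.75, so
#     coordinate2index([-0.75 0.75; -0.75 0.25], 4) == [1 4; 1 3]
# i.e. each column of coordinates is mapped to its (ix, iy) grid index.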
function coordinate2index(coordinate, resolution::Integer) - # Calculate 1D normalized coordinates - dx = 2/resolution - mesh_coordinates = collect(range(-1 + dx/2, 1 - dx/2, length=resolution)) - - # Find index - id_x = searchsortedfirst.(Ref(mesh_coordinates), coordinate[1, :], lt=(x,y)->x .< y .- dx/2) - id_y = searchsortedfirst.(Ref(mesh_coordinates), coordinate[2, :], lt=(x,y)->x .< y .- dx/2) - return transpose(hcat(id_x, id_y)) + # Calculate 1D normalized coordinates + dx = 2 / resolution + mesh_coordinates = collect(range(-1 + dx / 2, 1 - dx / 2, length = resolution)) + + # Find index + id_x = searchsortedfirst.(Ref(mesh_coordinates), coordinate[1, :], + lt = (x, y) -> x .< y .- dx / 2) + id_y = searchsortedfirst.(Ref(mesh_coordinates), coordinate[2, :], + lt = (x, y) -> x .< y .- dx / 2) + return transpose(hcat(id_x, id_y)) end - # Calculate the vertices for each mesh cell such that it can be visualized as a closed box # # Note: This is a low-level function that is not considered as part of Trixi.jl's interface and may # thus be changed in future releases. function calc_vertices(coordinates, levels, length_level_0) - ndim = size(coordinates, 1) - @assert ndim == 2 "only works in 2D" - - # Initialize output arrays - n_elements = length(levels) - n_points_per_element = 2^ndim+2 - x = Vector{Float64}(undef, n_points_per_element*n_elements) - y = Vector{Float64}(undef, n_points_per_element*n_elements) - - # Calculate vertices for all coordinates at once - for element_id in 1:n_elements - length = length_level_0 / 2^levels[element_id] - index = n_points_per_element*(element_id-1) - x[index+1] = coordinates[1, element_id] - 1/2 * length - x[index+2] = coordinates[1, element_id] + 1/2 * length - x[index+3] = coordinates[1, element_id] + 1/2 * length - x[index+4] = coordinates[1, element_id] - 1/2 * length - x[index+5] = coordinates[1, element_id] - 1/2 * length - x[index+6] = NaN - - y[index+1] = coordinates[2, element_id] - 1/2 * length - y[index+2] = coordinates[2, element_id] - 1/2 * length - y[index+3] = coordinates[2, element_id] + 1/2 * length - y[index+4] = coordinates[2, element_id] + 1/2 * length - y[index+5] = coordinates[2, element_id] - 1/2 * length - y[index+6] = NaN - end - - return x, y -end + ndim = size(coordinates, 1) + @assert ndim==2 "only works in 2D" + # Initialize output arrays + n_elements = length(levels) + n_points_per_element = 2^ndim + 2 + x = Vector{Float64}(undef, n_points_per_element * n_elements) + y = Vector{Float64}(undef, n_points_per_element * n_elements) + + # Calculate vertices for all coordinates at once + for element_id in 1:n_elements + length = length_level_0 / 2^levels[element_id] + index = n_points_per_element * (element_id - 1) + x[index + 1] = coordinates[1, element_id] - 1 / 2 * length + x[index + 2] = coordinates[1, element_id] + 1 / 2 * length + x[index + 3] = coordinates[1, element_id] + 1 / 2 * length + x[index + 4] = coordinates[1, element_id] - 1 / 2 * length + x[index + 5] = coordinates[1, element_id] - 1 / 2 * length + x[index + 6] = NaN + + y[index + 1] = coordinates[2, element_id] - 1 / 2 * length + y[index + 2] = coordinates[2, element_id] - 1 / 2 * length + y[index + 3] = coordinates[2, element_id] + 1 / 2 * length + y[index + 4] = coordinates[2, element_id] + 1 / 2 * length + y[index + 5] = coordinates[2, element_id] - 1 / 2 * length + y[index + 6] = NaN + end + + return x, y +end # Calculate the vertices to plot each grid line for StructuredMesh # # Note: This is a low-level function that is not considered as part of Trixi.jl's 
interface and may # thus be changed in future releases. function calc_vertices(node_coordinates, mesh) - @unpack cells_per_dimension = mesh - @assert size(node_coordinates, 1) == 2 "only works in 2D" - - linear_indices = LinearIndices(size(mesh)) - - # Initialize output arrays - n_lines = sum(cells_per_dimension) + 2 - max_length = maximum(cells_per_dimension) - n_nodes = size(node_coordinates, 2) - - # Create output as two matrices `x` and `y`, each holding the node locations for each of the `n_lines` grid lines - # The # of rows in the matrices must be sufficient to store the longest dimension (`max_length`), - # and for each the node locations without doubling the corner nodes (`n_nodes-1`), plus the final node (`+1`) - # Rely on Plots.jl to ignore `NaN`s (i.e., they are not plotted) to handle shorter lines - x = fill(NaN, max_length*(n_nodes-1)+1, n_lines) - y = fill(NaN, max_length*(n_nodes-1)+1, n_lines) - - line_index = 1 - # Lines in x-direction - # Bottom boundary - i = 1 - for cell_x in axes(mesh, 1) - for node in 1:(n_nodes-1) - x[i, line_index] = node_coordinates[1, node, 1, linear_indices[cell_x, 1]] - y[i, line_index] = node_coordinates[2, node, 1, linear_indices[cell_x, 1]] - - i += 1 - end - end - # Last point on bottom boundary - x[i, line_index] = node_coordinates[1, end, 1, linear_indices[end, 1]] - y[i, line_index] = node_coordinates[2, end, 1, linear_indices[end, 1]] - - # Other lines in x-direction - line_index += 1 - for cell_y in axes(mesh, 2) + @unpack cells_per_dimension = mesh + @assert size(node_coordinates, 1)==2 "only works in 2D" + + linear_indices = LinearIndices(size(mesh)) + + # Initialize output arrays + n_lines = sum(cells_per_dimension) + 2 + max_length = maximum(cells_per_dimension) + n_nodes = size(node_coordinates, 2) + + # Create output as two matrices `x` and `y`, each holding the node locations for each of the `n_lines` grid lines + # The # of rows in the matrices must be sufficient to store the longest dimension (`max_length`), + # and for each the node locations without doubling the corner nodes (`n_nodes-1`), plus the final node (`+1`) + # Rely on Plots.jl to ignore `NaN`s (i.e., they are not plotted) to handle shorter lines + x = fill(NaN, max_length * (n_nodes - 1) + 1, n_lines) + y = fill(NaN, max_length * (n_nodes - 1) + 1, n_lines) + + line_index = 1 + # Lines in x-direction + # Bottom boundary i = 1 for cell_x in axes(mesh, 1) - for node in 1:(n_nodes-1) - x[i, line_index] = node_coordinates[1, node, end, linear_indices[cell_x, cell_y]] - y[i, line_index] = node_coordinates[2, node, end, linear_indices[cell_x, cell_y]] + for node in 1:(n_nodes - 1) + x[i, line_index] = node_coordinates[1, node, 1, linear_indices[cell_x, 1]] + y[i, line_index] = node_coordinates[2, node, 1, linear_indices[cell_x, 1]] - i += 1 - end + i += 1 + end end - # Last point on line - x[i, line_index] = node_coordinates[1, end, end, linear_indices[end, cell_y]] - y[i, line_index] = node_coordinates[2, end, end, linear_indices[end, cell_y]] + # Last point on bottom boundary + x[i, line_index] = node_coordinates[1, end, 1, linear_indices[end, 1]] + y[i, line_index] = node_coordinates[2, end, 1, linear_indices[end, 1]] + # Other lines in x-direction line_index += 1 - end - - - # Lines in y-direction - # Left boundary - i = 1 - for cell_y in axes(mesh, 2) - for node in 1:(n_nodes-1) - x[i, line_index] = node_coordinates[1, 1, node, linear_indices[1, cell_y]] - y[i, line_index] = node_coordinates[2, 1, node, linear_indices[1, cell_y]] + for cell_y in axes(mesh, 2) + i = 
1 + for cell_x in axes(mesh, 1) + for node in 1:(n_nodes - 1) + x[i, line_index] = node_coordinates[1, node, end, + linear_indices[cell_x, cell_y]] + y[i, line_index] = node_coordinates[2, node, end, + linear_indices[cell_x, cell_y]] + + i += 1 + end + end + # Last point on line + x[i, line_index] = node_coordinates[1, end, end, linear_indices[end, cell_y]] + y[i, line_index] = node_coordinates[2, end, end, linear_indices[end, cell_y]] - i += 1 + line_index += 1 end - end - # Last point on left boundary - x[i, line_index] = node_coordinates[1, 1, end, linear_indices[1, end]] - y[i, line_index] = node_coordinates[2, 1, end, linear_indices[1, end]] - # Other lines in y-direction - line_index +=1 - for cell_x in axes(mesh, 1) + # Lines in y-direction + # Left boundary i = 1 for cell_y in axes(mesh, 2) - for node in 1:(n_nodes-1) - x[i, line_index] = node_coordinates[1, end, node, linear_indices[cell_x, cell_y]] - y[i, line_index] = node_coordinates[2, end, node, linear_indices[cell_x, cell_y]] + for node in 1:(n_nodes - 1) + x[i, line_index] = node_coordinates[1, 1, node, linear_indices[1, cell_y]] + y[i, line_index] = node_coordinates[2, 1, node, linear_indices[1, cell_y]] - i += 1 - end + i += 1 + end end - # Last point on line - x[i, line_index] = node_coordinates[1, end, end, linear_indices[cell_x, end]] - y[i, line_index] = node_coordinates[2, end, end, linear_indices[cell_x, end]] + # Last point on left boundary + x[i, line_index] = node_coordinates[1, 1, end, linear_indices[1, end]] + y[i, line_index] = node_coordinates[2, 1, end, linear_indices[1, end]] + # Other lines in y-direction line_index += 1 - end + for cell_x in axes(mesh, 1) + i = 1 + for cell_y in axes(mesh, 2) + for node in 1:(n_nodes - 1) + x[i, line_index] = node_coordinates[1, end, node, + linear_indices[cell_x, cell_y]] + y[i, line_index] = node_coordinates[2, end, node, + linear_indices[cell_x, cell_y]] + + i += 1 + end + end + # Last point on line + x[i, line_index] = node_coordinates[1, end, end, linear_indices[cell_x, end]] + y[i, line_index] = node_coordinates[2, end, end, linear_indices[cell_x, end]] - return x, y + line_index += 1 + end + + return x, y end # Convert `slice` to orientations (1 -> `x`, 2 -> `y`, 3 -> `z`) for the two axes in a 2D plot function _get_orientations(mesh, slice) - if ndims(mesh) == 2 || (ndims(mesh) == 3 && slice === :xy) - orientation_x = 1 - orientation_y = 2 - elseif ndims(mesh) == 3 && slice === :xz - orientation_x = 1 - orientation_y = 3 - elseif ndims(mesh) == 3 && slice === :yz - orientation_x = 2 - orientation_y = 3 - else - orientation_x = 0 - orientation_y = 0 - end - return orientation_x, orientation_y + if ndims(mesh) == 2 || (ndims(mesh) == 3 && slice === :xy) + orientation_x = 1 + orientation_y = 2 + elseif ndims(mesh) == 3 && slice === :xz + orientation_x = 1 + orientation_y = 3 + elseif ndims(mesh) == 3 && slice === :yz + orientation_x = 2 + orientation_y = 3 + else + orientation_x = 0 + orientation_y = 0 + end + return orientation_x, orientation_y end - # Convert `orientation` into a guide label (see also `_get_orientations`) function _get_guide(orientation::Integer) - if orientation == 1 - return "\$x\$" - elseif orientation == 2 - return "\$y\$" - elseif orientation == 3 - return "\$z\$" - else - return "" - end + if orientation == 1 + return "\$x\$" + elseif orientation == 2 + return "\$y\$" + elseif orientation == 3 + return "\$z\$" + else + return "" + end end - # plotting_interpolation_matrix(dg; kwargs...) 
#
# Interpolation matrix which maps discretization nodes to a set of plotting nodes.
@@ -1356,121 +1454,125 @@ end
# to define a multi-dimensional interpolation matrix later.
plotting_interpolation_matrix(dg; kwargs...) = I(length(dg.basis.nodes))

-function face_plotting_interpolation_matrix(dg::DGSEM; nvisnodes=2*length(dg.basis.nodes))
-  return polynomial_interpolation_matrix(dg.basis.nodes, LinRange(-1, 1, nvisnodes))
+function face_plotting_interpolation_matrix(dg::DGSEM;
+                                            nvisnodes = 2 * length(dg.basis.nodes))
+    return polynomial_interpolation_matrix(dg.basis.nodes, LinRange(-1, 1, nvisnodes))
end

-function plotting_interpolation_matrix(dg::DGSEM; nvisnodes=2*length(dg.basis.nodes))
-  Vp1D = polynomial_interpolation_matrix(dg.basis.nodes, LinRange(-1, 1, nvisnodes))
-  # For quadrilateral elements, interpolation to plotting nodes involves applying a 1D interpolation
-  # operator to each line of nodes. This is equivalent to multiplying the vector containing all node
-  # node coordinates on an element by a Kronecker product of the 1D interpolation operator (e.g., a
-  # multi-dimensional interpolation operator).
-  return kron(Vp1D, Vp1D)
+function plotting_interpolation_matrix(dg::DGSEM;
+                                       nvisnodes = 2 * length(dg.basis.nodes))
+    Vp1D = polynomial_interpolation_matrix(dg.basis.nodes, LinRange(-1, 1, nvisnodes))
+    # For quadrilateral elements, interpolation to plotting nodes involves applying a 1D interpolation
+    # operator to each line of nodes. This is equivalent to multiplying the vector containing all
+    # node coordinates on an element by a Kronecker product of the 1D interpolation operator (i.e., a
+    # multi-dimensional interpolation operator).
+    return kron(Vp1D, Vp1D)
end

function reference_node_coordinates_2d(dg::DGSEM)
-  @unpack nodes = dg.basis
-  r = vec([nodes[i] for i in eachnode(dg), j in eachnode(dg)])
-  s = vec([nodes[j] for i in eachnode(dg), j in eachnode(dg)])
-  return r, s
+    @unpack nodes = dg.basis
+    r = vec([nodes[i] for i in eachnode(dg), j in eachnode(dg)])
+    s = vec([nodes[j] for i in eachnode(dg), j in eachnode(dg)])
+    return r, s
end

# Find element and triangle ids containing coordinates given as a matrix [ndims, npoints]
function get_ids_by_coordinates!(ids, coordinates, pd)
-  if length(ids) != 2 * size(coordinates, 2)
-    throw(DimensionMismatch("storage length for element ids does not match the number of coordinates"))
-  end
+    if length(ids) != 2 * size(coordinates, 2)
+        throw(DimensionMismatch("storage length for element ids does not match the number of coordinates"))
+    end

-  n_coordinates = size(coordinates, 2)
+    n_coordinates = size(coordinates, 2)

-  for index in 1:n_coordinates
-    ids[index, :] .= find_element(coordinates[:, index], pd)
-  end
+    for index in 1:n_coordinates
+        ids[index, :] .= find_element(coordinates[:, index], pd)
+    end

-  return ids
+    return ids
end

# Find the ids of elements and triangles containing given coordinates by using the triangulation in 'pd'.
function get_ids_by_coordinates(coordinates, pd)
-  ids = Matrix(undef, size(coordinates, 2), 2)
-  get_ids_by_coordinates!(ids, coordinates, pd)
-  return ids
+    ids = Matrix(undef, size(coordinates, 2), 2)
+    get_ids_by_coordinates!(ids, coordinates, pd)
+    return ids
end

# Check if given 'point' is inside the triangle with corners corresponding to the coordinates of x and y.
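# For example (an illustrative sketch with hand-picked corners): for the triangle with
# corners (0, 0), (1, 0) and (0, 1), i.e. x = [0.0, 1.0, 0.0] and y = [0.0, 0.0, 1.0],
# the point (0.25, 0.25) is accepted below while (1.0, 1.0) is rejected.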
function is_in_triangle(point, x, y)
-  a = SVector(x[1], y[1]); b = SVector(x[2], y[2]); c = SVector(x[3], y[3])
-  return is_on_same_side(point, a, b, c) && is_on_same_side(point, b, c, a) && is_on_same_side(point, c, a, b)
+    a = SVector(x[1], y[1])
+    b = SVector(x[2], y[2])
+    c = SVector(x[3], y[3])
+    return is_on_same_side(point, a, b, c) && is_on_same_side(point, b, c, a) &&
+           is_on_same_side(point, c, a, b)
end

# Create an axis through x and y to then check if 'point' is on the same side of the axis as z.
function is_on_same_side(point, x, y, z)
-  if (y[1] - x[1]) == 0
-    return (point[1] - x[1]) * (z[1] - x[1]) >= 0
-  else
-    a = (y[2] - x[2]) / (y[1] - x[1])
-    b = x[2] - a * x[1]
-    return (z[2] - a * z[1] - b) * (point[2] - a * point[1] - b) >= 0
-  end
+    if (y[1] - x[1]) == 0
+        return (point[1] - x[1]) * (z[1] - x[1]) >= 0
+    else
+        a = (y[2] - x[2]) / (y[1] - x[1])
+        b = x[2] - a * x[1]
+        return (z[2] - a * z[1] - b) * (point[2] - a * point[1] - b) >= 0
+    end
end

# For a given 'point', return the ids of the element and triangle it is contained in; if not found, return nothing.
function find_element(point, pd)
-  n_tri = size(pd.t, 1)
-  n_elements = size(pd.x, 2)
-
-  # Iterate over all elements.
-  for element in 1:n_elements
-    # Iterate over all triangles in given element.
-    for tri in 1:n_tri
-      if is_in_triangle(point, pd.x[pd.t[tri, :], element], pd.y[pd.t[tri, :], element])
-        return SVector(element, tri)
-      end
-    end
-  end
+    n_tri = size(pd.t, 1)
+    n_elements = size(pd.x, 2)
+
+    # Iterate over all elements.
+    for element in 1:n_elements
+        # Iterate over all triangles in given element.
+        for tri in 1:n_tri
+            if is_in_triangle(point, pd.x[pd.t[tri, :], element],
+                              pd.y[pd.t[tri, :], element])
+                return SVector(element, tri)
+            end
+        end
+    end
end

# Interpolate from three corners of a triangle to a single point.
-function triangle_interpolation(x_coordinates_in, y_coordinates_in, values_in, coordinate_out)
-  A = hcat(x_coordinates_in, y_coordinates_in, SVector(1, 1, 1))
-  c = A \ values_in
-  return c[1] * coordinate_out[1] + c[2] * coordinate_out[2] + c[3]
+function triangle_interpolation(x_coordinates_in, y_coordinates_in, values_in,
+                                coordinate_out)
+    A = hcat(x_coordinates_in, y_coordinates_in, SVector(1, 1, 1))
+    c = A \ values_in
+    return c[1] * coordinate_out[1] + c[2] * coordinate_out[2] + c[3]
end

# Create an axis.
function axis_curve(nodes_x, nodes_y, nodes_z, slice, point, n_points)
-  if n_points == nothing
-    n_points = 64
-  end
-  dimensions = length(point)
-  curve = zeros(dimensions, n_points)
-  if slice == :x
-    xmin, xmax = extrema(nodes_x)
-    curve[1, :] .= range(xmin, xmax, length = n_points)
-    curve[2, :] .= point[2]
-    if dimensions === 3
-      curve[3, :] .= point[3]
-    end
-  elseif slice == :y
-    ymin, ymax = extrema(nodes_y)
-    curve[1, :] .= point[1]
-    curve[2, :] .= range(ymin, ymax, length = n_points)
-    if dimensions === 3
-      curve[3, :] .= point[3]
-    end
-  elseif slice == :z
-    zmin, zmax = extrema(nodes_z)
-    curve[1, :] .= point[1]
-    curve[2, :] .= point[2]
-    curve[3, :] .= range(zmin, zmax, length = n_points)
-  else
-    @assert false "Input for 'slice' is not supported here."
- end - - return curve -end + if n_points == nothing + n_points = 64 + end + dimensions = length(point) + curve = zeros(dimensions, n_points) + if slice == :x + xmin, xmax = extrema(nodes_x) + curve[1, :] .= range(xmin, xmax, length = n_points) + curve[2, :] .= point[2] + if dimensions === 3 + curve[3, :] .= point[3] + end + elseif slice == :y + ymin, ymax = extrema(nodes_y) + curve[1, :] .= point[1] + curve[2, :] .= range(ymin, ymax, length = n_points) + if dimensions === 3 + curve[3, :] .= point[3] + end + elseif slice == :z + zmin, zmax = extrema(nodes_z) + curve[1, :] .= point[1] + curve[2, :] .= point[2] + curve[3, :] .= range(zmin, zmax, length = n_points) + else + @assert false "Input for 'slice' is not supported here." + end + return curve +end end # @muladd diff --git a/src/visualization/visualization.jl b/src/visualization/visualization.jl index 5d7795571fa..94d2532cba3 100644 --- a/src/visualization/visualization.jl +++ b/src/visualization/visualization.jl @@ -3,6 +3,7 @@ # we need to opt-in explicitly. # See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. @muladd begin +#! format: noindent include("types.jl") include("utilities.jl") @@ -12,5 +13,4 @@ include("recipes_plots.jl") # TrixiMakieExt package extension or by the Makie-specific code loaded by Requires.jl function iplot end function iplot! end - end # @muladd diff --git a/utils/build_sysimage.jl b/utils/build_sysimage.jl index ff99cc872fd..69bce54b269 100755 --- a/utils/build_sysimage.jl +++ b/utils/build_sysimage.jl @@ -45,7 +45,7 @@ start_time = time() # Create a temporary environment to install all necessary packages without modifying # the users environment -Pkg.activate(temp=true) +Pkg.activate(temp = true) # Add package compiler, Trixi.jl, and additional packages that shall be built into the sysimage Pkg.add("PackageCompiler") @@ -56,21 +56,22 @@ Pkg.add("Trixi") # of the current temporary project if we do not want to bundle Trixi.jl into the sysimage. packages = Symbol[:OrdinaryDiffEq, :Plots, :Trixi2Vtk] if lowercase(get(ENV, "TRIXI_SYSIMAGE_INCLUDE_TRIXI", "no")) in ("yes", "1", "true") - # If Trixi.jl is to be included, just add it to the list - push!(packages, :Trixi) + # If Trixi.jl is to be included, just add it to the list + push!(packages, :Trixi) else - # Otherwise, figure out all direct dependencies and add them instead - # Inspired by: https://github.com/CliMA/ClimateMachine.jl/blob/8c57fb55acc20ee824ea37478395a7cb07c5a93c/.dev/systemimage/climate_machine_image.jl - trixi_uuid = Base.UUID("a7f1ee26-1774-49b1-8366-f1abc58fbfcb") - append!(packages, Symbol[Symbol(v) for v in keys(Pkg.dependencies()[trixi_uuid].dependencies)]) + # Otherwise, figure out all direct dependencies and add them instead + # Inspired by: https://github.com/CliMA/ClimateMachine.jl/blob/8c57fb55acc20ee824ea37478395a7cb07c5a93c/.dev/systemimage/climate_machine_image.jl + trixi_uuid = Base.UUID("a7f1ee26-1774-49b1-8366-f1abc58fbfcb") + append!(packages, + Symbol[Symbol(v) for v in keys(Pkg.dependencies()[trixi_uuid].dependencies)]) end map(Pkg.add ∘ string, packages) Pkg.precompile() - # Collect remaining arguments -sysimage_path = get(ENV, "TRIXI_SYSIMAGE_PATH", joinpath(@__DIR__, "TrixiSysimage." * Libdl.dlext)) +sysimage_path = get(ENV, "TRIXI_SYSIMAGE_PATH", + joinpath(@__DIR__, "TrixiSysimage." 
* Libdl.dlext)) precompile_execution_file = joinpath(@__DIR__, "precompile_execution_file.jl") # Create system image @@ -79,12 +80,10 @@ precompile_execution_file = joinpath(@__DIR__, "precompile_execution_file.jl") @info "Precompile execution file: $precompile_execution_file" using PackageCompiler -PackageCompiler.create_sysimage( - packages, - sysimage_path=sysimage_path, - precompile_execution_file=precompile_execution_file, - cpu_target=PackageCompiler.default_app_cpu_target() -) +PackageCompiler.create_sysimage(packages, + sysimage_path = sysimage_path, + precompile_execution_file = precompile_execution_file, + cpu_target = PackageCompiler.default_app_cpu_target()) duration = time() - start_time @info "Done. Created sysimage in $duration seconds." diff --git a/utils/euler-manufactured.jl b/utils/euler-manufactured.jl index 6b3e04d2fc6..7e19c4e64a6 100644 --- a/utils/euler-manufactured.jl +++ b/utils/euler-manufactured.jl @@ -21,7 +21,6 @@ julia> euler3d() using Reduce @force using Reduce.Algebra - # Original Reduce code (CompressibleEulerEquations 1D) #= clear(γ,f,A,ω,c,ini,rho,rho_v1,rho_v2,rho_v3,rho_e,v1,v2,p,x,y,t,u1,u2,u3,u4); @@ -39,24 +38,24 @@ source_rho_v1 := df(rho_v1, t) + df(rho * v1^2 + p, x) source_rho_e := df(rho_e, t) + df((rho_e + p) * v1, x) =# - function euler1d() - quote - ini = c + a * sin(ω * (x - t)) - rho = ini - rho_v1 = ini - rho_e = ini^2 - - v1 = rho_v1 / rho - p = (γ - 1) * (rho_e - 1/2 * rho * v1^2) - - source_rho = df(rho, t) + df(rho_v1, x) - source_rho_v1 = df(rho_v1, t) + df(rho * v1^2 + p, x) - source_rho_e = df(rho_e, t) + df((rho_e + p) * v1, x) - end |> rcall + quote + ini = c + a * sin(ω * (x - t)) + rho = ini + rho_v1 = ini + rho_e = ini^2 + + v1 = rho_v1 / rho + p = (γ - 1) * (rho_e - 1 / 2 * rho * v1^2) + + #! format: off + source_rho = df(rho, t) + df(rho_v1, x) + source_rho_v1 = df(rho_v1, t) + df(rho * v1^2 + p, x) + source_rho_e = df(rho_e, t) + df((rho_e + p) * v1, x) + #! format: on + end |> rcall end - # Original Reduce code (CompressibleEulerEquations 2D) #= clear(γ,f,A,ω,c,ini,rho,rho_v1,rho_v2,rho_v3,rho_e,v1,v2,p,x,y,t,u1,u2,u3,u4); @@ -77,27 +76,27 @@ source_rho_v2 := df(rho_v2, t) + df(rho * v1 * v2, x) + df(rho * v2^2 + p, y) source_rho_e := df(rho_e, t) + df((rho_e + p) * v1, x) + df((rho_e + p) * v2, y); =# - function euler2d() - quote - ini = c + a * sin(ω * (x + y - t)) - rho = ini - rho_v1 = ini - rho_v2 = ini - rho_e = ini^2 - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - p = (γ - 1) * (rho_e - 1/2 * rho * (v1^2 + v2^2)) - - source_rho = df(rho, t) + df(rho_v1, x) + df(rho_v2, y) - source_rho_v1 = df(rho_v1, t) + df(rho * v1^2 + p, x) + df(rho * v1 * v2, y) - source_rho_v2 = df(rho_v2, t) + df(rho * v1 * v2, x) + df(rho * v2^2 + p, y) - source_rho_e = df(rho_e, t) + df((rho_e + p) * v1, x) + df((rho_e + p) * v2, y) - end |> rcall + quote + ini = c + a * sin(ω * (x + y - t)) + rho = ini + rho_v1 = ini + rho_v2 = ini + rho_e = ini^2 + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + p = (γ - 1) * (rho_e - 1 / 2 * rho * (v1^2 + v2^2)) + + #! format: off + source_rho = df(rho, t) + df(rho_v1, x) + df(rho_v2, y) + source_rho_v1 = df(rho_v1, t) + df(rho * v1^2 + p, x) + df(rho * v1 * v2, y) + source_rho_v2 = df(rho_v2, t) + df(rho * v1 * v2, x) + df(rho * v2^2 + p, y) + source_rho_e = df(rho_e, t) + df((rho_e + p) * v1, x) + df((rho_e + p) * v2, y) + #! 
format: on + end |> rcall end - # Original Reduce code (CompressibleEulerEquations 3D) #= clear(γ,f,A,ω,c,a1,a2,a3,ini,rho,rho_v1,rho_v2,rho_v3,rho_e,v1,v2,v3,p,x,y,z,t); @@ -122,23 +121,25 @@ source_rho_e := df(rho_e, t) + df((rho_e + p) * v1, x) + df((rho_e + p) * v2, =# function euler3d() - quote - ini = c + a * sin(ω * (x + y + z - t)) - rho = ini - rho_v1 = ini - rho_v2 = ini - rho_v3 = ini - rho_e = ini^2 - - v1 = rho_v1 / rho - v2 = rho_v2 / rho - v3 = rho_v3 / rho - p = (γ - 1) * (rho_e - 1/2 * rho * (v1^2 + v2^2 + v3^2)) - - source_rho = df(rho, t) + df(rho_v1, x) + df(rho_v2, y) + df(rho_v3, z) - source_rho_v1 = df(rho_v1, t) + df(rho * v1^2 + p, x) + df(rho * v1 * v2, y) + df(rho * v1 * v3, z) - source_rho_v2 = df(rho_v2, t) + df(rho * v1 * v2, x) + df(rho * v2^2 + p, y) + df(rho * v2 * v3, z) - source_rho_v3 = df(rho_v3, t) + df(rho * v1 * v3, x) + df(rho * v3 * v3, y) + df(rho * v3^2 + p, z) - source_rho_e = df(rho_e, t) + df((rho_e + p) * v1, x) + df((rho_e + p) * v2, y) + df((rho_e + p) * v3, z) - end |> rcall + quote + ini = c + a * sin(ω * (x + y + z - t)) + rho = ini + rho_v1 = ini + rho_v2 = ini + rho_v3 = ini + rho_e = ini^2 + + v1 = rho_v1 / rho + v2 = rho_v2 / rho + v3 = rho_v3 / rho + p = (γ - 1) * (rho_e - 1 / 2 * rho * (v1^2 + v2^2 + v3^2)) + + #! format: off + source_rho = df(rho, t) + df(rho_v1, x) + df(rho_v2, y) + df(rho_v3, z) + source_rho_v1 = df(rho_v1, t) + df(rho * v1^2 + p, x) + df(rho * v1 * v2, y) + df(rho * v1 * v3, z) + source_rho_v2 = df(rho_v2, t) + df(rho * v1 * v2, x) + df(rho * v2^2 + p, y) + df(rho * v2 * v3, z) + source_rho_v3 = df(rho_v3, t) + df(rho * v1 * v3, x) + df(rho * v3 * v3, y) + df(rho * v3^2 + p, z) + source_rho_e = df(rho_e, t) + df((rho_e + p) * v1, x) + df((rho_e + p) * v2, y) + df((rho_e + p) * v3, z) + #! format: on + end |> rcall end diff --git a/utils/julia-format.jl b/utils/julia-format.jl deleted file mode 100755 index f53b5c0ceca..00000000000 --- a/utils/julia-format.jl +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env julia - -using ArgParse: ArgParseSettings, @add_arg_table, parse_args -using JuliaFormatter: format - - -function main() - # Parse command line arguments - args = parse_commandline_arguments() - - # Call formatter with our default options - format(args["path"], - overwrite = true, - verbose = true, - indent = 2, - margin = 100, - always_for_in = true) -end - - -function parse_commandline_arguments() - s = ArgParseSettings() - @add_arg_table s begin - "path" - help = ("Name of file or folder to format. If PATH is a folder, " - * "its contents are examined recursively and all `.jl` files are formatted.") - arg_type = String - required = true - nargs = '+' - end - - return parse_args(s) -end - - -if abspath(PROGRAM_FILE) == @__FILE__ - main() -end diff --git a/utils/precompile_execution_file.jl b/utils/precompile_execution_file.jl index c7a56f1a67b..3117c2b1589 100644 --- a/utils/precompile_execution_file.jl +++ b/utils/precompile_execution_file.jl @@ -1,3 +1,4 @@ +#! 
format: off using Trixi trixi_include(default_example()) diff --git a/utils/trixi-format.jl b/utils/trixi-format.jl new file mode 100755 index 00000000000..d1e7efa656a --- /dev/null +++ b/utils/trixi-format.jl @@ -0,0 +1,30 @@ +#!/usr/bin/env julia + +using Pkg +Pkg.activate(; temp = true, io = devnull) +Pkg.add("JuliaFormatter"; preserve = PRESERVE_ALL, io = devnull) + +using JuliaFormatter: format + +function main() + # Show help + if "-h" in ARGS || "--help" in ARGS + println("usage: trixi-format.jl PATH [PATH...]") + println() + println("positional arguments:") + println() + println(" PATH One or more paths (directories or files) to format. Default: '.'") + return nothing + end + + # Set default path if none is given on command line + if isempty(ARGS) + paths = String["."] + else + paths = ARGS + end + + return format(paths) +end + +main() diff --git a/utils/trixi2tec.jl b/utils/trixi2tec.jl index 1c76bc27623..fc5f3e705c2 100644 --- a/utils/trixi2tec.jl +++ b/utils/trixi2tec.jl @@ -32,64 +32,67 @@ julia> trixi2tec(sol, "mydata_primitive.tec", solution_variables=cons2prim) This is an experimental feature and *not* part of the official Trixi.jl API. Specifically, this function may change (or even be removed) in future releases without warning. """ -function trixi2tec(u, semi, filename; title=basename(filename), solution_variables=cons2cons) - # Extract fundamental building blocks and auxiliary data - mesh, equations, solver, cache = Trixi.mesh_equations_solver_cache(semi) - @unpack node_coordinates = cache.elements +function trixi2tec(u, semi, filename; title = basename(filename), + solution_variables = cons2cons) + # Extract fundamental building blocks and auxiliary data + mesh, equations, solver, cache = Trixi.mesh_equations_solver_cache(semi) + @unpack node_coordinates = cache.elements - # Collect variable names and size information - ndims = Trixi.ndims(semi) - if ndims == 1 - variables = ["x"] - ndofs_x = size(u, 2) - indices = CartesianIndices((ndofs_x,)) - zone_info = "ZONE I=$ndofs_x, F=POINT\n" - elseif ndims == 2 - variables = ["x", "y"] - ndofs_x = size(u, 2) - ndofs_y = size(u, 3) - indices = CartesianIndices((ndofs_x, ndofs_y)) - zone_info = "ZONE I=$ndofs_x, J=$ndofs_y, F=POINT\n" - elseif ndims == 3 - variables = ["x", "y", "z"] - ndofs_x = size(u, 2) - ndofs_y = size(u, 3) - ndofs_z = size(u, 4) - indices = CartesianIndices((ndofs_x, ndofs_y, ndofs_z)) - zone_info = "ZONE I=$ndofs_x, J=$ndofs_y, K=$ndofs_z, F=POINT\n" - else - error("Unsupported number of dimensions (must be 1, 2, or 3)") - end - push!(variables, Trixi.varnames(solution_variables, equations)...) - variables_list = join(variables, "\", \"") + # Collect variable names and size information + ndims = Trixi.ndims(semi) + if ndims == 1 + variables = ["x"] + ndofs_x = size(u, 2) + indices = CartesianIndices((ndofs_x,)) + zone_info = "ZONE I=$ndofs_x, F=POINT\n" + elseif ndims == 2 + variables = ["x", "y"] + ndofs_x = size(u, 2) + ndofs_y = size(u, 3) + indices = CartesianIndices((ndofs_x, ndofs_y)) + zone_info = "ZONE I=$ndofs_x, J=$ndofs_y, F=POINT\n" + elseif ndims == 3 + variables = ["x", "y", "z"] + ndofs_x = size(u, 2) + ndofs_y = size(u, 3) + ndofs_z = size(u, 4) + indices = CartesianIndices((ndofs_x, ndofs_y, ndofs_z)) + zone_info = "ZONE I=$ndofs_x, J=$ndofs_y, K=$ndofs_z, F=POINT\n" + else + error("Unsupported number of dimensions (must be 1, 2, or 3)") + end + push!(variables, Trixi.varnames(solution_variables, equations)...) 
+ variables_list = join(variables, "\", \"") - # Write tec file - open(filename, "w") do io - write(io, """TITLE = "$title"\n""") - write(io, """VARIABLES = "$variables_list"\n""") - for element in eachelement(solver, cache) - write(io, zone_info) - for ci in indices - node_coords = Trixi.get_node_coords(node_coordinates, equations, solver, ci, element) - node_vars = solution_variables(Trixi.get_node_vars(u, equations, solver, ci, element), equations) - print(io, join(node_coords, " ")) - write(io, " ") - print(io, join(node_vars, " ")) - write(io, "\n") - end # k, j, i - end # element - end + # Write tec file + open(filename, "w") do io + write(io, """TITLE = "$title"\n""") + write(io, """VARIABLES = "$variables_list"\n""") + for element in eachelement(solver, cache) + write(io, zone_info) + for ci in indices + node_coords = Trixi.get_node_coords(node_coordinates, equations, solver, ci, + element) + node_vars = solution_variables(Trixi.get_node_vars(u, equations, solver, ci, + element), equations) + print(io, join(node_coords, " ")) + write(io, " ") + print(io, join(node_vars, " ")) + write(io, "\n") + end # k, j, i + end # element + end end # Convenience function to allow calling `trixi2tec` with the `sol` variable function trixi2tec(sol, filename; kwargs...) - semi = sol.prob.p - u_ode = sol.u[end] - trixi2tec(u_ode, semi, filename; kwargs...) + semi = sol.prob.p + u_ode = sol.u[end] + trixi2tec(u_ode, semi, filename; kwargs...) end # Convenience function to allow calling `trixi2tec` with, e.g., the initial condition function trixi2tec(u_ode::Vector{<:Real}, semi, filename; kwargs...) - u = Trixi.wrap_array_native(u_ode, semi) - trixi2tec(u, semi, filename; kwargs...) + u = Trixi.wrap_array_native(u_ode, semi) + trixi2tec(u, semi, filename; kwargs...) 
end diff --git a/utils/trixi2txt.jl b/utils/trixi2txt.jl index 1fb631c9f83..b386f150da4 100644 --- a/utils/trixi2txt.jl +++ b/utils/trixi2txt.jl @@ -35,323 +35,324 @@ include("../src/solvers/dgsem/basis_lobatto_legendre.jl") include("../src/solvers/dgsem/interpolation.jl") function trixi2txt(filename::AbstractString...; - variables=[], output_directory=".", nvisnodes=nothing, max_supported_level=11) - # Convert filenames to a single list of strings - if isempty(filename) - error("no input file was provided") - end - filenames = String[] - for pattern in filename - append!(filenames, glob(pattern)) - end - - # Iterate over input files - for (index, filename) in enumerate(filenames) - # Check if data file exists - if !isfile(filename) - error("file '$filename' does not exist") + variables = [], output_directory = ".", nvisnodes = nothing, + max_supported_level = 11) + # Convert filenames to a single list of strings + if isempty(filename) + error("no input file was provided") end - - # Make sure it is a data file - if !is_solution_restart_file(filename) - error("file '$filename' is not a data file") + filenames = String[] + for pattern in filename + append!(filenames, glob(pattern)) end - # Get mesh file name - meshfile = extract_mesh_filename(filename) + # Iterate over input files + for (index, filename) in enumerate(filenames) + # Check if data file exists + if !isfile(filename) + error("file '$filename' does not exist") + end - # Check if mesh file exists - if !isfile(meshfile) - error("mesh file '$meshfile' does not exist") - end + # Make sure it is a data file + if !is_solution_restart_file(filename) + error("file '$filename' is not a data file") + end - # Read mesh - center_level_0, length_level_0, leaf_cells, coordinates, levels = read_meshfile(meshfile) + # Get mesh file name + meshfile = extract_mesh_filename(filename) - # Read data - labels, data, n_elements, n_nodes, element_variables, time = read_datafile(filename) + # Check if mesh file exists + if !isfile(meshfile) + error("mesh file '$meshfile' does not exist") + end - # Check if dimensions match - if length(leaf_cells) != n_elements - error("number of elements in '$(filename)' do not match number of leaf cells in " * - "'$(meshfile)' " * - "(did you forget to clean your 'out/' directory between different runs?)") - end + # Read mesh + center_level_0, length_level_0, leaf_cells, coordinates, levels = read_meshfile(meshfile) - # Determine resolution for data interpolation - max_level = maximum(levels) - if max_level > max_supported_level - error("Maximum refinement level in data file $max_level is higher than " * - "maximum supported level $max_supported_level") - end - max_available_nodes_per_finest_element = 2^(max_supported_level - max_level) - if nvisnodes == nothing - max_nvisnodes = 2 * n_nodes - elseif nvisnodes == 0 - max_nvisnodes = n_nodes - else - max_nvisnodes = nvisnodes - end - nvisnodes_at_max_level = min(max_available_nodes_per_finest_element, max_nvisnodes) - resolution = nvisnodes_at_max_level * 2^max_level - nvisnodes_per_level = [2^(max_level - level)*nvisnodes_at_max_level for level in 0:max_level] - - # Interpolate data - structured_data = unstructured2structured(data, levels, resolution, nvisnodes_per_level) - - # Interpolate cell-centered values to node-centered values - node_centered_data = cell2node(structured_data) - - # Determine x coordinates - xs = collect(range(-1, 1, length=resolution+1)) .* length_level_0/2 .+ center_level_0[1] - - # Check that all variables exist in data file - if 
isempty(variables) - append!(variables, labels) - else - for var in variables - if !(var in labels) - error("variable '$var' does not exist in the data file $filename") + # Read data + labels, data, n_elements, n_nodes, element_variables, time = read_datafile(filename) + + # Check if dimensions match + if length(leaf_cells) != n_elements + error("number of elements in '$(filename)' do not match number of leaf cells in " * + "'$(meshfile)' " * + "(did you forget to clean your 'out/' directory between different runs?)") + end + + # Determine resolution for data interpolation + max_level = maximum(levels) + if max_level > max_supported_level + error("Maximum refinement level in data file $max_level is higher than " * + "maximum supported level $max_supported_level") + end + max_available_nodes_per_finest_element = 2^(max_supported_level - max_level) + if nvisnodes == nothing + max_nvisnodes = 2 * n_nodes + elseif nvisnodes == 0 + max_nvisnodes = n_nodes + else + max_nvisnodes = nvisnodes + end + nvisnodes_at_max_level = min(max_available_nodes_per_finest_element, max_nvisnodes) + resolution = nvisnodes_at_max_level * 2^max_level + nvisnodes_per_level = [2^(max_level - level) * nvisnodes_at_max_level + for level in 0:max_level] + + # Interpolate data + structured_data = unstructured2structured(data, levels, resolution, + nvisnodes_per_level) + + # Interpolate cell-centered values to node-centered values + node_centered_data = cell2node(structured_data) + + # Determine x coordinates + xs = collect(range(-1, 1, length = resolution + 1)) .* length_level_0 / 2 .+ + center_level_0[1] + + # Check that all variables exist in data file + if isempty(variables) + append!(variables, labels) + else + for var in variables + if !(var in labels) + error("variable '$var' does not exist in the data file $filename") + end + end end - end - end - # Create output directory if it does not exist - mkpath(output_directory) - - # Determine output file name - base, _ = splitext(splitdir(filename)[2]) - output_filename = joinpath(output_directory, "$(base).txt") - - # Write to file - open(output_filename, "w") do io - # Header - print(io, "x ") - for label in variables - @printf(io, " %-14s", label) - end - println(io) - - # Data - for idx in 1:length(xs) - @printf(io, "%+10.8e", xs[idx]) - for variable_id in 1:length(variables) - @printf(io, " %+10.8e ", node_centered_data[idx, variable_id]) + # Create output directory if it does not exist + mkpath(output_directory) + + # Determine output file name + base, _ = splitext(splitdir(filename)[2]) + output_filename = joinpath(output_directory, "$(base).txt") + + # Write to file + open(output_filename, "w") do io + # Header + print(io, "x ") + for label in variables + @printf(io, " %-14s", label) + end + println(io) + + # Data + for idx in 1:length(xs) + @printf(io, "%+10.8e", xs[idx]) + for variable_id in 1:length(variables) + @printf(io, " %+10.8e ", node_centered_data[idx, variable_id]) + end + println(io) + end end - println(io) - end end - end end - # Check if file is a data file function is_solution_restart_file(filename::String) - # Open file for reading - h5open(filename, "r") do file - # If attribute "mesh_file" exists, this must be a data file - return haskey(attributes(file), "mesh_file") - end + # Open file for reading + h5open(filename, "r") do file + # If attribute "mesh_file" exists, this must be a data file + return haskey(attributes(file), "mesh_file") + end end - # Use data file to extract mesh filename from attributes function 
extract_mesh_filename(filename::String) - # Open file for reading - h5open(filename, "r") do file - # Extract filename relative to data file - mesh_file = read(attributes(file)["mesh_file"]) + # Open file for reading + h5open(filename, "r") do file + # Extract filename relative to data file + mesh_file = read(attributes(file)["mesh_file"]) - return joinpath(dirname(filename), mesh_file) - end + return joinpath(dirname(filename), mesh_file) + end end - # Read in mesh file and return relevant data function read_meshfile(filename::String) - # Open file for reading - h5open(filename, "r") do file - # Check dimension - only 1D supported - if haskey(attributes(file), "ndims") - ndims_ = read(attributes(file)["ndims"]) - else - ndims_ = read(attributes(file)["ndim"]) # FIXME once Trixi.jl's 3D branch is merged & released - end - if ndims_ != 1 - error("currently only 1D files can be processed, but '$filename' is $(ndims_)D") - end + # Open file for reading + h5open(filename, "r") do file + # Check dimension - only 1D supported + if haskey(attributes(file), "ndims") + ndims_ = read(attributes(file)["ndims"]) + else + ndims_ = read(attributes(file)["ndim"]) # FIXME once Trixi.jl's 3D branch is merged & released + end + if ndims_ != 1 + error("currently only 1D files can be processed, but '$filename' is $(ndims_)D") + end - # Extract basic information - n_cells = read(attributes(file)["n_cells"]) - n_leaf_cells = read(attributes(file)["n_leaf_cells"]) - center_level_0 = read(attributes(file)["center_level_0"]) - length_level_0 = read(attributes(file)["length_level_0"]) - - # Extract coordinates, levels, child cells - coordinates = Array{Float64}(undef, ndims_, n_cells) - coordinates .= read(file["coordinates"]) - levels = Array{Int}(undef, n_cells) - levels .= read(file["levels"]) - child_ids = Array{Int}(undef, 2^ndims_, n_cells) - child_ids .= read(file["child_ids"]) - - # Extract leaf cells (= cells to be plotted) and contract all other arrays accordingly - leaf_cells = similar(levels) - n_cells = 0 - for cell_id in 1:length(levels) - if sum(child_ids[:, cell_id]) > 0 - continue - end - - n_cells += 1 - leaf_cells[n_cells] = cell_id - end - leaf_cells = leaf_cells[1:n_cells] + # Extract basic information + n_cells = read(attributes(file)["n_cells"]) + n_leaf_cells = read(attributes(file)["n_leaf_cells"]) + center_level_0 = read(attributes(file)["center_level_0"]) + length_level_0 = read(attributes(file)["length_level_0"]) + + # Extract coordinates, levels, child cells + coordinates = Array{Float64}(undef, ndims_, n_cells) + coordinates .= read(file["coordinates"]) + levels = Array{Int}(undef, n_cells) + levels .= read(file["levels"]) + child_ids = Array{Int}(undef, 2^ndims_, n_cells) + child_ids .= read(file["child_ids"]) + + # Extract leaf cells (= cells to be plotted) and contract all other arrays accordingly + leaf_cells = similar(levels) + n_cells = 0 + for cell_id in 1:length(levels) + if sum(child_ids[:, cell_id]) > 0 + continue + end + + n_cells += 1 + leaf_cells[n_cells] = cell_id + end + leaf_cells = leaf_cells[1:n_cells] - coordinates = coordinates[:, leaf_cells] - levels = levels[leaf_cells] + coordinates = coordinates[:, leaf_cells] + levels = levels[leaf_cells] - return center_level_0, length_level_0, leaf_cells, coordinates, levels - end + return center_level_0, length_level_0, leaf_cells, coordinates, levels + end end - # Read in data file and return all relevant information function read_datafile(filename::String) - # Open file for reading - h5open(filename, "r") do file - # 
Extract basic information - if haskey(attributes(file), "ndims") - ndims_ = read(attributes(file)["ndims"]) - else - ndims_ = read(attributes(file)["ndim"]) - end - if haskey(attributes(file), "polydeg") - polydeg = read(attributes(file)["polydeg"]) - else - polydeg = read(attributes(file)["N"]) - end - n_elements = read(attributes(file)["n_elements"]) - n_variables = read(attributes(file)["n_vars"]) - time = read(attributes(file)["time"]) - - # Extract labels for legend - labels = Array{String}(undef, 1, n_variables) - for v = 1:n_variables - labels[1, v] = read(attributes(file["variables_$v"])["name"]) - end + # Open file for reading + h5open(filename, "r") do file + # Extract basic information + if haskey(attributes(file), "ndims") + ndims_ = read(attributes(file)["ndims"]) + else + ndims_ = read(attributes(file)["ndim"]) + end + if haskey(attributes(file), "polydeg") + polydeg = read(attributes(file)["polydeg"]) + else + polydeg = read(attributes(file)["N"]) + end + n_elements = read(attributes(file)["n_elements"]) + n_variables = read(attributes(file)["n_vars"]) + time = read(attributes(file)["time"]) + + # Extract labels for legend + labels = Array{String}(undef, 1, n_variables) + for v in 1:n_variables + labels[1, v] = read(attributes(file["variables_$v"])["name"]) + end - # Extract data arrays - n_nodes = polydeg + 1 - - if ndims_ == 1 - data = Array{Float64}(undef, n_nodes, n_elements, n_variables) - for v = 1:n_variables - vardata = read(file["variables_$v"]) - @views data[:, :, v][:] .= vardata - end - else - error("Unsupported number of spatial dimensions: ", ndims_) - end + # Extract data arrays + n_nodes = polydeg + 1 + + if ndims_ == 1 + data = Array{Float64}(undef, n_nodes, n_elements, n_variables) + for v in 1:n_variables + vardata = read(file["variables_$v"]) + @views data[:, :, v][:] .= vardata + end + else + error("Unsupported number of spatial dimensions: ", ndims_) + end - # Extract element variable arrays - element_variables = Dict{String, Union{Vector{Float64}, Vector{Int}}}() - index = 1 - while haskey(file, "element_variables_$index") - varname = read(attributes(file["element_variables_$index"])["name"]) - element_variables[varname] = read(file["element_variables_$index"]) - index +=1 - end + # Extract element variable arrays + element_variables = Dict{String, Union{Vector{Float64}, Vector{Int}}}() + index = 1 + while haskey(file, "element_variables_$index") + varname = read(attributes(file["element_variables_$index"])["name"]) + element_variables[varname] = read(file["element_variables_$index"]) + index += 1 + end - return labels, data, n_elements, n_nodes, element_variables, time - end + return labels, data, n_elements, n_nodes, element_variables, time + end end - # Interpolate unstructured DG data to structured data (cell-centered) function unstructured2structured(unstructured_data::AbstractArray{Float64}, levels::AbstractArray{Int}, resolution::Int, nvisnodes_per_level::AbstractArray{Int}) - # Extract data shape information - n_nodes_in, n_elements, n_variables = size(unstructured_data) - - # Get node coordinates for DG locations on reference element - nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in) - - # Calculate interpolation vandermonde matrices for each level - max_level = length(nvisnodes_per_level) - 1 - vandermonde_per_level = [] - for l in 0:max_level - n_nodes_out = nvisnodes_per_level[l + 1] - dx = 2 / n_nodes_out - nodes_out = collect(range(-1 + dx/2, 1 - dx/2, length=n_nodes_out)) - push!(vandermonde_per_level, 
polynomial_interpolation_matrix(nodes_in, nodes_out)) - end - - # Create output data structure - structured = Array{Float64}(undef, resolution, n_variables) - - # For each variable, interpolate element data and store to global data structure - for v in 1:n_variables - first = 1 - - # Reshape data array for use in interpolate_nodes function - @views reshaped_data = reshape(unstructured_data[:, :, v], 1, n_nodes_in, n_elements) - - for element_id in 1:n_elements - # Extract level for convenience - level = levels[element_id] - - # Determine target indices - n_nodes_out = nvisnodes_per_level[level + 1] - last = first + (n_nodes_out - 1) - - # Interpolate data - vandermonde = vandermonde_per_level[level + 1] - @views structured[first:last, v] .= ( - reshape(multiply_dimensionwise_naive(reshaped_data[:, :, element_id], vandermonde), - n_nodes_out)) - - # Update first index for next iteration - first += n_nodes_out + # Extract data shape information + n_nodes_in, n_elements, n_variables = size(unstructured_data) + + # Get node coordinates for DG locations on reference element + nodes_in, _ = gauss_lobatto_nodes_weights(n_nodes_in) + + # Calculate interpolation vandermonde matrices for each level + max_level = length(nvisnodes_per_level) - 1 + vandermonde_per_level = [] + for l in 0:max_level + n_nodes_out = nvisnodes_per_level[l + 1] + dx = 2 / n_nodes_out + nodes_out = collect(range(-1 + dx / 2, 1 - dx / 2, length = n_nodes_out)) + push!(vandermonde_per_level, polynomial_interpolation_matrix(nodes_in, nodes_out)) end - end - return structured -end + # Create output data structure + structured = Array{Float64}(undef, resolution, n_variables) + + # For each variable, interpolate element data and store to global data structure + for v in 1:n_variables + first = 1 + + # Reshape data array for use in interpolate_nodes function + @views reshaped_data = reshape(unstructured_data[:, :, v], 1, n_nodes_in, + n_elements) + + for element_id in 1:n_elements + # Extract level for convenience + level = levels[element_id] + + # Determine target indices + n_nodes_out = nvisnodes_per_level[level + 1] + last = first + (n_nodes_out - 1) + + # Interpolate data + vandermonde = vandermonde_per_level[level + 1] + @views structured[first:last, v] .= (reshape(multiply_dimensionwise_naive(reshaped_data[:, + :, + element_id], + vandermonde), + n_nodes_out)) + # Update first index for next iteration + first += n_nodes_out + end + end + + return structured +end # Convert cell-centered values to node-centered values by averaging over all # four neighbors and making use of the periodicity of the solution function cell2node(cell_centered_data::AbstractArray{Float64}) - # Create temporary data structure to make the averaging algorithm as simple - # as possible (by using a ghost layer) - tmp = similar(cell_centered_data, size(cell_centered_data) .+ (2, 0)) - - # Fill center with original data - tmp[2:end-1, :] .= cell_centered_data - - # # Fill sides with opposite data (periodic domain) - # # x-direction - # tmp[1, :] .= cell_centered_data[end, :] - # tmp[end, :] .= cell_centered_data[1, :] - - # Fill sides with duplicate information - # x-direction - tmp[1, :] .= cell_centered_data[1, :] - tmp[end, :] .= cell_centered_data[end, :] - - # Create output data structure - resolution_in, n_variables = size(cell_centered_data) - resolution_out = resolution_in + 1 - node_centered_data = Array{Float64}(undef, resolution_out, n_variables) - - # Obtain node-centered value by averaging over neighboring cell-centered values - for i in 
1:resolution_out - node_centered_data[i, :] = (tmp[i, :] + tmp[i+1, :]) / 2 - end - - return node_centered_data + # Create temporary data structure to make the averaging algorithm as simple + # as possible (by using a ghost layer) + tmp = similar(cell_centered_data, size(cell_centered_data) .+ (2, 0)) + + # Fill center with original data + tmp[2:(end - 1), :] .= cell_centered_data + + # # Fill sides with opposite data (periodic domain) + # # x-direction + # tmp[1, :] .= cell_centered_data[end, :] + # tmp[end, :] .= cell_centered_data[1, :] + + # Fill sides with duplicate information + # x-direction + tmp[1, :] .= cell_centered_data[1, :] + tmp[end, :] .= cell_centered_data[end, :] + + # Create output data structure + resolution_in, n_variables = size(cell_centered_data) + resolution_out = resolution_in + 1 + node_centered_data = Array{Float64}(undef, resolution_out, n_variables) + + # Obtain node-centered value by averaging over neighboring cell-centered values + for i in 1:resolution_out + node_centered_data[i, :] = (tmp[i, :] + tmp[i + 1, :]) / 2 + end + + return node_centered_data end end From 95518c5670774dfccfd66db2fc0df15ec91b251f Mon Sep 17 00:00:00 2001 From: Simon Candelaresi <10759273+SimonCan@users.noreply.github.com> Date: Fri, 16 Jun 2023 20:35:24 +0100 Subject: [PATCH 050/163] Initial support for surface coupling of two systems (#1452) * Corrected bugs from coupled to main merger. * Added polytropic equation. * Added further polytropic equations and examples. * Added coupling equations. * Added coupling equation between Euler. * Commented debugging bits, like infiltrator. * Add missing `using` * Fix `destats` deprecation warning * Added coupled elixir. * Removed commented testing code. * Added other_list to the coupled semi discretisation elixir. * Removed flux coupling equation. * Removed surface coupling equation for polytropic Euler. * Removed polytropic Euler equation. * Removed any code related to BoundaryConditionCoupledAB. * Removed flux coupling code. * Removed numerical fluxes for BoundaryConditionCoupledAB. * Removed surface fluxes for BoundaryConditionCoupledAB. * Removed coupled elixir. * Remove Coupled StructuredMesh from visualization test. * Remove duplicate function definitions * make advection elixir go further * Removed initial_condition_peak. * Removed src/equations/hyperbolic_diffusion_2d.jl. * Removed 3d coupling. * Removed 3d capability. * Removed 3d capability. * Removed 3d plotting of coupled data. * Remove extra dependencies * Remove whitespace changes * Fix type instability * Some temporary fixes. * Fix type in semidiscretization * Removed analysis_callback for simple coupled elixir. * Removed analysis callbacks for the coupled case. * Removed AnalysisCallback for coupled elixir. * Removed polytropic coupling elixir. * Update summary output * Update src/solvers/dgsem_structured/dg_2d.jl * Format summary * Fix save solution callback * Remove unused code * Move timeit call before dispatch on semi * Avoid copy on calculate_dt * Avoid copy on save_solution_file * Remove unnecessary override of wrap_array * Undo changes to analysis callback * Remove equations_list * Remove unused functions * nmeshes -> nsystems * Further cleanup * Move BoundaryConditionCoupled to the correct location * Visualize setup * Formatting improvements * Change 1:nsystems(semi) to eachsystem(semi) * Remove redundant ndofs(...) function * copy_to_coupled_boundary --> copy_to_coupled_boundary!
* Move all SemidiscretizationCoupled-specific code to semi/semi_coupled.jl * Use uEltype for BCCoupled * Add comment * I --> Indices * Add comment * Remove Base.summary for SemiCoupled since it appears to be unused * Add parens * Int64 -> Int * Add xref for Documenter.jl * Fixup comment * Remove unused `total_volume` * Remove obsolete comment * summary_semi --> print_summary_semi for clarity * Make SemiCoupled ctor more convenient * Fix docstring * Add description to elixir * Rename elixir * Remove unused kwarg * Fix argument order and simplify interface for IO functions * Explicitly return nothing in functions that should do - nothing * Update comment * Add AnalysisCallback to coupled semidiscretization (#1505) * Add AnalysisCallback to coupled semidiscretization * First non-crashing version of the AnalysisCallbackCoupled * Added comment to offending line in the analysis callback. * Fix stupid bug * Rename variable for easier testing * Clean up code * Remove type instability * Prettify output * Add test * Enable `convergence_test` for SemidiscretizationCoupled * Increased the frequency of the solution write out for a more usable animation. * Reverted analysis intervals. * Updated the l2 and linf errors for the elixir_advection_basic_coupled.jl test to reflect the increased simulation time. * Corrected bracket typo in structured_2d test. * Renamed plural variable names to lists. * Further renaming plural variable names. * Added convergence_test for elixir_advection_basic_coupled. * Fix coverage for convergence_test * Add test for analysis_callback(sol) * Split timers between systems * fully specialize on rhs and parameters when constructing an ODEProblem * switch example back to OrdinaryDiffEq.jl * Reverted coupled example to use Trixi.solve instead of OrdinaryDiffEq solve. This should fix issues with LoadError: Failed to precompile OrdinaryDiffEq in the thread_legacy test. * Changed Julia version in project toml to 1.9 to fix OrdinaryDiffEq issues on the github test. * Change 1:nsystems(semi) to eachsystem(semi) * Use `get_system_u_ode` * Move all SemidiscretizationCoupled-specific code to semi/semi_coupled.jl * Changed file name of elixir_advection_basic_coupled.jl to elixir_advection_coupled.jl. * Reverted Julia version to 1.8 in Project toml file.
* Apply suggestions from code review * -use type SciMLBase.FullSpecialize instead of instance * Use get_system_u_ode instead of manual view * Reorder elixir ingredients * Make comment reflect code again * Use solve from OrdinaryDiffEq * Use more precise type for array * Test EOCs for each system separately * Allow test to run for the full duration --------- Co-authored-by: SimonCan Co-authored-by: Hendrik Ranocha * Remove unused `total_volume(...)` * Make `save_solution_file` work for SemiEulerGravity again (and make it multi-system aware) * Update src/semidiscretization/semidiscretization_euler_gravity.jl Co-authored-by: Hendrik Ranocha * Apply formatting --------- Co-authored-by: Michael Schlottke-Lakemper Co-authored-by: Hendrik Ranocha Co-authored-by: Hendrik Ranocha Co-authored-by: Benjamin Bolm <74359358+bennibolm@users.noreply.github.com> Co-authored-by: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Co-authored-by: Lucas Gemein <74359570+NichtLucas@users.noreply.github.com> --- .../elixir_advection_coupled.jl | 117 ++++ src/Trixi.jl | 8 +- src/auxiliary/special_elixirs.jl | 16 +- src/callbacks_step/analysis.jl | 30 +- src/callbacks_step/save_solution.jl | 91 +-- src/callbacks_step/stepsize.jl | 19 +- src/callbacks_step/summary.jl | 22 +- src/meshes/mesh_io.jl | 8 +- src/semidiscretization/semidiscretization.jl | 6 +- .../semidiscretization_coupled.jl | 610 ++++++++++++++++++ .../semidiscretization_euler_gravity.jl | 17 +- test/test_special_elixirs.jl | 7 + test/test_structured_2d.jl | 13 + 13 files changed, 890 insertions(+), 74 deletions(-) create mode 100644 examples/structured_2d_dgsem/elixir_advection_coupled.jl create mode 100644 src/semidiscretization/semidiscretization_coupled.jl diff --git a/examples/structured_2d_dgsem/elixir_advection_coupled.jl b/examples/structured_2d_dgsem/elixir_advection_coupled.jl new file mode 100644 index 00000000000..1e54e411db6 --- /dev/null +++ b/examples/structured_2d_dgsem/elixir_advection_coupled.jl @@ -0,0 +1,117 @@ +using OrdinaryDiffEq +using Trixi + + +############################################################################### +# Coupled semidiscretization of two linear advection systems, which are connected periodically +# +# In this elixir, we have a square domain that is divided into a left half and a right half. On each +# half of the domain, a completely independent SemidiscretizationHyperbolic is created for the +# linear advection equations. The two systems are coupled in the x-direction and have periodic +# boundaries in the y-direction. 
For a high-level overview, see also the figure below: +# +# (-1, 1) ( 1, 1) +# ┌────────────────────┬────────────────────┐ +# │ ↑ periodic ↑ │ ↑ periodic ↑ │ +# │ │ │ +# │ │ │ +# │ ========= │ ========= │ +# │ system #1 │ system #2 │ +# │ ========= │ ========= │ +# │ │ │ +# │ │ │ +# │ │ │ +# │ │ │ +# │ coupled -->│<-- coupled │ +# │ │ │ +# │<-- coupled │ coupled -->│ +# │ │ │ +# │ │ │ +# │ ↓ periodic ↓ │ ↓ periodic ↓ │ +# └────────────────────┴────────────────────┘ +# (-1, -1) ( 1, -1) + +advection_velocity = (0.2, -0.7) +equations = LinearScalarAdvectionEquation2D(advection_velocity) + +# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) + +# First mesh is the left half of a [-1,1]^2 square +coordinates_min1 = (-1.0, -1.0) # minimum coordinates (min(x), min(y)) +coordinates_max1 = ( 0.0, 1.0) # maximum coordinates (max(x), max(y)) + +# Define identical resolution as a variable such that it is easier to change from `trixi_include` +cells_per_dimension = (8, 16) + +cells_per_dimension1 = cells_per_dimension + +mesh1 = StructuredMesh(cells_per_dimension1, coordinates_min1, coordinates_max1) + +# A semidiscretization collects data structures and functions for the spatial discretization +semi1 = SemidiscretizationHyperbolic(mesh1, equations, initial_condition_convergence_test, solver, + boundary_conditions=( + # Connect left boundary with right boundary of right mesh + x_neg=BoundaryConditionCoupled(2, (:end, :i_forward), Float64), + # Connect right boundary with left boundary of right mesh + x_pos=BoundaryConditionCoupled(2, (:begin, :i_forward), Float64), + y_neg=boundary_condition_periodic, + y_pos=boundary_condition_periodic)) + + +# Second mesh is the right half of a [-1,1]^2 square +coordinates_min2 = (0.0, -1.0) # minimum coordinates (min(x), min(y)) +coordinates_max2 = (1.0, 1.0) # maximum coordinates (max(x), max(y)) + +cells_per_dimension2 = cells_per_dimension + +mesh2 = StructuredMesh(cells_per_dimension2, coordinates_min2, coordinates_max2) + +semi2 = SemidiscretizationHyperbolic(mesh2, equations, initial_condition_convergence_test, solver, + boundary_conditions=( + # Connect left boundary with right boundary of left mesh + x_neg=BoundaryConditionCoupled(1, (:end, :i_forward), Float64), + # Connect right boundary with left boundary of left mesh + x_pos=BoundaryConditionCoupled(1, (:begin, :i_forward), Float64), + y_neg=boundary_condition_periodic, + y_pos=boundary_condition_periodic)) + +# Create a semidiscretization that bundles semi1 and semi2 +semi = SemidiscretizationCoupled(semi1, semi2) + +############################################################################### +# ODE solvers, callbacks etc. 
+ +# Create ODE problem with time span from 0.0 to 2.0 +ode = semidiscretize(semi, (0.0, 2.0)); + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_callback1 = AnalysisCallback(semi1, interval=100) +analysis_callback2 = AnalysisCallback(semi2, interval=100) +analysis_callback = AnalysisCallbackCoupled(semi, analysis_callback1, analysis_callback2) + +# The SaveSolutionCallback allows to save the solution to a file in regular intervals +save_solution = SaveSolutionCallback(interval=100, + solution_variables=cons2prim) + +# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step +stepsize_callback = StepsizeCallback(cfl=1.6) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, stepsize_callback) + + +############################################################################### +# run the simulation + +# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); + +# Print the timer summary +summary_callback() diff --git a/src/Trixi.jl b/src/Trixi.jl index d5579aeea33..86e349c7dad 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -117,6 +117,7 @@ include("semidiscretization/semidiscretization.jl") include("semidiscretization/semidiscretization_hyperbolic.jl") include("semidiscretization/semidiscretization_hyperbolic_parabolic.jl") include("semidiscretization/semidiscretization_euler_acoustics.jl") +include("semidiscretization/semidiscretization_coupled.jl") include("callbacks_step/callbacks_step.jl") include("callbacks_stage/callbacks_stage.jl") include("semidiscretization/semidiscretization_euler_gravity.jl") @@ -184,7 +185,8 @@ export boundary_condition_do_nothing, boundary_condition_noslip_wall, boundary_condition_slip_wall, boundary_condition_wall, - BoundaryConditionNavierStokesWall, NoSlip, Adiabatic, Isothermal + BoundaryConditionNavierStokesWall, NoSlip, Adiabatic, Isothermal, + BoundaryConditionCoupled export initial_condition_convergence_test, source_terms_convergence_test export source_terms_harmonic @@ -229,12 +231,14 @@ export SemidiscretizationEulerAcoustics export SemidiscretizationEulerGravity, ParametersEulerGravity, timestep_gravity_erk52_3Sstar!, timestep_gravity_carpenter_kennedy_erk54_2N! 
+export SemidiscretizationCoupled + export SummaryCallback, SteadyStateCallback, AnalysisCallback, AliveCallback, SaveRestartCallback, SaveSolutionCallback, TimeSeriesCallback, VisualizationCallback, AveragingCallback, AMRCallback, StepsizeCallback, GlmSpeedCallback, LBMCollisionCallback, EulerAcousticsCouplingCallback, - TrivialCallback + TrivialCallback, AnalysisCallbackCoupled export load_mesh, load_time diff --git a/src/auxiliary/special_elixirs.jl b/src/auxiliary/special_elixirs.jl index da73b42e572..25bca8939ce 100644 --- a/src/auxiliary/special_elixirs.jl +++ b/src/auxiliary/special_elixirs.jl @@ -85,9 +85,21 @@ function convergence_test(mod::Module, elixir::AbstractString, iterations; kwarg println("#"^100) end - # number of variables - _, equations, _, _ = mesh_equations_solver_cache(mod.semi) + # Use raw error values to compute EOC + analyze_convergence(errors, iterations, mod.semi) +end + +# Analyze convergence for any semidiscretization +# Note: this intermediate method is to allow dispatching on the semidiscretization +function analyze_convergence(errors, iterations, semi::AbstractSemidiscretization) + _, equations, _, _ = mesh_equations_solver_cache(semi) variablenames = varnames(cons2cons, equations) + analyze_convergence(errors, iterations, variablenames) +end + +# This method is called with the collected error values to actually compute and print the EOC +function analyze_convergence(errors, iterations, + variablenames::Union{Tuple, AbstractArray}) nvariables = length(variablenames) # Reshape errors to get a matrix where the i-th row represents the i-th iteration diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl index 2e038401df7..7fa2e21a244 100644 --- a/src/callbacks_step/analysis.jl +++ b/src/callbacks_step/analysis.jl @@ -84,11 +84,13 @@ function Base.show(io::IO, ::MIME"text/plain", end end +# This is the convenience constructor that gets called from the elixirs function AnalysisCallback(semi::AbstractSemidiscretization; kwargs...) mesh, equations, solver, cache = mesh_equations_solver_cache(semi) AnalysisCallback(mesh, equations, solver, cache; kwargs...) end +# This is the actual constructor function AnalysisCallback(mesh, equations::AbstractEquations, solver, cache; interval = 0, save_analysis = false, @@ -132,9 +134,18 @@ function AnalysisCallback(mesh, equations::AbstractEquations, solver, cache; initialize = initialize!) end +# This method gets called from OrdinaryDiffEq's `solve(...)` function initialize!(cb::DiscreteCallback{Condition, Affect!}, u_ode, t, integrator) where {Condition, Affect! <: AnalysisCallback} semi = integrator.p + du_ode = first(get_tmp_cache(integrator)) + initialize!(cb, u_ode, du_ode, t, integrator, semi) +end + +# This is the actual initialization method +# Note: we have this indirection to allow initializing a callback from the AnalysisCallbackCoupled +function initialize!(cb::DiscreteCallback{Condition, Affect!}, u_ode, du_ode, t, + integrator, semi) where {Condition, Affect! 
<: AnalysisCallback} initial_state_integrals = integrate(u_ode, semi) _, equations, _, _ = mesh_equations_solver_cache(semi) @@ -202,13 +213,21 @@ function initialize!(cb::DiscreteCallback{Condition, Affect!}, u_ode, t, # Note: For details see the actual callback function below analysis_callback.start_gc_time = Base.gc_time_ns() - analysis_callback(integrator) + analysis_callback(u_ode, du_ode, integrator, semi) return nothing end -# TODO: Taal refactor, allow passing an IO object (which could be devnull to avoid cluttering the console) +# This method gets called from OrdinaryDiffEq's `solve(...)` function (analysis_callback::AnalysisCallback)(integrator) semi = integrator.p + du_ode = first(get_tmp_cache(integrator)) + u_ode = integrator.u + analysis_callback(u_ode, du_ode, integrator, semi) +end + +# This method gets called internally as the main entry point to the AnalysiCallback +# TODO: Taal refactor, allow passing an IO object (which could be devnull to avoid cluttering the console) +function (analysis_callback::AnalysisCallback)(u_ode, du_ode, integrator, semi) mesh, equations, solver, cache = mesh_equations_solver_cache(semi) @unpack dt, t = integrator iter = integrator.stats.naccept @@ -300,15 +319,14 @@ function (analysis_callback::AnalysisCallback)(integrator) end # Calculate current time derivative (needed for semidiscrete entropy time derivative, residual, etc.) - du_ode = first(get_tmp_cache(integrator)) # `integrator.f` is usually just a call to `rhs!` # However, we want to allow users to modify the ODE RHS outside of Trixi.jl # and allow us to pass a combined ODE RHS to OrdinaryDiffEq, e.g., for # hyperbolic-parabolic systems. - @notimeit timer() integrator.f(du_ode, integrator.u, semi, t) - u = wrap_array(integrator.u, mesh, equations, solver, cache) + @notimeit timer() integrator.f(du_ode, u_ode, semi, t) + u = wrap_array(u_ode, mesh, equations, solver, cache) du = wrap_array(du_ode, mesh, equations, solver, cache) - l2_error, linf_error = analysis_callback(io, du, u, integrator.u, t, semi) + l2_error, linf_error = analysis_callback(io, du, u, u_ode, t, semi) mpi_println("─"^100) mpi_println() diff --git a/src/callbacks_step/save_solution.jl b/src/callbacks_step/save_solution.jl index 55f17bbc1c7..1fe0d6b1e15 100644 --- a/src/callbacks_step/save_solution.jl +++ b/src/callbacks_step/save_solution.jl @@ -141,14 +141,7 @@ function initialize_save_cb!(solution_callback::SaveSolutionCallback, u, t, inte mpi_isroot() && mkpath(solution_callback.output_directory) semi = integrator.p - mesh, _, _, _ = mesh_equations_solver_cache(semi) - @trixi_timeit timer() "I/O" begin - if mesh.unsaved_changes - mesh.current_filename = save_mesh_file(mesh, - solution_callback.output_directory) - mesh.unsaved_changes = false - end - end + @trixi_timeit timer() "I/O" save_mesh(semi, solution_callback.output_directory) if solution_callback.save_initial_solution solution_callback(integrator) @@ -157,6 +150,16 @@ function initialize_save_cb!(solution_callback::SaveSolutionCallback, u, t, inte return nothing end +# Save mesh for a general semidiscretization (default) +function save_mesh(semi::AbstractSemidiscretization, output_directory, timestep = 0) + mesh, _, _, _ = mesh_equations_solver_cache(semi) + + if mesh.unsaved_changes + mesh.current_filename = save_mesh_file(mesh, output_directory) + mesh.unsaved_changes = false + end +end + # this method is called to determine whether the callback should be activated function (solution_callback::SaveSolutionCallback)(u, t, integrator) @unpack 
interval_or_dt, save_final_solution = solution_callback @@ -174,41 +177,15 @@ end # this method is called when the callback is activated function (solution_callback::SaveSolutionCallback)(integrator) u_ode = integrator.u - @unpack t, dt = integrator - iter = integrator.stats.naccept semi = integrator.p - mesh, _, _, _ = mesh_equations_solver_cache(semi) + iter = integrator.stats.naccept @trixi_timeit timer() "I/O" begin - @trixi_timeit timer() "save mesh" if mesh.unsaved_changes - mesh.current_filename = save_mesh_file(mesh, - solution_callback.output_directory, - iter) - mesh.unsaved_changes = false - end - - element_variables = Dict{Symbol, Any}() - @trixi_timeit timer() "get element variables" begin - get_element_variables!(element_variables, u_ode, semi) - callbacks = integrator.opts.callback - if callbacks isa CallbackSet - for cb in callbacks.continuous_callbacks - get_element_variables!(element_variables, u_ode, semi, cb; - t = integrator.t, - iter = integrator.stats.naccept) - end - for cb in callbacks.discrete_callbacks - get_element_variables!(element_variables, u_ode, semi, cb; - t = integrator.t, - iter = integrator.stats.naccept) - end - end - end - - @trixi_timeit timer() "save solution" save_solution_file(u_ode, t, dt, iter, - semi, - solution_callback, - element_variables) + # Call high-level functions that dispatch on semidiscretization type + @trixi_timeit timer() "save mesh" save_mesh(semi, + solution_callback.output_directory, + iter) + save_solution_file(semi, u_ode, solution_callback, integrator) end # avoid re-evaluating possible FSAL stages @@ -216,13 +193,43 @@ function (solution_callback::SaveSolutionCallback)(integrator) return nothing end +@inline function save_solution_file(semi::AbstractSemidiscretization, u_ode, + solution_callback, + integrator; system = "") + @unpack t, dt = integrator + iter = integrator.stats.naccept + + element_variables = Dict{Symbol, Any}() + @trixi_timeit timer() "get element variables" begin + get_element_variables!(element_variables, u_ode, semi) + callbacks = integrator.opts.callback + if callbacks isa CallbackSet + for cb in callbacks.continuous_callbacks + get_element_variables!(element_variables, u_ode, semi, cb; + t = integrator.t, iter = iter) + end + for cb in callbacks.discrete_callbacks + get_element_variables!(element_variables, u_ode, semi, cb; + t = integrator.t, iter = iter) + end + end + end + + @trixi_timeit timer() "save solution" save_solution_file(u_ode, t, dt, iter, semi, + solution_callback, + element_variables, + system = system) +end + @inline function save_solution_file(u_ode, t, dt, iter, semi::AbstractSemidiscretization, solution_callback, - element_variables = Dict{Symbol, Any}()) + element_variables = Dict{Symbol, Any}(); + system = "") mesh, equations, solver, cache = mesh_equations_solver_cache(semi) u = wrap_array_native(u_ode, mesh, equations, solver, cache) save_solution_file(u, t, dt, iter, mesh, equations, solver, cache, - solution_callback, element_variables) + solution_callback, + element_variables; system = system) end # TODO: Taal refactor, move save_mesh_file? 
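The hunk above routes all solution output through methods that dispatch on the semidiscretization type and accept a `system` tag. A minimal sketch of how a bundled semidiscretization can hook into this interface, assuming the `get_system_u_ode` helper mentioned in the commit message and the `eachsystem` iterator defined later in this patch; the body is illustrative, not the verbatim method from the PR:

```julia
# Sketch: forward saving to each bundled system and tag the output files
# (e.g., mesh_1.h5) via the new `system` keyword threaded through the
# save_solution_file/save_mesh_file interface introduced in this hunk.
@inline function save_solution_file(semi::SemidiscretizationCoupled, u_ode,
                                    solution_callback, integrator)
    for i in eachsystem(semi)
        # View into the block of coefficients belonging to system i
        u_ode_slice = get_system_u_ode(u_ode, i, semi)
        save_solution_file(semi.semis[i], u_ode_slice, solution_callback,
                           integrator, system = i)
    end
end
```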
diff --git a/src/callbacks_step/stepsize.jl b/src/callbacks_step/stepsize.jl index 9e9f2d4885b..8b5cb958318 100644 --- a/src/callbacks_step/stepsize.jl +++ b/src/callbacks_step/stepsize.jl @@ -64,14 +64,11 @@ end t = integrator.t u_ode = integrator.u semi = integrator.p - mesh, equations, solver, cache = mesh_equations_solver_cache(semi) @unpack cfl_number = stepsize_callback - u = wrap_array(u_ode, mesh, equations, solver, cache) - dt = @trixi_timeit timer() "calculate dt" begin - cfl_number * max_dt(u, t, mesh, have_constant_speed(equations), equations, - solver, cache) - end + # Dispatch based on semidiscretization + dt = @trixi_timeit timer() "calculate dt" calculate_dt(u_ode, t, cfl_number, + semi) set_proposed_dt!(integrator, dt) integrator.opts.dtmax = dt @@ -83,6 +80,16 @@ end return nothing end +# General case for a single semidiscretization +function calculate_dt(u_ode, t, cfl_number, semi::AbstractSemidiscretization) + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + u = wrap_array(u_ode, mesh, equations, solver, cache) + + dt = cfl_number * max_dt(u, t, mesh, + have_constant_speed(equations), equations, + solver, cache) +end + # Time integration methods from the DiffEq ecosystem without adaptive time stepping on their own # such as `CarpenterKennedy2N54` require passing `dt=...` in `solve(ode, ...)`. Since we don't have # an integrator at this stage but only the ODE, this method will be used there. It's called in diff --git a/src/callbacks_step/summary.jl b/src/callbacks_step/summary.jl index a73b2a1913b..08e13d0b98d 100644 --- a/src/callbacks_step/summary.jl +++ b/src/callbacks_step/summary.jl @@ -152,15 +152,7 @@ function initialize_summary_callback(cb::DiscreteCallback, u, t, integrator) :indentation_level => 0) semi = integrator.p - show(io_context, MIME"text/plain"(), semi) - println(io, "\n") - mesh, equations, solver, _ = mesh_equations_solver_cache(semi) - show(io_context, MIME"text/plain"(), mesh) - println(io, "\n") - show(io_context, MIME"text/plain"(), equations) - println(io, "\n") - show(io_context, MIME"text/plain"(), solver) - println(io, "\n") + print_summary_semidiscretization(io_context, semi) callbacks = integrator.opts.callback if callbacks isa CallbackSet @@ -208,6 +200,18 @@ function initialize_summary_callback(cb::DiscreteCallback, u, t, integrator) return nothing end +function print_summary_semidiscretization(io::IO, semi::AbstractSemidiscretization) + show(io, MIME"text/plain"(), semi) + println(io, "\n") + mesh, equations, solver, _ = mesh_equations_solver_cache(semi) + show(io, MIME"text/plain"(), mesh) + println(io, "\n") + show(io, MIME"text/plain"(), equations) + println(io, "\n") + show(io, MIME"text/plain"(), solver) + println(io, "\n") +end + function (cb::DiscreteCallback{Condition, Affect!})(io::IO = stdout) where {Condition, Affect! <: typeof(summary_callback) diff --git a/src/meshes/mesh_io.jl b/src/meshes/mesh_io.jl index b9c462fa15a..ede85d80106 100644 --- a/src/meshes/mesh_io.jl +++ b/src/meshes/mesh_io.jl @@ -95,11 +95,15 @@ end # of the mesh, like its size and the type of boundary mapping function. 
# Then, within Trixi2Vtk, the StructuredMesh and its node coordinates are reconstructed from # these attributes for plotting purposes -function save_mesh_file(mesh::StructuredMesh, output_directory) +function save_mesh_file(mesh::StructuredMesh, output_directory; system = "") # Create output directory (if it does not exist) mkpath(output_directory) - filename = joinpath(output_directory, "mesh.h5") + if isempty(system) + filename = joinpath(output_directory, "mesh.h5") + else + filename = joinpath(output_directory, @sprintf("mesh_%s.h5", system)) + end # Open file (clobber existing content) h5open(filename, "w") do file diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index 8fef66d261e..ac312c57c89 100644 --- a/src/semidiscretization/semidiscretization.jl +++ b/src/semidiscretization/semidiscretization.jl @@ -76,7 +76,8 @@ function semidiscretize(semi::AbstractSemidiscretization, tspan) # mpi_isparallel() && MPI.Barrier(mpi_comm()) # See https://github.com/trixi-framework/Trixi.jl/issues/328 iip = true # is-inplace, i.e., we modify a vector when calling rhs! - return ODEProblem{iip}(rhs!, u0_ode, tspan, semi) + specialize = SciMLBase.FullSpecialize # specialize on rhs! and parameters (semi) + return ODEProblem{iip, specialize}(rhs!, u0_ode, tspan, semi) end """ @@ -93,7 +94,8 @@ function semidiscretize(semi::AbstractSemidiscretization, tspan, # mpi_isparallel() && MPI.Barrier(mpi_comm()) # See https://github.com/trixi-framework/Trixi.jl/issues/328 iip = true # is-inplace, i.e., we modify a vector when calling rhs! - return ODEProblem{iip}(rhs!, u0_ode, tspan, semi) + specialize = SciMLBase.FullSpecialize # specialize on rhs! and parameters (semi) + return ODEProblem{iip, specialize}(rhs!, u0_ode, tspan, semi) end """ diff --git a/src/semidiscretization/semidiscretization_coupled.jl b/src/semidiscretization/semidiscretization_coupled.jl new file mode 100644 index 00000000000..b7adff78425 --- /dev/null +++ b/src/semidiscretization/semidiscretization_coupled.jl @@ -0,0 +1,610 @@ +""" + SemidiscretizationCoupled + +A struct used to bundle multiple semidiscretizations. +[`semidiscretize`](@ref) will return an `ODEProblem` that synchronizes time steps between the semidiscretizations. +Each call of `rhs!` will call `rhs!` for each semidiscretization individually. +The semidiscretizations can be coupled by gluing meshes together using [`BoundaryConditionCoupled`](@ref). + +!!! warning "Experimental code" + This is an experimental feature and can change any time. +""" +struct SemidiscretizationCoupled{S, Indices, EquationList} <: AbstractSemidiscretization + semis::S + u_indices::Indices # u_ode[u_indices[i]] is the part of u_ode corresponding to semis[i] + performance_counter::PerformanceCounter +end + +""" + SemidiscretizationCoupled(semis...) + +Create a coupled semidiscretization that consists of the semidiscretizations passed as arguments. +""" +function SemidiscretizationCoupled(semis...) + @assert all(semi -> ndims(semi) == ndims(semis[1]), semis) "All semidiscretizations must have the same dimension!" 
+ + # Number of coefficients for each semidiscretization + n_coefficients = zeros(Int, length(semis)) + for i in 1:length(semis) + _, equations, _, _ = mesh_equations_solver_cache(semis[i]) + n_coefficients[i] = ndofs(semis[i]) * nvariables(equations) + end + + # Compute range of coefficients associated with each semidiscretization and allocate coupled BCs + u_indices = Vector{UnitRange{Int}}(undef, length(semis)) + for i in 1:length(semis) + offset = sum(n_coefficients[1:(i - 1)]) + 1 + u_indices[i] = range(offset, length = n_coefficients[i]) + + allocate_coupled_boundary_conditions(semis[i]) + end + + performance_counter = PerformanceCounter() + + SemidiscretizationCoupled{typeof(semis), typeof(u_indices), typeof(performance_counter) + }(semis, u_indices, performance_counter) +end + +function Base.show(io::IO, semi::SemidiscretizationCoupled) + @nospecialize semi # reduce precompilation time + + print(io, "SemidiscretizationCoupled($(semi.semis))") +end + +function Base.show(io::IO, ::MIME"text/plain", semi::SemidiscretizationCoupled) + @nospecialize semi # reduce precompilation time + + if get(io, :compact, false) + show(io, semi) + else + summary_header(io, "SemidiscretizationCoupled") + summary_line(io, "#spatial dimensions", ndims(semi.semis[1])) + summary_line(io, "#systems", nsystems(semi)) + for i in eachsystem(semi) + summary_line(io, "system", i) + mesh, equations, solver, _ = mesh_equations_solver_cache(semi.semis[i]) + summary_line(increment_indent(io), "mesh", mesh |> typeof |> nameof) + summary_line(increment_indent(io), "equations", equations |> typeof |> nameof) + summary_line(increment_indent(io), "initial condition", + semi.semis[i].initial_condition) + # no boundary conditions since that could be too much + summary_line(increment_indent(io), "source terms", semi.semis[i].source_terms) + summary_line(increment_indent(io), "solver", solver |> typeof |> nameof) + end + summary_line(io, "total #DOFs", ndofs(semi)) + summary_footer(io) + end +end + +function print_summary_semidiscretization(io::IO, semi::SemidiscretizationCoupled) + show(io, MIME"text/plain"(), semi) + println(io, "\n") + for i in eachsystem(semi) + mesh, equations, solver, _ = mesh_equations_solver_cache(semi.semis[i]) + summary_header(io, "System #$i") + + summary_line(io, "mesh", mesh |> typeof |> nameof) + show(increment_indent(io), MIME"text/plain"(), mesh) + + summary_line(io, "equations", equations |> typeof |> nameof) + show(increment_indent(io), MIME"text/plain"(), equations) + + summary_line(io, "solver", solver |> typeof |> nameof) + show(increment_indent(io), MIME"text/plain"(), solver) + + summary_footer(io) + println(io, "\n") + end +end + +@inline Base.ndims(semi::SemidiscretizationCoupled) = ndims(semi.semis[1]) + +@inline nsystems(semi::SemidiscretizationCoupled) = length(semi.semis) + +@inline eachsystem(semi::SemidiscretizationCoupled) = Base.OneTo(nsystems(semi)) + +@inline Base.real(semi::SemidiscretizationCoupled) = promote_type(real.(semi.semis)...) + +@inline Base.eltype(semi::SemidiscretizationCoupled) = promote_type(eltype.(semi.semis)...) 
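The constructor above stores the coefficients of all systems in one flat vector and records a contiguous `UnitRange` per system in `u_indices`. A toy sketch of that bookkeeping and of the copy-free access used by `get_system_u_ode` below (the coefficient counts are invented for illustration):

```julia
# Each system owns a contiguous slice of one flat coefficient vector.
n_coefficients = [6, 4]  # stand-in for ndofs(semi) * nvariables(equations)

u_indices = Vector{UnitRange{Int}}(undef, length(n_coefficients))
for i in eachindex(n_coefficients)
    offset = sum(n_coefficients[1:(i - 1)]) + 1
    u_indices[i] = range(offset, length = n_coefficients[i])
end
@assert u_indices == [1:6, 7:10]

# Analogous to `get_system_u_ode`: a view writes through without copying
u_ode = zeros(sum(n_coefficients))
u_system_2 = view(u_ode, u_indices[2])
u_system_2 .= 1.0
@assert sum(u_ode) == 4.0  # only the slice of system 2 was touched
```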
+ +@inline function ndofs(semi::SemidiscretizationCoupled) + sum(ndofs, semi.semis) +end + +@inline function nelements(semi::SemidiscretizationCoupled) + return sum(semi.semis) do semi_ + mesh, equations, solver, cache = mesh_equations_solver_cache(semi_) + + nelements(mesh, solver, cache) + end +end + +function compute_coefficients(t, semi::SemidiscretizationCoupled) + @unpack u_indices = semi + + u_ode = Vector{real(semi)}(undef, u_indices[end][end]) + + for i in eachsystem(semi) + # Call `compute_coefficients` in `src/semidiscretization/semidiscretization.jl` + u_ode[u_indices[i]] .= compute_coefficients(t, semi.semis[i]) + end + + return u_ode +end + +@inline function get_system_u_ode(u_ode, index, semi::SemidiscretizationCoupled) + @view u_ode[semi.u_indices[index]] +end + +function rhs!(du_ode, u_ode, semi::SemidiscretizationCoupled, t) + @unpack u_indices = semi + + time_start = time_ns() + + @trixi_timeit timer() "copy to coupled boundaries" begin + for semi_ in semi.semis + copy_to_coupled_boundary!(semi_.boundary_conditions, u_ode, semi) + end + end + + # Call rhs! for each semidiscretization + for i in eachsystem(semi) + u_loc = get_system_u_ode(u_ode, i, semi) + du_loc = get_system_u_ode(du_ode, i, semi) + + @trixi_timeit timer() "system #$i" rhs!(du_loc, u_loc, semi.semis[i], t) + end + + runtime = time_ns() - time_start + put!(semi.performance_counter, runtime) + + return nothing +end + +################################################################################ +### AnalysisCallback +################################################################################ + +""" + AnalysisCallbackCoupled(semi, callbacks...) + +Combine multiple analysis callbacks for coupled simulations with a +[`SemidiscretizationCoupled`](@ref). For each coupled system, an individual +[`AnalysisCallback`](@ref) **must** be created and passed to the `AnalysisCallbackCoupled` **in +order**, i.e., in the same sequence as the individual semidiscretizations are stored in the +`SemidiscretizationCoupled`. + +!!! warning "Experimental code" + This is an experimental feature and can change any time. +""" +struct AnalysisCallbackCoupled{CB} + callbacks::CB +end + +function Base.show(io::IO, ::MIME"text/plain", + cb_coupled::DiscreteCallback{<:Any, <:AnalysisCallbackCoupled}) + @nospecialize cb_coupled # reduce precompilation time + + if get(io, :compact, false) + show(io, cb_coupled) + else + analysis_callback_coupled = cb_coupled.affect! + + summary_header(io, "AnalysisCallbackCoupled") + for (i, cb) in enumerate(analysis_callback_coupled.callbacks) + summary_line(io, "Callback #$i", "") + show(increment_indent(io), MIME"text/plain"(), cb) + end + summary_footer(io) + end +end + +# Convenience constructor for the coupled callback that gets called directly from the elixirs +function AnalysisCallbackCoupled(semi_coupled, callbacks...) + if length(callbacks) != nsystems(semi_coupled) + error("an AnalysisCallbackCoupled requires one AnalysisCallback for each semidiscretization") + end + + analysis_callback_coupled = AnalysisCallbackCoupled{typeof(callbacks)}(callbacks) + + # This callback is triggered if any of its subsidiary callbacks' condition is triggered + condition = (u, t, integrator) -> any(callbacks) do callback + callback.condition(u, t, integrator) + end + + DiscreteCallback(condition, analysis_callback_coupled, + save_positions = (false, false), + initialize = initialize!) 
+end + +# This method gets called during initialization from OrdinaryDiffEq's `solve(...)` +function initialize!(cb_coupled::DiscreteCallback{Condition, Affect!}, u_ode_coupled, t, + integrator) where {Condition, Affect! <: AnalysisCallbackCoupled} + analysis_callback_coupled = cb_coupled.affect! + semi_coupled = integrator.p + du_ode_coupled = first(get_tmp_cache(integrator)) + + # Loop over coupled systems' callbacks and initialize them individually + for i in eachsystem(semi_coupled) + cb = analysis_callback_coupled.callbacks[i] + semi = semi_coupled.semis[i] + u_ode = get_system_u_ode(u_ode_coupled, i, semi_coupled) + du_ode = get_system_u_ode(du_ode_coupled, i, semi_coupled) + initialize!(cb, u_ode, du_ode, t, integrator, semi) + end +end + +# This method gets called from OrdinaryDiffEq's `solve(...)` +function (analysis_callback_coupled::AnalysisCallbackCoupled)(integrator) + semi_coupled = integrator.p + u_ode_coupled = integrator.u + du_ode_coupled = first(get_tmp_cache(integrator)) + + # Loop over coupled systems' callbacks and call them individually + for i in eachsystem(semi_coupled) + @unpack condition = analysis_callback_coupled.callbacks[i] + analysis_callback = analysis_callback_coupled.callbacks[i].affect! + u_ode = get_system_u_ode(u_ode_coupled, i, semi_coupled) + + # Check condition and skip callback if it is not yet its turn + if !condition(u_ode, integrator.t, integrator) + continue + end + + semi = semi_coupled.semis[i] + du_ode = get_system_u_ode(du_ode_coupled, i, semi_coupled) + analysis_callback(u_ode, du_ode, integrator, semi) + end +end + +# used for error checks and EOC analysis +function (cb::DiscreteCallback{Condition, Affect!})(sol) where {Condition, + Affect! <: + AnalysisCallbackCoupled} + semi_coupled = sol.prob.p + u_ode_coupled = sol.u[end] + @unpack callbacks = cb.affect! + + uEltype = real(semi_coupled) + l2_error_collection = uEltype[] + linf_error_collection = uEltype[] + for i in eachsystem(semi_coupled) + analysis_callback = callbacks[i].affect! 
+ @unpack analyzer = analysis_callback + cache_analysis = analysis_callback.cache + + semi = semi_coupled.semis[i] + u_ode = get_system_u_ode(u_ode_coupled, i, semi_coupled) + + l2_error, linf_error = calc_error_norms(u_ode, sol.t[end], analyzer, semi, + cache_analysis) + append!(l2_error_collection, l2_error) + append!(linf_error_collection, linf_error) + end + + (; l2 = l2_error_collection, linf = linf_error_collection) +end + +################################################################################ +### SaveSolutionCallback +################################################################################ + +# Save mesh for a coupled semidiscretization, which contains multiple meshes internally +function save_mesh(semi::SemidiscretizationCoupled, output_directory, timestep = 0) + for i in eachsystem(semi) + mesh, _, _, _ = mesh_equations_solver_cache(semi.semis[i]) + + if mesh.unsaved_changes + mesh.current_filename = save_mesh_file(mesh, output_directory, system = i) + mesh.unsaved_changes = false + end + end +end + +@inline function save_solution_file(semi::SemidiscretizationCoupled, u_ode, + solution_callback, + integrator) + @unpack semis = semi + + for i in eachsystem(semi) + u_ode_slice = get_system_u_ode(u_ode, i, semi) + save_solution_file(semis[i], u_ode_slice, solution_callback, integrator, system = i) + end +end + +################################################################################ +### StepsizeCallback +################################################################################ + +# In case of coupled system, use minimum timestep over all systems +function calculate_dt(u_ode, t, cfl_number, semi::SemidiscretizationCoupled) + dt = minimum(eachsystem(semi)) do i + u_ode_slice = get_system_u_ode(u_ode, i, semi) + calculate_dt(u_ode_slice, t, cfl_number, semi.semis[i]) + end + + return dt +end + +################################################################################ +### Equations +################################################################################ + +""" + BoundaryConditionCoupled(other_semi_index, indices, uEltype) + +Boundary condition to glue two meshes together. Solution values at the boundary +of another mesh will be used as boundary values. This requires the use +of [`SemidiscretizationCoupled`](@ref). The other mesh is specified by `other_semi_index`, +which is the index of the mesh in the tuple of semidiscretizations. + +Note that the elements and nodes of the two meshes at the coupled boundary must coincide. +This is currently only implemented for [`StructuredMesh`](@ref). + +# Arguments +- `other_semi_index`: the index in `SemidiscretizationCoupled` of the semidiscretization + from which the values are copied +- `indices::Tuple`: node/cell indices at the boundary of the mesh in the other + semidiscretization. See examples below. +- `uEltype::Type`: element type of solution + +# Examples +```julia +# Connect the left boundary of mesh 2 to our boundary such that our positive +# boundary direction will match the positive y direction of the other boundary +BoundaryConditionCoupled(2, (:begin, :i), Float64) + +# Connect the same two boundaries oppositely oriented +BoundaryConditionCoupled(2, (:begin, :i_backwards), Float64) + +# Using this as y_neg boundary will connect `our_cells[i, 1, j]` to `other_cells[j, end-i, end]` +BoundaryConditionCoupled(2, (:j, :i_backwards, :end), Float64) +``` + +!!! warning "Experimental code" + This is an experimental feature and can change any time. 
+""" +mutable struct BoundaryConditionCoupled{NDIMS, NDIMST2M1, uEltype <: Real, Indices} + # NDIMST2M1 == NDIMS * 2 - 1 + # Buffer for boundary values: [variable, nodes_i, nodes_j, cell_i, cell_j] + u_boundary :: Array{uEltype, NDIMST2M1} # NDIMS * 2 - 1 + other_semi_index :: Int + other_orientation :: Int + indices :: Indices + + function BoundaryConditionCoupled(other_semi_index, indices, uEltype) + NDIMS = length(indices) + u_boundary = Array{uEltype, NDIMS * 2 - 1}(undef, ntuple(_ -> 0, NDIMS * 2 - 1)) + + if indices[1] in (:begin, :end) + other_orientation = 1 + elseif indices[2] in (:begin, :end) + other_orientation = 2 + else # indices[3] in (:begin, :end) + other_orientation = 3 + end + + new{NDIMS, NDIMS * 2 - 1, uEltype, typeof(indices)}(u_boundary, other_semi_index, + other_orientation, indices) + end +end + +function Base.eltype(boundary_condition::BoundaryConditionCoupled) + eltype(boundary_condition.u_boundary) +end + +function (boundary_condition::BoundaryConditionCoupled)(u_inner, orientation, direction, + cell_indices, surface_node_indices, + surface_flux_function, equations) + # get_node_vars(boundary_condition.u_boundary, equations, solver, surface_node_indices..., cell_indices...), + # but we don't have a solver here + u_boundary = SVector(ntuple(v -> boundary_condition.u_boundary[v, + surface_node_indices..., + cell_indices...], + Val(nvariables(equations)))) + + # Calculate boundary flux + if iseven(direction) # u_inner is "left" of boundary, u_boundary is "right" of boundary + flux = surface_flux_function(u_inner, u_boundary, orientation, equations) + else # u_boundary is "left" of boundary, u_inner is "right" of boundary + flux = surface_flux_function(u_boundary, u_inner, orientation, equations) + end + + return flux +end + +function allocate_coupled_boundary_conditions(semi::AbstractSemidiscretization) + n_boundaries = 2 * ndims(semi) + mesh, equations, solver, _ = mesh_equations_solver_cache(semi) + + for direction in 1:n_boundaries + boundary_condition = semi.boundary_conditions[direction] + + allocate_coupled_boundary_condition(boundary_condition, direction, mesh, equations, + solver) + end +end + +# Don't do anything for other BCs than BoundaryConditionCoupled +function allocate_coupled_boundary_condition(boundary_condition, direction, mesh, equations, + solver) + return nothing +end + +# In 2D +function allocate_coupled_boundary_condition(boundary_condition::BoundaryConditionCoupled{2 + }, + direction, mesh, equations, dg::DGSEM) + if direction in (1, 2) + cell_size = size(mesh, 2) + else + cell_size = size(mesh, 1) + end + + uEltype = eltype(boundary_condition) + boundary_condition.u_boundary = Array{uEltype, 3}(undef, nvariables(equations), + nnodes(dg), + cell_size) +end + +# Don't do anything for other BCs than BoundaryConditionCoupled +function copy_to_coupled_boundary!(boundary_condition, u_ode, semi) + return nothing +end + +function copy_to_coupled_boundary!(boundary_conditions::Union{Tuple, NamedTuple}, u_ode, + semi) + for boundary_condition in boundary_conditions + copy_to_coupled_boundary!(boundary_condition, u_ode, semi) + end +end + +# In 2D +function copy_to_coupled_boundary!(boundary_condition::BoundaryConditionCoupled{2}, u_ode, + semi) + @unpack u_indices = semi + @unpack other_semi_index, other_orientation, indices = boundary_condition + + mesh, equations, solver, cache = mesh_equations_solver_cache(semi.semis[other_semi_index]) + u = wrap_array(get_system_u_ode(u_ode, other_semi_index, semi), mesh, equations, solver, + cache) + + 
linear_indices = LinearIndices(size(mesh)) + + if other_orientation == 1 + cells = axes(mesh, 2) + else # other_orientation == 2 + cells = axes(mesh, 1) + end + + # Copy solution data to the coupled boundary using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + node_index_range = eachnode(solver) + i_node_start, i_node_step = index_to_start_step_2d(indices[1], node_index_range) + j_node_start, j_node_step = index_to_start_step_2d(indices[2], node_index_range) + + i_cell_start, i_cell_step = index_to_start_step_2d(indices[1], axes(mesh, 1)) + j_cell_start, j_cell_step = index_to_start_step_2d(indices[2], axes(mesh, 2)) + + i_cell = i_cell_start + j_cell = j_cell_start + + for cell in cells + i_node = i_node_start + j_node = j_node_start + + for i in eachnode(solver) + for v in 1:size(u, 1) + boundary_condition.u_boundary[v, i, cell] = u[v, i_node, j_node, + linear_indices[i_cell, + j_cell]] + end + i_node += i_node_step + j_node += j_node_step + end + i_cell += i_cell_step + j_cell += j_cell_step + end +end + +################################################################################ +### DGSEM/structured +################################################################################ + +@inline function calc_boundary_flux_by_direction!(surface_flux_values, u, t, orientation, + boundary_condition::BoundaryConditionCoupled, + mesh::StructuredMesh, equations, + surface_integral, dg::DG, cache, + direction, node_indices, + surface_node_indices, element) + @unpack node_coordinates, contravariant_vectors, inverse_jacobian = cache.elements + @unpack surface_flux = surface_integral + + cell_indices = get_boundary_indices(element, orientation, mesh) + + u_inner = get_node_vars(u, equations, dg, node_indices..., element) + + # If the mapping is orientation-reversing, the contravariant vectors' orientation + # is reversed as well. The normal vector must be oriented in the direction + # from `left_element` to `right_element`, or the numerical flux will be computed + # incorrectly (downwind direction). + sign_jacobian = sign(inverse_jacobian[node_indices..., element]) + + # Contravariant vector Ja^i is the normal vector + normal = sign_jacobian * get_contravariant_vector(orientation, contravariant_vectors, + node_indices..., element) + + # If the mapping is orientation-reversing, the normal vector will be reversed (see above). + # However, the flux now has the wrong sign, since we need the physical flux in normal direction. 
+ flux = sign_jacobian * boundary_condition(u_inner, normal, direction, cell_indices, + surface_node_indices, surface_flux, equations) + + for v in eachvariable(equations) + surface_flux_values[v, surface_node_indices..., direction, element] = flux[v] + end +end + +function get_boundary_indices(element, orientation, mesh::StructuredMesh{2}) + cartesian_indices = CartesianIndices(size(mesh)) + if orientation == 1 + # Get index of element in y-direction + cell_indices = (cartesian_indices[element][2],) + else # orientation == 2 + # Get index of element in x-direction + cell_indices = (cartesian_indices[element][1],) + end + + return cell_indices +end + +################################################################################ +### Special elixirs +################################################################################ + +# Analyze convergence for SemidiscretizationCoupled +function analyze_convergence(errors_coupled, iterations, + semi_coupled::SemidiscretizationCoupled) + # Extract errors: the errors are currently stored as + # | iter 1 sys 1 var 1...n | iter 1 sys 2 var 1...n | ... | iter 2 sys 1 var 1...n | ... + # but for calling `analyze_convergence` below, we need the following layout + # sys n: | iter 1 var 1...n | iter 1 var 1...n | ... | iter 2 var 1...n | ... + # That is, we need to extract and join the data for a single system + errors = Dict{Symbol, Vector{Float64}}[] + for i in eachsystem(semi_coupled) + push!(errors, Dict(:l2 => Float64[], :linf => Float64[])) + end + offset = 0 + for iter in 1:iterations, i in eachsystem(semi_coupled) + # Extract information on current semi + semi = semi_coupled.semis[i] + _, equations, _, _ = mesh_equations_solver_cache(semi) + variablenames = varnames(cons2cons, equations) + + # Compute offset + first = offset + 1 + last = offset + length(variablenames) + offset += length(variablenames) + + # Append errors to appropriate storage + append!(errors[i][:l2], errors_coupled[:l2][first:last]) + append!(errors[i][:linf], errors_coupled[:linf][first:last]) + end + + eoc_mean_values = Vector{Dict{Symbol, Any}}(undef, nsystems(semi_coupled)) + for i in eachsystem(semi_coupled) + # Use visual cues to separate output from multiple systems + println() + println("="^100) + println("# System $i") + println("="^100) + + # Extract information on current semi + semi = semi_coupled.semis[i] + _, equations, _, _ = mesh_equations_solver_cache(semi) + variablenames = varnames(cons2cons, equations) + + eoc_mean_values[i] = analyze_convergence(errors[i], iterations, variablenames) + end + + return eoc_mean_values +end diff --git a/src/semidiscretization/semidiscretization_euler_gravity.jl b/src/semidiscretization/semidiscretization_euler_gravity.jl index 665f2be9bfa..8fe9de1d2b2 100644 --- a/src/semidiscretization/semidiscretization_euler_gravity.jl +++ b/src/semidiscretization/semidiscretization_euler_gravity.jl @@ -477,18 +477,29 @@ end @inline function save_solution_file(u_ode, t, dt, iter, semi::SemidiscretizationEulerGravity, solution_callback, - element_variables = Dict{Symbol, Any}()) + element_variables = Dict{Symbol, Any}(); + system = "") + # If this is called already as part of a multi-system setup (i.e., system is non-empty), + # we build a combined system name + if !isempty(system) + system_euler = system * "_euler" + system_gravity = system * "_gravity" + else + system_euler = "euler" + system_gravity = "gravity" + end + u_euler = wrap_array_native(u_ode, semi.semi_euler) filename_euler = save_solution_file(u_euler, t, dt, iter, 
mesh_equations_solver_cache(semi.semi_euler)..., solution_callback, element_variables, - system = "euler") + system = system_euler) u_gravity = wrap_array_native(semi.cache.u_ode, semi.semi_gravity) filename_gravity = save_solution_file(u_gravity, t, dt, iter, mesh_equations_solver_cache(semi.semi_gravity)..., solution_callback, element_variables, - system = "gravity") + system = system_gravity) return filename_euler, filename_gravity end diff --git a/test/test_special_elixirs.jl b/test/test_special_elixirs.jl index 742a3abc376..23017059eaa 100644 --- a/test/test_special_elixirs.jl +++ b/test/test_special_elixirs.jl @@ -30,6 +30,12 @@ coverage = occursin("--code-coverage", cmd) && !occursin("--code-coverage=none", @test isapprox(mean_convergence[:l2], [4.0], rtol=0.05) end + @timed_testset "structured_2d_dgsem coupled" begin + mean_convergence = convergence_test(@__MODULE__, joinpath(EXAMPLES_DIR, "structured_2d_dgsem", "elixir_advection_coupled.jl"), 3) + @test isapprox(mean_convergence[1][:l2], [4.0], rtol=0.05) + @test isapprox(mean_convergence[2][:l2], [4.0], rtol=0.05) + end + @timed_testset "p4est_2d_dgsem" begin # Run convergence test on unrefined mesh no_refine = @cfunction((p4est, which_tree, quadrant) -> Cint(0), Cint, (Ptr{Trixi.p4est_t}, Ptr{Trixi.p4est_topidx_t}, Ptr{Trixi.p4est_quadrant_t})) @@ -57,6 +63,7 @@ coverage = occursin("--code-coverage", cmd) && !occursin("--code-coverage=none", @test_nowarn_mod convergence_test(@__MODULE__, joinpath(EXAMPLES_DIR, "tree_2d_dgsem", "elixir_advection_basic.jl"), 2, tspan=(0.0, 0.01)) @test_nowarn_mod convergence_test(@__MODULE__, joinpath(EXAMPLES_DIR, "tree_2d_dgsem", "elixir_advection_extended.jl"), 2, initial_refinement_level=0, tspan=(0.0, 0.1)) @test_nowarn_mod convergence_test(@__MODULE__, joinpath(EXAMPLES_DIR, "structured_2d_dgsem", "elixir_advection_basic.jl"), 2, tspan=(0.0, 0.01)) + @test_nowarn_mod convergence_test(@__MODULE__, joinpath(EXAMPLES_DIR, "structured_2d_dgsem", "elixir_advection_coupled.jl"), 2, tspan=(0.0, 0.01)) @test_nowarn_mod convergence_test(@__MODULE__, joinpath(EXAMPLES_DIR, "structured_2d_dgsem", "elixir_advection_extended.jl"), 2, cells_per_dimension=(1, 1), tspan=(0.0, 0.1)) end end diff --git a/test/test_structured_2d.jl b/test/test_structured_2d.jl index feaf66c4a7f..16fc72f0a46 100644 --- a/test/test_structured_2d.jl +++ b/test/test_structured_2d.jl @@ -19,6 +19,19 @@ isdir(outdir) && rm(outdir, recursive=true) linf = [6.627000273229378e-5]) end + @trixi_testset "elixir_advection_coupled.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_coupled.jl"), + l2 = [7.816742843181738e-6, 7.816742843196112e-6], + linf = [6.314906965543265e-5, 6.314906965410039e-5], + coverage_override = (maxiters=10^5,)) + + @testset "analysis_callback(sol) for AnalysisCallbackCoupled" begin + errors = analysis_callback(sol) + @test errors.l2 ≈ [7.816742843181738e-6, 7.816742843196112e-6] rtol=1.0e-4 + @test errors.linf ≈ [6.314906965543265e-5, 6.314906965410039e-5] rtol=1.0e-4 + end + end + @trixi_testset "elixir_advection_extended.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), l2 = [4.220397559713772e-6], From deb027adefdd88fccf6cec5ce4ca5c76106a0439 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 16 Jun 2023 22:53:10 +0200 Subject: [PATCH 051/163] Bump crate-ci/typos from 1.14.12 to 1.15.0 (#1524) * Bump crate-ci/typos from 1.14.12 to 1.15.0 Bumps 
[crate-ci/typos](https://github.com/crate-ci/typos) from 1.14.12 to 1.15.0. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.14.12...v1.15.0) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] * Fix typos --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Michael Schlottke-Lakemper --- .github/workflows/SpellCheck.yml | 2 +- docs/src/visualization.md | 2 +- examples/p4est_2d_dgsem/elixir_euler_supersonic_cylinder.jl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml index c4ab3a98557..bc324c689bc 100644 --- a/.github/workflows/SpellCheck.yml +++ b/.github/workflows/SpellCheck.yml @@ -10,4 +10,4 @@ jobs: - name: Checkout Actions Repository uses: actions/checkout@v3 - name: Check spelling - uses: crate-ci/typos@v1.14.12 + uses: crate-ci/typos@v1.15.0 diff --git a/docs/src/visualization.md b/docs/src/visualization.md index e29313cc080..8f72bb4b1c6 100644 --- a/docs/src/visualization.md +++ b/docs/src/visualization.md @@ -339,7 +339,7 @@ create a [`PlotData1D`](@ref) with the keyword argument `curve` set to your list Let's give an example of this with the basic advection equation from above by creating a plot along the circle marked in green: -![2d-plot-along-cirlce](https://user-images.githubusercontent.com/72009492/130951042-e1849447-8e55-4798-9361-c8badb9f3a49.png) +![2d-plot-along-circle](https://user-images.githubusercontent.com/72009492/130951042-e1849447-8e55-4798-9361-c8badb9f3a49.png) We can write a function like this, that outputs a list of points on a circle: ```julia diff --git a/examples/p4est_2d_dgsem/elixir_euler_supersonic_cylinder.jl b/examples/p4est_2d_dgsem/elixir_euler_supersonic_cylinder.jl index 42370e861ce..366be700f9f 100644 --- a/examples/p4est_2d_dgsem/elixir_euler_supersonic_cylinder.jl +++ b/examples/p4est_2d_dgsem/elixir_euler_supersonic_cylinder.jl @@ -3,7 +3,7 @@ # Boundary conditions are supersonic Mach 3 inflow at the left portion of the domain # and supersonic outflow at the right portion of the domain. The top and bottom of the # channel as well as the cylinder are treated as Euler slip wall boundaries. -# This flow results in strong shock refletions / interactions as well as Kelvin-Helmholtz +# This flow results in strong shock reflections / interactions as well as Kelvin-Helmholtz # instabilities at later times as two Mach stems form above and below the cylinder. 
# # For complete details on the problem setup see Section 5.7 of the paper: From 642da1af9f9cc390f1d3d2a47a5fd07628a632f0 Mon Sep 17 00:00:00 2001 From: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com> Date: Mon, 19 Jun 2023 09:07:15 +0200 Subject: [PATCH 052/163] Use `Pointerwrapper`s in `P4estMesh` (#1434) * use PointerWrappers * fix typo * fix typo * fixes * fixes * fixes * unsafe_load_sc returns PointerWrapper * bug fixes * bug fixes * bug fixes * add comment about * rename unsafe_load_* to load_pointerwrapper_* * format * fix merge conflicts * fix bad format again * fix * add unsafe_wrap_sc again * fix * fix * Update src/auxiliary/p4est.jl Co-authored-by: Michael Schlottke-Lakemper * introduce generic type PointerOrWrapper{T} * format --------- Co-authored-by: Michael Schlottke-Lakemper Co-authored-by: Hendrik Ranocha --- src/auxiliary/p4est.jl | 94 +++++++----- src/callbacks_step/amr.jl | 30 ++-- src/callbacks_step/amr_dg.jl | 3 +- src/callbacks_step/analysis.jl | 2 +- src/meshes/p4est_mesh.jl | 117 +++++++------- src/solvers/dgsem_p4est/containers.jl | 122 +++++++-------- src/solvers/dgsem_p4est/containers_2d.jl | 2 +- src/solvers/dgsem_p4est/containers_3d.jl | 2 +- .../dgsem_p4est/containers_parallel.jl | 145 +++++++++--------- src/solvers/dgsem_p4est/dg_parallel.jl | 80 +++++----- 10 files changed, 311 insertions(+), 286 deletions(-) diff --git a/src/auxiliary/p4est.jl b/src/auxiliary/p4est.jl index 93b5166cd81..968af339cbd 100644 --- a/src/auxiliary/p4est.jl +++ b/src/auxiliary/p4est.jl @@ -24,35 +24,38 @@ function init_p4est() return nothing end +# for convenience to either pass a Ptr or a PointerWrapper +const PointerOrWrapper = Union{Ptr{T}, PointerWrapper{T}} where {T} + # Convert sc_array of type T to Julia array -function unsafe_wrap_sc(::Type{T}, sc_array::Ptr{sc_array}) where {T} - sc_array_obj = unsafe_load(sc_array) +function unsafe_wrap_sc(::Type{T}, sc_array_ptr::Ptr{sc_array}) where {T} + sc_array_obj = unsafe_load(sc_array_ptr) return unsafe_wrap_sc(T, sc_array_obj) end function unsafe_wrap_sc(::Type{T}, sc_array_obj::sc_array) where {T} elem_count = sc_array_obj.elem_count array = sc_array_obj.array - return unsafe_wrap(Array, Ptr{T}(array), elem_count) end -# Load the ith element (1-indexed) of an sc array of type T -function unsafe_load_sc(::Type{T}, sc_array::Ptr{sc_array}, i = 1) where {T} - sc_array_obj = unsafe_load(sc_array) - return unsafe_load_sc(T, sc_array_obj, i) -end +function unsafe_wrap_sc(::Type{T}, sc_array_pw::PointerWrapper{sc_array}) where {T} + elem_count = sc_array_pw.elem_count[] + array = sc_array_pw.array -function unsafe_load_sc(::Type{T}, sc_array_obj::sc_array, i = 1) where {T} - element_size = sc_array_obj.elem_size - @assert element_size == sizeof(T) + return unsafe_wrap(Array, Ptr{T}(pointer(array)), elem_count) +end - return unsafe_load(Ptr{T}(sc_array_obj.array), i) +# Load the ith element (1-indexed) of an sc array of type T as PointerWrapper +function load_pointerwrapper_sc(::Type{T}, sc_array::PointerWrapper{sc_array}, + i::Integer = 1) where {T} + return PointerWrapper(T, pointer(sc_array.array) + (i - 1) * sizeof(T)) end # Create new `p4est` from a p4est_connectivity # 2D -function new_p4est(connectivity::Ptr{p4est_connectivity_t}, initial_refinement_level) +function new_p4est(connectivity::PointerOrWrapper{p4est_connectivity_t}, + initial_refinement_level) comm = P4est.uses_mpi() ? 
mpi_comm() : 0 # Use Trixi.jl's MPI communicator if p4est supports MPI p4est_new_ext(comm, connectivity, @@ -65,7 +68,8 @@ function new_p4est(connectivity::Ptr{p4est_connectivity_t}, initial_refinement_l end # 3D -function new_p4est(connectivity::Ptr{p8est_connectivity_t}, initial_refinement_level) +function new_p4est(connectivity::PointerOrWrapper{p8est_connectivity_t}, + initial_refinement_level) comm = P4est.uses_mpi() ? mpi_comm() : 0 # Use Trixi.jl's MPI communicator if p4est supports MPI p8est_new_ext(comm, connectivity, 0, initial_refinement_level, true, 2 * sizeof(Int), C_NULL, C_NULL) @@ -73,13 +77,13 @@ end # Save `p4est` data to file # 2D -function save_p4est!(file, p4est::Ptr{p4est_t}) +function save_p4est!(file, p4est::PointerOrWrapper{p4est_t}) # Don't save user data of the quads p4est_save(file, p4est, false) end # 3D -function save_p4est!(file, p8est::Ptr{p8est_t}) +function save_p4est!(file, p8est::PointerOrWrapper{p8est_t}) # Don't save user data of the quads p8est_save(file, p8est, false) end @@ -107,27 +111,33 @@ read_inp_p4est(meshfile, ::Val{3}) = p8est_connectivity_read_inp(meshfile) # Refine `p4est` if refine_fn_c returns 1 # 2D -function refine_p4est!(p4est::Ptr{p4est_t}, recursive, refine_fn_c, init_fn_c) +function refine_p4est!(p4est::PointerOrWrapper{p4est_t}, recursive, refine_fn_c, + init_fn_c) p4est_refine(p4est, recursive, refine_fn_c, init_fn_c) end # 3D -function refine_p4est!(p8est::Ptr{p8est_t}, recursive, refine_fn_c, init_fn_c) +function refine_p4est!(p8est::PointerOrWrapper{p8est_t}, recursive, refine_fn_c, + init_fn_c) p8est_refine(p8est, recursive, refine_fn_c, init_fn_c) end # Refine `p4est` if coarsen_fn_c returns 1 # 2D -function coarsen_p4est!(p4est::Ptr{p4est_t}, recursive, coarsen_fn_c, init_fn_c) +function coarsen_p4est!(p4est::PointerOrWrapper{p4est_t}, recursive, coarsen_fn_c, + init_fn_c) p4est_coarsen(p4est, recursive, coarsen_fn_c, init_fn_c) end # 3D -function coarsen_p4est!(p8est::Ptr{p8est_t}, recursive, coarsen_fn_c, init_fn_c) +function coarsen_p4est!(p8est::PointerOrWrapper{p8est_t}, recursive, coarsen_fn_c, + init_fn_c) p8est_coarsen(p8est, recursive, coarsen_fn_c, init_fn_c) end # Create new ghost layer from p4est, only connections via faces are relevant # 2D -ghost_new_p4est(p4est::Ptr{p4est_t}) = p4est_ghost_new(p4est, P4est.P4EST_CONNECT_FACE) +function ghost_new_p4est(p4est::PointerOrWrapper{p4est_t}) + p4est_ghost_new(p4est, P4est.P4EST_CONNECT_FACE) +end # 3D # In 3D it is not sufficient to use `P8EST_CONNECT_FACE`. Consider the neighbor elements of a mortar # in 3D. We have to determine which MPI ranks are involved in this mortar. @@ -147,28 +157,37 @@ ghost_new_p4est(p4est::Ptr{p4est_t}) = p4est_ghost_new(p4est, P4est.P4EST_CONNEC # `P8EST_CONNECT_FACE`. But if it is not in the ghost layer, it will not be available in # `iterate_p4est` and thus we cannot determine its MPI rank # (see https://github.com/cburstedde/p4est/blob/439bc9aae849555256ddfe4b03d1f9fe8d18ff0e/src/p8est_iterate.h#L66-L72). 
-ghost_new_p4est(p8est::Ptr{p8est_t}) = p8est_ghost_new(p8est, P4est.P8EST_CONNECT_FULL) +function ghost_new_p4est(p8est::PointerOrWrapper{p8est_t}) + p8est_ghost_new(p8est, P4est.P8EST_CONNECT_FULL) +end # Check if ghost layer is valid # 2D -function ghost_is_valid_p4est(p4est::Ptr{p4est_t}, ghost_layer::Ptr{p4est_ghost_t}) +function ghost_is_valid_p4est(p4est::PointerOrWrapper{p4est_t}, + ghost_layer::Ptr{p4est_ghost_t}) return p4est_ghost_is_valid(p4est, ghost_layer) end # 3D -function ghost_is_valid_p4est(p4est::Ptr{p8est_t}, ghost_layer::Ptr{p8est_ghost_t}) +function ghost_is_valid_p4est(p4est::PointerOrWrapper{p8est_t}, + ghost_layer::Ptr{p8est_ghost_t}) return p8est_ghost_is_valid(p4est, ghost_layer) end # Destroy ghost layer # 2D -ghost_destroy_p4est(ghost_layer::Ptr{p4est_ghost_t}) = p4est_ghost_destroy(ghost_layer) +function ghost_destroy_p4est(ghost_layer::PointerOrWrapper{p4est_ghost_t}) + p4est_ghost_destroy(ghost_layer) +end # 3D -ghost_destroy_p4est(ghost_layer::Ptr{p8est_ghost_t}) = p8est_ghost_destroy(ghost_layer) +function ghost_destroy_p4est(ghost_layer::PointerOrWrapper{p8est_ghost_t}) + p8est_ghost_destroy(ghost_layer) +end # Let `p4est` iterate over each cell volume and cell face. # Call iter_volume_c for each cell and iter_face_c for each face. # 2D -function iterate_p4est(p4est::Ptr{p4est_t}, user_data; ghost_layer = C_NULL, +function iterate_p4est(p4est::PointerOrWrapper{p4est_t}, user_data; + ghost_layer = C_NULL, iter_volume_c = C_NULL, iter_face_c = C_NULL) if user_data === C_NULL user_data_ptr = user_data @@ -191,7 +210,8 @@ function iterate_p4est(p4est::Ptr{p4est_t}, user_data; ghost_layer = C_NULL, end # 3D -function iterate_p4est(p8est::Ptr{p8est_t}, user_data; ghost_layer = C_NULL, +function iterate_p4est(p8est::PointerOrWrapper{p8est_t}, user_data; + ghost_layer = C_NULL, iter_volume_c = C_NULL, iter_face_c = C_NULL) if user_data === C_NULL user_data_ptr = user_data @@ -216,23 +236,25 @@ end # Load i-th element of the sc_array info.sides of the type p[48]est_iter_face_side_t # 2D version -function unsafe_load_side(info::Ptr{p4est_iter_face_info_t}, i = 1) - return unsafe_load_sc(p4est_iter_face_side_t, unsafe_load(info).sides, i) +function load_pointerwrapper_side(info::PointerWrapper{p4est_iter_face_info_t}, + i::Integer = 1) + return load_pointerwrapper_sc(p4est_iter_face_side_t, info.sides, i) end # 3D version -function unsafe_load_side(info::Ptr{p8est_iter_face_info_t}, i = 1) - return unsafe_load_sc(p8est_iter_face_side_t, unsafe_load(info).sides, i) +function load_pointerwrapper_side(info::PointerWrapper{p8est_iter_face_info_t}, + i::Integer = 1) + return load_pointerwrapper_sc(p8est_iter_face_side_t, info.sides, i) end # Load i-th element of the sc_array p4est.trees of the type p[48]est_tree_t # 2D version -function unsafe_load_tree(p4est::Ptr{p4est_t}, i = 1) - return unsafe_load_sc(p4est_tree_t, unsafe_load(p4est).trees, i) +function load_pointerwrapper_tree(p4est::PointerWrapper{p4est_t}, i::Integer = 1) + return load_pointerwrapper_sc(p4est_tree_t, p4est.trees, i) end # 3D version -function unsafe_load_tree(p8est::Ptr{p8est_t}, i = 1) - return unsafe_load_sc(p8est_tree_t, unsafe_load(p8est).trees, i) +function load_pointerwrapper_tree(p8est::PointerWrapper{p8est_t}, i::Integer = 1) + return load_pointerwrapper_sc(p8est_tree_t, p8est.trees, i) end end # @muladd diff --git a/src/callbacks_step/amr.jl b/src/callbacks_step/amr.jl index d6e19b79886..bef49b4c482 100644 --- a/src/callbacks_step/amr.jl +++ b/src/callbacks_step/amr.jl @@ -348,24 +348,24 
@@ end # Copy controller values to quad user data storage, will be called below function copy_to_quad_iter_volume(info, user_data) - info_obj = unsafe_load(info) + info_pw = PointerWrapper(info) # Load tree from global trees array, one-based indexing - tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) + tree_pw = load_pointerwrapper_tree(info_pw.p4est, info_pw.treeid[] + 1) # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset + offset = tree_pw.quadrants_offset[] # Global quad ID - quad_id = offset + info_obj.quadid + quad_id = offset + info_pw.quadid[] # Access user_data = lambda - user_data_ptr = Ptr{Int}(user_data) + user_data_pw = PointerWrapper(Int, user_data) # Load controller_value = lambda[quad_id + 1] - controller_value = unsafe_load(user_data_ptr, quad_id + 1) + controller_value = user_data_pw[quad_id + 1] # Access quadrant's user data ([global quad ID, controller_value]) - quad_data_ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) + quad_data_pw = PointerWrapper(Int, info_pw.quad.p.user_data[]) # Save controller value to quadrant's user data. - unsafe_store!(quad_data_ptr, controller_value, 2) + quad_data_pw[2] = controller_value return nothing end @@ -599,22 +599,22 @@ function current_element_levels(mesh::TreeMesh, solver, cache) end function extract_levels_iter_volume(info, user_data) - info_obj = unsafe_load(info) + info_pw = PointerWrapper(info) # Load tree from global trees array, one-based indexing - tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) + tree_pw = load_pointerwrapper_tree(info_pw.p4est, info_pw.treeid[] + 1) # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset + offset = tree_pw.quadrants_offset[] # Global quad ID - quad_id = offset + info_obj.quadid + quad_id = offset + info_pw.quadid[] # Julia element ID element_id = quad_id + 1 - current_level = unsafe_load(info_obj.quad.level) + current_level = info_pw.quad.level[] # Unpack user_data = current_levels and save current element level - ptr = Ptr{Int}(user_data) - unsafe_store!(ptr, current_level, element_id) + pw = PointerWrapper(Int, user_data) + pw[element_id] = current_level return nothing end diff --git a/src/callbacks_step/amr_dg.jl b/src/callbacks_step/amr_dg.jl index 19bbebd9254..1dcfdccdea8 100644 --- a/src/callbacks_step/amr_dg.jl +++ b/src/callbacks_step/amr_dg.jl @@ -9,8 +9,7 @@ function rebalance_solver!(u_ode::AbstractVector, mesh::ParallelP4estMesh, equations, dg::DGSEM, cache, old_global_first_quadrant) # mpi ranks are 0-based, this array uses 1-based indices - global_first_quadrant = unsafe_wrap(Array, - unsafe_load(mesh.p4est).global_first_quadrant, + global_first_quadrant = unsafe_wrap(Array, mesh.p4est.global_first_quadrant, mpi_nranks() + 1) if global_first_quadrant[mpi_rank() + 1] == old_global_first_quadrant[mpi_rank() + 1] && diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl index 7fa2e21a244..8cf43a1d15e 100644 --- a/src/callbacks_step/analysis.jl +++ b/src/callbacks_step/analysis.jl @@ -508,7 +508,7 @@ function print_amr_information(callbacks, mesh::P4estMesh, solver, cache) elements_per_level = zeros(P4EST_MAXLEVEL + 1) - for tree in unsafe_wrap_sc(p4est_tree_t, unsafe_load(mesh.p4est).trees) + for tree in unsafe_wrap_sc(p4est_tree_t, mesh.p4est.trees) elements_per_level .+= tree.quadrants_per_level end diff --git a/src/meshes/p4est_mesh.jl b/src/meshes/p4est_mesh.jl index ddd6cf473e4..60db285e04f 100644 --- a/src/meshes/p4est_mesh.jl +++ b/src/meshes/p4est_mesh.jl @@ -13,9 
+13,9 @@ to manage trees and mesh refinement. """ mutable struct P4estMesh{NDIMS, RealT <: Real, IsParallel, P, Ghost, NDIMSP2, NNODES} <: AbstractMesh{NDIMS} - p4est::P # Either Ptr{p4est_t} or Ptr{p8est_t} - is_parallel::IsParallel - ghost::Ghost # Either Ptr{p4est_ghost_t} or Ptr{p8est_ghost_t} + p4est :: P # Either PointerWrapper{p4est_t} or PointerWrapper{p8est_t} + is_parallel :: IsParallel + ghost :: Ghost # Either PointerWrapper{p4est_ghost_t} or PointerWrapper{p8est_ghost_t} # Coordinates at the nodes specified by the tensor product of `nodes` (NDIMS times). # This specifies the geometry interpolation for each tree. tree_node_coordinates::Array{RealT, NDIMSP2} # [dimension, i, j, k, tree] @@ -43,18 +43,21 @@ mutable struct P4estMesh{NDIMS, RealT <: Real, IsParallel, P, Ghost, NDIMSP2, NN is_parallel = False() end + p4est_pw = PointerWrapper(p4est) + ghost = ghost_new_p4est(p4est) + ghost_pw = PointerWrapper(ghost) mesh = new{NDIMS, eltype(tree_node_coordinates), typeof(is_parallel), - typeof(p4est), typeof(ghost), NDIMS + 2, length(nodes)}(p4est, - is_parallel, - ghost, - tree_node_coordinates, - nodes, - boundary_names, - current_filename, - unsaved_changes, - p4est_partition_allow_for_coarsening) + typeof(p4est_pw), typeof(ghost_pw), NDIMS + 2, length(nodes)}(p4est_pw, + is_parallel, + ghost_pw, + tree_node_coordinates, + nodes, + boundary_names, + current_filename, + unsaved_changes, + p4est_partition_allow_for_coarsening) # Destroy `p4est` structs when the mesh is garbage collected finalizer(destroy_mesh, mesh) @@ -70,14 +73,14 @@ const ParallelP4estMesh{NDIMS} = P4estMesh{NDIMS, <:Real, <:True} @inline mpi_parallel(mesh::ParallelP4estMesh) = True() function destroy_mesh(mesh::P4estMesh{2}) - connectivity = unsafe_load(mesh.p4est).connectivity + connectivity = mesh.p4est.connectivity p4est_ghost_destroy(mesh.ghost) p4est_destroy(mesh.p4est) p4est_connectivity_destroy(connectivity) end function destroy_mesh(mesh::P4estMesh{3}) - connectivity = unsafe_load(mesh.p4est).connectivity + connectivity = mesh.p4est.connectivity p8est_ghost_destroy(mesh.ghost) p8est_destroy(mesh.p4est) p8est_connectivity_destroy(connectivity) @@ -87,11 +90,10 @@ end @inline Base.real(::P4estMesh{NDIMS, RealT}) where {NDIMS, RealT} = RealT @inline function ntrees(mesh::P4estMesh) - trees = unsafe_load(mesh.p4est).trees - return unsafe_load(trees).elem_count + return mesh.p4est.trees.elem_count[] end # returns Int32 by default which causes a weird method error when creating the cache -@inline ncells(mesh::P4estMesh) = Int(unsafe_load(mesh.p4est).local_num_quadrants) +@inline ncells(mesh::P4estMesh) = Int(mesh.p4est.local_num_quadrants[]) function Base.show(io::IO, mesh::P4estMesh) print(io, "P4estMesh{", ndims(mesh), ", ", real(mesh), "}") @@ -387,14 +389,14 @@ function p4est_mesh_from_hohqmesh_abaqus(meshfile, initial_refinement_level, n_dimensions, RealT) # Create the mesh connectivity using `p4est` connectivity = read_inp_p4est(meshfile, Val(n_dimensions)) - connectivity_obj = unsafe_load(connectivity) + connectivity_pw = PointerWrapper(connectivity) # These need to be of the type Int for unsafe_wrap below to work - n_trees::Int = connectivity_obj.num_trees - n_vertices::Int = connectivity_obj.num_vertices + n_trees::Int = connectivity_pw.num_trees[] + n_vertices::Int = connectivity_pw.num_vertices[] # Extract a copy of the element vertices to compute the tree node coordinates - vertices = unsafe_wrap(Array, connectivity_obj.vertices, (3, n_vertices)) + vertices = unsafe_wrap(Array, 
connectivity_pw.vertices, (3, n_vertices)) # Read in all the information from the mesh file into a string array file_lines = readlines(open(meshfile)) @@ -445,14 +447,14 @@ function p4est_mesh_from_standard_abaqus(meshfile, mapping, polydeg, initial_refinement_level, n_dimensions, RealT) # Create the mesh connectivity using `p4est` connectivity = read_inp_p4est(meshfile, Val(n_dimensions)) - connectivity_obj = unsafe_load(connectivity) + connectivity_pw = PointerWrapper(connectivity) # These need to be of the type Int for unsafe_wrap below to work - n_trees::Int = connectivity_obj.num_trees - n_vertices::Int = connectivity_obj.num_vertices + n_trees::Int = connectivity_pw.num_trees[] + n_vertices::Int = connectivity_pw.num_vertices[] - vertices = unsafe_wrap(Array, connectivity_obj.vertices, (3, n_vertices)) - tree_to_vertex = unsafe_wrap(Array, connectivity_obj.tree_to_vertex, + vertices = unsafe_wrap(Array, connectivity_pw.vertices, (3, n_vertices)) + tree_to_vertex = unsafe_wrap(Array, connectivity_pw.tree_to_vertex, (2^n_dimensions, n_trees)) basis = LobattoLegendreBasis(RealT, polydeg) @@ -1511,17 +1513,18 @@ end function update_ghost_layer!(mesh::P4estMesh) ghost_destroy_p4est(mesh.ghost) - mesh.ghost = ghost_new_p4est(mesh.p4est) + mesh.ghost = PointerWrapper(ghost_new_p4est(mesh.p4est)) end function init_fn(p4est, which_tree, quadrant) # Unpack quadrant's user data ([global quad ID, controller_value]) - ptr = Ptr{Int}(unsafe_load(quadrant.p.user_data)) + # Use `unsafe_load` here since `quadrant.p.user_data isa Ptr{Ptr{Nothing}}` + # and we only need the first (only!) entry + pw = PointerWrapper(Int, unsafe_load(quadrant.p.user_data)) # Initialize quad ID as -1 and controller_value as 0 (don't refine or coarsen) - unsafe_store!(ptr, -1, 1) - unsafe_store!(ptr, 0, 2) - + pw[1] = -1 + pw[2] = 0 return nothing end @@ -1539,8 +1542,10 @@ end function refine_fn(p4est, which_tree, quadrant) # Controller value has been copied to the quadrant's user data storage before. # Unpack quadrant's user data ([global quad ID, controller_value]). - ptr = Ptr{Int}(unsafe_load(quadrant.p.user_data)) - controller_value = unsafe_load(ptr, 2) + # Use `unsafe_load` here since `quadrant.p.user_data isa Ptr{Ptr{Nothing}}` + # and we only need the first (only!) entry + pw = PointerWrapper(Int, unsafe_load(quadrant.p.user_data)) + controller_value = pw[2] if controller_value > 0 # return true (refine) @@ -1586,9 +1591,9 @@ function coarsen_fn(p4est, which_tree, quadrants_ptr) # Controller value has been copied to the quadrant's user data storage before. # Load controller value from quadrant's user data ([global quad ID, controller_value]). - function controller_value(i) - unsafe_load(Ptr{Int}(unsafe_load(quadrants[i].p.user_data)), 2) - end + # Use `unsafe_load` here since `quadrant.p.user_data isa Ptr{Ptr{Nothing}}` + # and we only need the first (only!) entry + controller_value(i) = PointerWrapper(Int, unsafe_load(quadrants[i].p.user_data))[2] # `p4est` calls this function for each 2^ndims quads that could be coarsened to a single one. # Only coarsen if all these 2^ndims quads have been marked for coarsening. 
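Helpers earlier in this patch, such as `load_pointerwrapper_sc`, turn a one-based element index into a byte offset from the base pointer of a flat buffer. A self-contained toy version of that offset computation (the `Float64` buffer is made up; note that pointer addition in Julia is in bytes):

```julia
# The i-th element of a flat buffer of T sits at base + (i - 1) * sizeof(T).
data = Float64[10.0, 20.0, 30.0]
GC.@preserve data begin
    base = Ptr{Nothing}(pointer(data))  # untyped base pointer
    i = 3                               # one-based element index
    elem_ptr = base + (i - 1) * sizeof(Float64)
    @assert unsafe_load(Ptr{Float64}(elem_ptr)) == 30.0
end
```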
@@ -1671,20 +1676,19 @@ end # Copy global quad ID to quad's user data storage, will be called below function save_original_id_iter_volume(info, user_data) - info_obj = unsafe_load(info) + info_pw = PointerWrapper(info) # Load tree from global trees array, one-based indexing - tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) + tree_pw = load_pointerwrapper_tree(info_pw.p4est, info_pw.treeid[] + 1) # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset + offset = tree_pw.quadrants_offset[] # Global quad ID - quad_id = offset + info_obj.quadid + quad_id = offset + info_pw.quadid[] # Unpack quadrant's user data ([global quad ID, controller_value]) - ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) + pw = PointerWrapper(Int, info_pw.quad.p.user_data[]) # Save global quad ID - unsafe_store!(ptr, quad_id, 1) - + pw[1] = quad_id return nothing end @@ -1708,24 +1712,23 @@ end # Extract information about which cells have been changed function collect_changed_iter_volume(info, user_data) - info_obj = unsafe_load(info) + info_pw = PointerWrapper(info) # The original element ID has been saved to user_data before. # Load original quad ID from quad's user data ([global quad ID, controller_value]). - quad_data_ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) - original_id = unsafe_load(quad_data_ptr, 1) + quad_data_pw = PointerWrapper(Int, info_pw.quad.p.user_data[]) + original_id = quad_data_pw[1] # original_id of cells that have been newly created is -1 if original_id >= 0 # Unpack user_data = original_cells - user_data_ptr = Ptr{Int}(user_data) + user_data_pw = PointerWrapper(Int, user_data) # If quad has an original_id, it existed before refinement/coarsening, # and therefore wasn't changed. # Mark original_id as "not changed during refinement/coarsening" in original_cells - unsafe_store!(user_data_ptr, 0, original_id + 1) + user_data_pw[original_id + 1] = 0 end - return nothing end @@ -1756,29 +1759,27 @@ end # Extract newly created cells function collect_new_iter_volume(info, user_data) - info_obj = unsafe_load(info) + info_pw = PointerWrapper(info) # The original element ID has been saved to user_data before. # Unpack quadrant's user data ([global quad ID, controller_value]). 
- quad_data_ptr = Ptr{Int}(unsafe_load(info_obj.quad.p.user_data)) - original_id = unsafe_load(quad_data_ptr, 1) + original_id = PointerWrapper(Int, info_pw.quad.p.user_data[])[1] # original_id of cells that have been newly created is -1 if original_id < 0 # Load tree from global trees array, one-based indexing - tree = unsafe_load_tree(info_obj.p4est, info_obj.treeid + 1) + tree_pw = load_pointerwrapper_tree(info_pw.p4est, info_pw.treeid[] + 1) # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset + offset = tree_pw.quadrants_offset[] # Global quad ID - quad_id = offset + info_obj.quadid + quad_id = offset + info_pw.quadid[] # Unpack user_data = original_cells - user_data_ptr = Ptr{Int}(user_data) + user_data_pw = PointerWrapper(Int, user_data) # Mark cell as "newly created during refinement/coarsening/balancing" - unsafe_store!(user_data_ptr, 1, quad_id + 1) + user_data_pw[quad_id + 1] = 1 end - return nothing end diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl index 9b87de777a6..2b9c6987d24 100644 --- a/src/solvers/dgsem_p4est/containers.jl +++ b/src/solvers/dgsem_p4est/containers.jl @@ -276,18 +276,18 @@ function init_boundaries!(boundaries, mesh::P4estMesh) end # Function barrier for type stability -function init_boundaries_iter_face_inner(info, boundaries, boundary_id, mesh) +function init_boundaries_iter_face_inner(info_pw, boundaries, boundary_id, mesh) # Extract boundary data - side = unsafe_load_side(info) + side_pw = load_pointerwrapper_side(info_pw) # Get local tree, one-based indexing - tree = unsafe_load_tree(mesh.p4est, side.treeid + 1) + tree_pw = load_pointerwrapper_tree(mesh.p4est, side_pw.treeid[] + 1) # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset + offset = tree_pw.quadrants_offset[] # Verify before accessing is.full, but this should never happen - @assert side.is_hanging == false + @assert side_pw.is_hanging[] == false - local_quad_id = side.is.full.quadid + local_quad_id = side_pw.is.full.quadid[] # Global ID of this quad quad_id = offset + local_quad_id @@ -296,13 +296,13 @@ function init_boundaries_iter_face_inner(info, boundaries, boundary_id, mesh) boundaries.neighbor_ids[boundary_id] = quad_id + 1 # Face at which the boundary lies - face = side.face + face = side_pw.face[] # Save boundaries.node_indices dimension specific in containers_[23]d.jl init_boundary_node_indices!(boundaries, face, boundary_id) # One-based indexing - boundaries.name[boundary_id] = mesh.boundary_names[face + 1, side.treeid + 1] + boundaries.name[boundary_id] = mesh.boundary_names[face + 1, side_pw.treeid[] + 1] return nothing end @@ -479,32 +479,33 @@ end # Function barrier for type stability function init_surfaces_iter_face_inner(info, user_data) @unpack interfaces, mortars, boundaries = user_data - elem_count = unsafe_load(info).sides.elem_count + info_pw = PointerWrapper(info) + elem_count = info_pw.sides.elem_count[] if elem_count == 2 # Two neighboring elements => Interface or mortar # Extract surface data - sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) + sides_pw = (load_pointerwrapper_side(info_pw, 1), + load_pointerwrapper_side(info_pw, 2)) - if sides[1].is_hanging == false && sides[2].is_hanging == false + if sides_pw[1].is_hanging[] == false && sides_pw[2].is_hanging[] == false # No hanging nodes => normal interface if interfaces !== nothing - init_interfaces_iter_face_inner(info, sides, user_data) + init_interfaces_iter_face_inner(info_pw, sides_pw, user_data) end 
else # Hanging nodes => mortar if mortars !== nothing - init_mortars_iter_face_inner(info, sides, user_data) + init_mortars_iter_face_inner(info_pw, sides_pw, user_data) end end elseif elem_count == 1 # One neighboring elements => boundary if boundaries !== nothing - init_boundaries_iter_face_inner(info, user_data) + init_boundaries_iter_face_inner(info_pw, user_data) end end - return nothing end @@ -519,18 +520,18 @@ function init_surfaces!(interfaces, mortars, boundaries, mesh::P4estMesh) end # Initialization of interfaces after the function barrier -function init_interfaces_iter_face_inner(info, sides, user_data) +function init_interfaces_iter_face_inner(info_pw, sides_pw, user_data) @unpack interfaces, interface_id, mesh = user_data user_data.interface_id += 1 # Get Tuple of local trees, one-based indexing - trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), - unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) + trees_pw = (load_pointerwrapper_tree(mesh.p4est, sides_pw[1].treeid[] + 1), + load_pointerwrapper_tree(mesh.p4est, sides_pw[2].treeid[] + 1)) # Quadrant numbering offsets of the quadrants at this interface - offsets = SVector(trees[1].quadrants_offset, - trees[2].quadrants_offset) + offsets = SVector(trees_pw[1].quadrants_offset[], + trees_pw[2].quadrants_offset[]) - local_quad_ids = SVector(sides[1].is.full.quadid, sides[2].is.full.quadid) + local_quad_ids = SVector(sides_pw[1].is.full.quadid[], sides_pw[2].is.full.quadid[]) # Global IDs of the neighboring quads quad_ids = offsets + local_quad_ids @@ -540,31 +541,30 @@ function init_interfaces_iter_face_inner(info, sides, user_data) interfaces.neighbor_ids[2, interface_id] = quad_ids[2] + 1 # Face at which the interface lies - faces = (sides[1].face, sides[2].face) + faces = (sides_pw[1].face[], sides_pw[2].face[]) # Save interfaces.node_indices dimension specific in containers_[23]d.jl - init_interface_node_indices!(interfaces, faces, - unsafe_load(info).orientation, interface_id) + init_interface_node_indices!(interfaces, faces, info_pw.orientation[], interface_id) return nothing end # Initialization of boundaries after the function barrier -function init_boundaries_iter_face_inner(info, user_data) +function init_boundaries_iter_face_inner(info_pw, user_data) @unpack boundaries, boundary_id, mesh = user_data user_data.boundary_id += 1 # Extract boundary data - side = unsafe_load_side(info) + side_pw = load_pointerwrapper_side(info_pw) # Get local tree, one-based indexing - tree = unsafe_load_tree(mesh.p4est, side.treeid + 1) + tree_pw = load_pointerwrapper_tree(mesh.p4est, side_pw.treeid[] + 1) # Quadrant numbering offset of this quadrant - offset = tree.quadrants_offset + offset = tree_pw.quadrants_offset[] # Verify before accessing is.full, but this should never happen - @assert side.is_hanging == false + @assert side_pw.is_hanging[] == false - local_quad_id = side.is.full.quadid + local_quad_id = side_pw.is.full.quadid[] # Global ID of this quad quad_id = offset + local_quad_id @@ -573,52 +573,52 @@ function init_boundaries_iter_face_inner(info, user_data) boundaries.neighbor_ids[boundary_id] = quad_id + 1 # Face at which the boundary lies - face = side.face + face = side_pw.face[] # Save boundaries.node_indices dimension specific in containers_[23]d.jl init_boundary_node_indices!(boundaries, face, boundary_id) # One-based indexing - boundaries.name[boundary_id] = mesh.boundary_names[face + 1, side.treeid + 1] + boundaries.name[boundary_id] = mesh.boundary_names[face + 1, side_pw.treeid[] + 1] return nothing end 
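# A minimal sketch (not part of Trixi) of the `PointerWrapper` idiom applied
# throughout this patch, assuming it behaves as in the conversions above:
# wrap a raw `Ptr` once, then use `pw.field` navigation, `pw[]` dereferencing,
# and `pw[i]` for indexed loads and stores.
function pointer_wrapper_sketch(user_data::Ptr{Cvoid})
    pw = PointerWrapper(Int, user_data) # was: ptr = Ptr{Int}(user_data)
    id = pw[1]                          # was: id = unsafe_load(ptr, 1)
    pw[1] = id + 1                      # was: unsafe_store!(ptr, id + 1, 1)
    return nothing
end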
# Initialization of mortars after the function barrier -function init_mortars_iter_face_inner(info, sides, user_data) +function init_mortars_iter_face_inner(info_pw, sides_pw, user_data) @unpack mortars, mortar_id, mesh = user_data user_data.mortar_id += 1 # Get Tuple of local trees, one-based indexing - trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), - unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) + trees_pw = (load_pointerwrapper_tree(mesh.p4est, sides_pw[1].treeid[] + 1), + load_pointerwrapper_tree(mesh.p4est, sides_pw[2].treeid[] + 1)) # Quadrant numbering offsets of the quadrants at this interface - offsets = SVector(trees[1].quadrants_offset, - trees[2].quadrants_offset) + offsets = SVector(trees_pw[1].quadrants_offset[], + trees_pw[2].quadrants_offset[]) - if sides[1].is_hanging == true + if sides_pw[1].is_hanging[] == true # Left is small, right is large - faces = (sides[1].face, sides[2].face) + faces = (sides_pw[1].face[], sides_pw[2].face[]) - local_small_quad_ids = sides[1].is.hanging.quadid + local_small_quad_ids = sides_pw[1].is.hanging.quadid[] # Global IDs of the two small quads small_quad_ids = offsets[1] .+ local_small_quad_ids # Just be sure before accessing is.full - @assert sides[2].is_hanging == false - large_quad_id = offsets[2] + sides[2].is.full.quadid - else # sides[2].is_hanging == true + @assert sides_pw[2].is_hanging[] == false + large_quad_id = offsets[2] + sides_pw[2].is.full.quadid[] + else # sides_pw[2].is_hanging[] == true # Right is small, left is large. # init_mortar_node_indices! below expects side 1 to contain the small elements. - faces = (sides[2].face, sides[1].face) + faces = (sides_pw[2].face[], sides_pw[1].face[]) - local_small_quad_ids = sides[2].is.hanging.quadid + local_small_quad_ids = sides_pw[2].is.hanging.quadid[] # Global IDs of the two small quads small_quad_ids = offsets[2] .+ local_small_quad_ids # Just be sure before accessing is.full - @assert sides[1].is_hanging == false - large_quad_id = offsets[1] + sides[1].is.full.quadid + @assert sides_pw[1].is_hanging[] == false + large_quad_id = offsets[1] + sides_pw[1].is.full.quadid[] end # Write data to mortar container, 1 and 2 are the small elements @@ -627,7 +627,7 @@ function init_mortars_iter_face_inner(info, sides, user_data) # Last entry is the large element mortars.neighbor_ids[end, mortar_id] = large_quad_id + 1 - init_mortar_node_indices!(mortars, faces, unsafe_load(info).orientation, mortar_id) + init_mortar_node_indices!(mortars, faces, info_pw.orientation[], mortar_id) return nothing end @@ -638,34 +638,36 @@ end # - boundaries # and collect the numbers in `user_data` in this order. 
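# The counters live in a plain `Int` buffer behind `user_data`; as used below
# (and with two extra entries in the parallel version later in this patch),
# the layout is
#
#   pw = PointerWrapper(Int, user_data)
#   pw[1]  -> interface count
#   pw[2]  -> mortar count
#   pw[3]  -> boundary count
#   pw[4]  -> MPI interface count (parallel counting only)
#   pw[5]  -> MPI mortar count    (parallel counting only)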
function count_surfaces_iter_face(info, user_data) - elem_count = unsafe_load(info).sides.elem_count + info_pw = PointerWrapper(info) + elem_count = info_pw.sides.elem_count[] if elem_count == 2 # Two neighboring elements => Interface or mortar # Extract surface data - sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) + sides_pw = (load_pointerwrapper_side(info_pw, 1), + load_pointerwrapper_side(info_pw, 2)) - if sides[1].is_hanging == false && sides[2].is_hanging == false + if sides_pw[1].is_hanging[] == false && sides_pw[2].is_hanging[] == false # No hanging nodes => normal interface # Unpack user_data = [interface_count] and increment interface_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 1) - unsafe_store!(ptr, id + 1, 1) + pw = PointerWrapper(Int, user_data) + id = pw[1] + pw[1] = id + 1 else # Hanging nodes => mortar # Unpack user_data = [mortar_count] and increment mortar_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 2) - unsafe_store!(ptr, id + 1, 2) + pw = PointerWrapper(Int, user_data) + id = pw[2] + pw[2] = id + 1 end elseif elem_count == 1 # One neighboring elements => boundary # Unpack user_data = [boundary_count] and increment boundary_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 3) - unsafe_store!(ptr, id + 1, 3) + pw = PointerWrapper(Int, user_data) + id = pw[3] + pw[3] = id + 1 end return nothing diff --git a/src/solvers/dgsem_p4est/containers_2d.jl b/src/solvers/dgsem_p4est/containers_2d.jl index 4f7d903897a..11747f1f175 100644 --- a/src/solvers/dgsem_p4est/containers_2d.jl +++ b/src/solvers/dgsem_p4est/containers_2d.jl @@ -52,7 +52,7 @@ function calc_node_coordinates!(node_coordinates, p4est_root_len = 1 << P4EST_MAXLEVEL p4est_quadrant_len(l) = 1 << (P4EST_MAXLEVEL - l) - trees = unsafe_wrap_sc(p4est_tree_t, unsafe_load(mesh.p4est).trees) + trees = unsafe_wrap_sc(p4est_tree_t, mesh.p4est.trees) for tree in eachindex(trees) offset = trees[tree].quadrants_offset diff --git a/src/solvers/dgsem_p4est/containers_3d.jl b/src/solvers/dgsem_p4est/containers_3d.jl index 6cdc2cf9611..e9994fe4569 100644 --- a/src/solvers/dgsem_p4est/containers_3d.jl +++ b/src/solvers/dgsem_p4est/containers_3d.jl @@ -43,7 +43,7 @@ function calc_node_coordinates!(node_coordinates, p4est_root_len = 1 << P4EST_MAXLEVEL p4est_quadrant_len(l) = 1 << (P4EST_MAXLEVEL - l) - trees = unsafe_wrap_sc(p8est_tree_t, unsafe_load(mesh.p4est).trees) + trees = unsafe_wrap_sc(p8est_tree_t, mesh.p4est.trees) for tree in eachindex(trees) offset = trees[tree].quadrants_offset diff --git a/src/solvers/dgsem_p4est/containers_parallel.jl b/src/solvers/dgsem_p4est/containers_parallel.jl index 42d6ea44c5e..e7ee1f81478 100644 --- a/src/solvers/dgsem_p4est/containers_parallel.jl +++ b/src/solvers/dgsem_p4est/containers_parallel.jl @@ -311,21 +311,24 @@ function init_surfaces_iter_face_inner(info, # surfaces at once or any subset of them, some of the unpacked values above may be `nothing` if # they're not supposed to be initialized during this call. That is why we need additional # `!== nothing` checks below before initializing individual faces. 
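# In rough pseudocode, the classification below mirrors the serial version:
#
#   if elem_count == 2
#       no hanging side  -> MPI interface if one side is a ghost, else interface
#       hanging side     -> MPI mortar if any involved quad is a ghost, else mortar
#   elseif elem_count == 1
#       -> boundary
#   end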
- if unsafe_load(info).sides.elem_count == 2 + info_pw = PointerWrapper(info) + if info_pw.sides.elem_count[] == 2 # Two neighboring elements => Interface or mortar # Extract surface data - sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) + sides_pw = (load_pointerwrapper_side(info_pw, 1), + load_pointerwrapper_side(info_pw, 2)) - if sides[1].is_hanging == false && sides[2].is_hanging == false + if sides_pw[1].is_hanging[] == false && sides_pw[2].is_hanging[] == false # No hanging nodes => normal interface or MPI interface - if sides[1].is.full.is_ghost == true || sides[2].is.full.is_ghost == true # remote side => MPI interface + if sides_pw[1].is.full.is_ghost[] == true || + sides_pw[2].is.full.is_ghost[] == true # remote side => MPI interface if mpi_interfaces !== nothing - init_mpi_interfaces_iter_face_inner(info, sides, user_data) + init_mpi_interfaces_iter_face_inner(info_pw, sides_pw, user_data) end else if interfaces !== nothing - init_interfaces_iter_face_inner(info, sides, user_data) + init_interfaces_iter_face_inner(info_pw, sides_pw, user_data) end end else @@ -333,18 +336,18 @@ function init_surfaces_iter_face_inner(info, # First, we check which side is hanging, i.e., on which side we have the refined cells. # Then we check if any of the refined cells or the coarse cell are "ghost" cells, i.e., they # belong to another rank. That way we can determine if this is a regular mortar or MPI mortar - if sides[1].is_hanging == true - @assert sides[2].is_hanging == false - if any(sides[1].is.hanging.is_ghost .== true) || - sides[2].is.full.is_ghost == true + if sides_pw[1].is_hanging[] == true + @assert sides_pw[2].is_hanging[] == false + if any(sides_pw[1].is.hanging.is_ghost[] .== true) || + sides_pw[2].is.full.is_ghost[] == true face_has_ghost_side = true else face_has_ghost_side = false end - else # sides[2].is_hanging == true - @assert sides[1].is_hanging == false - if sides[1].is.full.is_ghost == true || - any(sides[2].is.hanging.is_ghost .== true) + else # sides_pw[2].is_hanging[] == true + @assert sides_pw[1].is_hanging[] == false + if sides_pw[1].is.full.is_ghost[] == true || + any(sides_pw[2].is.hanging.is_ghost[] .== true) face_has_ghost_side = true else face_has_ghost_side = false @@ -352,15 +355,15 @@ function init_surfaces_iter_face_inner(info, end # Initialize mortar or MPI mortar if face_has_ghost_side && mpi_mortars !== nothing - init_mpi_mortars_iter_face_inner(info, sides, user_data) + init_mpi_mortars_iter_face_inner(info_pw, sides_pw, user_data) elseif !face_has_ghost_side && mortars !== nothing - init_mortars_iter_face_inner(info, sides, user_data) + init_mortars_iter_face_inner(info_pw, sides_pw, user_data) end end - elseif unsafe_load(info).sides.elem_count == 1 + elseif info_pw.sides.elem_count[] == 1 # One neighboring elements => boundary if boundaries !== nothing - init_boundaries_iter_face_inner(info, user_data) + init_boundaries_iter_face_inner(info_pw, user_data) end end @@ -381,23 +384,23 @@ function init_surfaces!(interfaces, mortars, boundaries, mpi_interfaces, mpi_mor end # Initialization of MPI interfaces after the function barrier -function init_mpi_interfaces_iter_face_inner(info, sides, user_data) +function init_mpi_interfaces_iter_face_inner(info_pw, sides_pw, user_data) @unpack mpi_interfaces, mpi_interface_id, mesh = user_data user_data.mpi_interface_id += 1 - if sides[1].is.full.is_ghost == true + if sides_pw[1].is.full.is_ghost[] == true local_side = 2 - elseif sides[2].is.full.is_ghost == true + elseif 
sides_pw[2].is.full.is_ghost[] == true local_side = 1 else error("should not happen") end # Get local tree, one-based indexing - tree = unsafe_load_tree(mesh.p4est, sides[local_side].treeid + 1) + tree_pw = load_pointerwrapper_tree(mesh.p4est, sides_pw[local_side].treeid[] + 1) # Quadrant numbering offset of the local quadrant at this interface - offset = tree.quadrants_offset - tree_quad_id = sides[local_side].is.full.quadid # quadid in the local tree + offset = tree_pw.quadrants_offset[] + tree_quad_id = sides_pw[local_side].is.full.quadid[] # quadid in the local tree # ID of the local neighboring quad, cumulative over local trees local_quad_id = offset + tree_quad_id @@ -406,52 +409,52 @@ function init_mpi_interfaces_iter_face_inner(info, sides, user_data) mpi_interfaces.local_sides[mpi_interface_id] = local_side # Face at which the interface lies - faces = (sides[1].face, sides[2].face) + faces = (sides_pw[1].face[], sides_pw[2].face[]) # Save mpi_interfaces.node_indices dimension specific in containers_[23]d_parallel.jl init_mpi_interface_node_indices!(mpi_interfaces, faces, local_side, - unsafe_load(info).orientation, + info_pw.orientation[], mpi_interface_id) return nothing end # Initialization of MPI mortars after the function barrier -function init_mpi_mortars_iter_face_inner(info, sides, user_data) +function init_mpi_mortars_iter_face_inner(info_pw, sides_pw, user_data) @unpack mpi_mortars, mpi_mortar_id, mesh = user_data user_data.mpi_mortar_id += 1 # Get Tuple of adjacent trees, one-based indexing - trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), - unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) + trees_pw = (load_pointerwrapper_tree(mesh.p4est, sides_pw[1].treeid[] + 1), + load_pointerwrapper_tree(mesh.p4est, sides_pw[2].treeid[] + 1)) # Quadrant numbering offsets of the quadrants at this mortar - offsets = SVector(trees[1].quadrants_offset, - trees[2].quadrants_offset) + offsets = SVector(trees_pw[1].quadrants_offset[], + trees_pw[2].quadrants_offset[]) - if sides[1].is_hanging == true + if sides_pw[1].is_hanging[] == true hanging_side = 1 full_side = 2 - else # sides[2].is_hanging == true + else # sides_pw[2].is_hanging[] == true hanging_side = 2 full_side = 1 end # Just be sure before accessing is.full or is.hanging later - @assert sides[full_side].is_hanging == false - @assert sides[hanging_side].is_hanging == true + @assert sides_pw[full_side].is_hanging[] == false + @assert sides_pw[hanging_side].is_hanging[] == true # Find small quads that are locally available - local_small_quad_positions = findall(sides[hanging_side].is.hanging.is_ghost .== + local_small_quad_positions = findall(sides_pw[hanging_side].is.hanging.is_ghost[] .== false) # Get id of local small quadrants within their tree # Indexing CBinding.Caccessor via a Vector does not work here -> use map instead - tree_small_quad_ids = map(p -> sides[hanging_side].is.hanging.quadid[p], + tree_small_quad_ids = map(p -> sides_pw[hanging_side].is.hanging.quadid[][p], local_small_quad_positions) local_small_quad_ids = offsets[hanging_side] .+ tree_small_quad_ids # ids cumulative over local trees # Determine if large quadrant is available and if yes, determine its id - if sides[full_side].is.full.is_ghost == false - local_large_quad_id = offsets[full_side] + sides[full_side].is.full.quadid + if sides_pw[full_side].is.full.is_ghost[] == false + local_large_quad_id = offsets[full_side] + sides_pw[full_side].is.full.quadid[] else local_large_quad_id = -1 # large quad is ghost end @@ -470,9 +473,8 @@ function 
init_mpi_mortars_iter_face_inner(info, sides, user_data) mpi_mortars.local_neighbor_positions[mpi_mortar_id] = local_neighbor_positions # init_mortar_node_indices! expects side 1 to contain small elements - faces = (sides[hanging_side].face, sides[full_side].face) - init_mortar_node_indices!(mpi_mortars, faces, unsafe_load(info).orientation, - mpi_mortar_id) + faces = (sides_pw[hanging_side].face[], sides_pw[full_side].face[]) + init_mortar_node_indices!(mpi_mortars, faces, info_pw.orientation[], mpi_mortar_id) return nothing end @@ -485,42 +487,45 @@ end # - (MPI) mortars at subdomain boundaries # and collect the numbers in `user_data` in this order. function count_surfaces_iter_face_parallel(info, user_data) - if unsafe_load(info).sides.elem_count == 2 + info_pw = PointerWrapper(info) + if info_pw.sides.elem_count[] == 2 # Two neighboring elements => Interface or mortar # Extract surface data - sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) + sides_pw = (load_pointerwrapper_side(info_pw, 1), + load_pointerwrapper_side(info_pw, 2)) - if sides[1].is_hanging == false && sides[2].is_hanging == false + if sides_pw[1].is_hanging[] == false && sides_pw[2].is_hanging[] == false # No hanging nodes => normal interface or MPI interface - if sides[1].is.full.is_ghost == true || sides[2].is.full.is_ghost == true # remote side => MPI interface + if sides_pw[1].is.full.is_ghost[] == true || + sides_pw[2].is.full.is_ghost[] == true # remote side => MPI interface # Unpack user_data = [mpi_interface_count] and increment mpi_interface_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 4) - unsafe_store!(ptr, id + 1, 4) + pw = PointerWrapper(Int, user_data) + id = pw[4] + pw[4] = id + 1 else # Unpack user_data = [interface_count] and increment interface_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 1) - unsafe_store!(ptr, id + 1, 1) + pw = PointerWrapper(Int, user_data) + id = pw[1] + pw[1] = id + 1 end else # Hanging nodes => mortar or MPI mortar # First, we check which side is hanging, i.e., on which side we have the refined cells. # Then we check if any of the refined cells or the coarse cell are "ghost" cells, i.e., they # belong to another rank. 
That way we can determine if this is a regular mortar or MPI mortar - if sides[1].is_hanging == true - @assert sides[2].is_hanging == false - if any(sides[1].is.hanging.is_ghost .== true) || - sides[2].is.full.is_ghost == true + if sides_pw[1].is_hanging[] == true + @assert sides_pw[2].is_hanging[] == false + if any(sides_pw[1].is.hanging.is_ghost[] .== true) || + sides_pw[2].is.full.is_ghost[] == true face_has_ghost_side = true else face_has_ghost_side = false end - else # sides[2].is_hanging == true - @assert sides[1].is_hanging == false - if sides[1].is.full.is_ghost == true || - any(sides[2].is.hanging.is_ghost .== true) + else # sides_pw[2].is_hanging[] == true + @assert sides_pw[1].is_hanging[] == false + if sides_pw[1].is.full.is_ghost[] == true || + any(sides_pw[2].is.hanging.is_ghost[] .== true) face_has_ghost_side = true else face_has_ghost_side = false @@ -528,23 +533,23 @@ function count_surfaces_iter_face_parallel(info, user_data) end if face_has_ghost_side # Unpack user_data = [mpi_mortar_count] and increment mpi_mortar_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 5) - unsafe_store!(ptr, id + 1, 5) + pw = PointerWrapper(Int, user_data) + id = pw[5] + pw[5] = id + 1 else # Unpack user_data = [mortar_count] and increment mortar_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 2) - unsafe_store!(ptr, id + 1, 2) + pw = PointerWrapper(Int, user_data) + id = pw[2] + pw[2] = id + 1 end end - elseif unsafe_load(info).sides.elem_count == 1 + elseif info_pw.sides.elem_count[] == 1 # One neighboring elements => boundary # Unpack user_data = [boundary_count] and increment boundary_count - ptr = Ptr{Int}(user_data) - id = unsafe_load(ptr, 3) - unsafe_store!(ptr, id + 1, 3) + pw = PointerWrapper(Int, user_data) + id = pw[3] + pw[3] = id + 1 end return nothing diff --git a/src/solvers/dgsem_p4est/dg_parallel.jl b/src/solvers/dgsem_p4est/dg_parallel.jl index ac122d048c1..324bc7f3cd6 100644 --- a/src/solvers/dgsem_p4est/dg_parallel.jl +++ b/src/solvers/dgsem_p4est/dg_parallel.jl @@ -263,15 +263,13 @@ function init_mpi_cache!(mpi_cache::P4estMPICache, mesh::ParallelP4estMesh, uEltype) # Determine local and total number of elements - n_elements_global = Int(unsafe_load(mesh.p4est).global_num_quadrants) - n_elements_by_rank = vcat(Int.(unsafe_wrap(Array, - unsafe_load(mesh.p4est).global_first_quadrant, + n_elements_global = Int(mesh.p4est.global_num_quadrants[]) + n_elements_by_rank = vcat(Int.(unsafe_wrap(Array, mesh.p4est.global_first_quadrant, mpi_nranks())), n_elements_global) |> diff # diff sufficient due to 0-based quad indices n_elements_by_rank = OffsetArray(n_elements_by_rank, 0:(mpi_nranks() - 1)) # Account for 1-based indexing in Julia - first_element_global_id = Int(unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, - mpi_rank() + 1)) + 1 + first_element_global_id = Int(mesh.p4est.global_first_quadrant[mpi_rank() + 1]) + 1 @assert n_elements_global==sum(n_elements_by_rank) "error in total number of elements" # TODO reuse existing structures @@ -379,17 +377,19 @@ function init_neighbor_rank_connectivity_iter_face_inner(info, user_data) @unpack interfaces, interface_id, global_interface_ids, neighbor_ranks_interface, mortars, mortar_id, global_mortar_ids, neighbor_ranks_mortar, mesh = user_data + info_pw = PointerWrapper(info) # Get the global interface/mortar ids and neighbor rank if current face belongs to an MPI # interface/mortar - if unsafe_load(info).sides.elem_count == 2 # MPI interfaces/mortars have two neighboring elements + if 
info_pw.sides.elem_count[] == 2 # MPI interfaces/mortars have two neighboring elements # Extract surface data - sides = (unsafe_load_side(info, 1), unsafe_load_side(info, 2)) + sides_pw = (load_pointerwrapper_side(info_pw, 1), + load_pointerwrapper_side(info_pw, 2)) - if sides[1].is_hanging == false && sides[2].is_hanging == false # No hanging nodes for MPI interfaces - if sides[1].is.full.is_ghost == true + if sides_pw[1].is_hanging[] == false && sides_pw[2].is_hanging[] == false # No hanging nodes for MPI interfaces + if sides_pw[1].is.full.is_ghost[] == true remote_side = 1 local_side = 2 - elseif sides[2].is.full.is_ghost == true + elseif sides_pw[2].is.full.is_ghost[] == true remote_side = 2 local_side = 1 else # both sides are on this rank -> skip since it's a regular interface @@ -397,16 +397,17 @@ function init_neighbor_rank_connectivity_iter_face_inner(info, user_data) end # Sanity check, current face should belong to current MPI interface - local_tree = unsafe_load_tree(mesh.p4est, sides[local_side].treeid + 1) # one-based indexing - local_quad_id = local_tree.quadrants_offset + - sides[local_side].is.full.quadid + local_tree_pw = load_pointerwrapper_tree(mesh.p4est, + sides_pw[local_side].treeid[] + 1) # one-based indexing + local_quad_id = local_tree_pw.quadrants_offset[] + + sides_pw[local_side].is.full.quadid[] @assert interfaces.local_neighbor_ids[interface_id] == local_quad_id + 1 # one-based indexing # Get neighbor ID from ghost layer proc_offsets = unsafe_wrap(Array, - unsafe_load(unsafe_load(info).ghost_layer).proc_offsets, + info_pw.ghost_layer.proc_offsets, mpi_nranks() + 1) - ghost_id = sides[remote_side].is.full.quadid # indexes the ghost layer, 0-based + ghost_id = sides_pw[remote_side].is.full.quadid[] # indexes the ghost layer, 0-based neighbor_rank = findfirst(r -> proc_offsets[r] <= ghost_id < proc_offsets[r + 1], 1:mpi_nranks()) - 1 # MPI ranks are 0-based @@ -415,21 +416,18 @@ function init_neighbor_rank_connectivity_iter_face_inner(info, user_data) # Global interface id is the globally unique quadrant id of the quadrant on the primary # side (1) multiplied by the number of faces per quadrant plus face if local_side == 1 - offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, - mpi_rank() + 1) # one-based indexing + offset = mesh.p4est.global_first_quadrant[mpi_rank() + 1] # one-based indexing primary_quad_id = offset + local_quad_id else - offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, - neighbor_rank + 1) # one-based indexing - primary_quad_id = offset + - unsafe_load(sides[1].is.full.quad.p.piggy3.local_num) + offset = mesh.p4est.global_first_quadrant[neighbor_rank + 1] # one-based indexing + primary_quad_id = offset + sides_pw[1].is.full.quad.p.piggy3.local_num[] end - global_interface_id = 2 * ndims(mesh) * primary_quad_id + sides[1].face + global_interface_id = 2 * ndims(mesh) * primary_quad_id + sides_pw[1].face[] global_interface_ids[interface_id] = global_interface_id user_data.interface_id += 1 else # hanging node - if sides[1].is_hanging == true + if sides_pw[1].is_hanging[] == true hanging_side = 1 full_side = 2 else @@ -437,26 +435,26 @@ function init_neighbor_rank_connectivity_iter_face_inner(info, user_data) full_side = 1 end # Verify before accessing is.full / is.hanging - @assert sides[hanging_side].is_hanging == true && - sides[full_side].is_hanging == false + @assert sides_pw[hanging_side].is_hanging[] == true && + sides_pw[full_side].is_hanging[] == false # If all quadrants are locally available, this is a 
regular mortar -> skip - if sides[full_side].is.full.is_ghost == false && - all(sides[hanging_side].is.hanging.is_ghost .== false) + if sides_pw[full_side].is.full.is_ghost[] == false && + all(sides_pw[hanging_side].is.hanging.is_ghost[] .== false) return nothing end - trees = (unsafe_load_tree(mesh.p4est, sides[1].treeid + 1), - unsafe_load_tree(mesh.p4est, sides[2].treeid + 1)) + trees_pw = (load_pointerwrapper_tree(mesh.p4est, sides_pw[1].treeid[] + 1), + load_pointerwrapper_tree(mesh.p4est, sides_pw[2].treeid[] + 1)) # Find small quads that are remote and determine which rank owns them - remote_small_quad_positions = findall(sides[hanging_side].is.hanging.is_ghost .== + remote_small_quad_positions = findall(sides_pw[hanging_side].is.hanging.is_ghost[] .== true) proc_offsets = unsafe_wrap(Array, - unsafe_load(unsafe_load(info).ghost_layer).proc_offsets, + info_pw.ghost_layer.proc_offsets, mpi_nranks() + 1) # indices of small remote quads inside the ghost layer, 0-based - ghost_ids = map(pos -> sides[hanging_side].is.hanging.quadid[pos], + ghost_ids = map(pos -> sides_pw[hanging_side].is.hanging.quadid[][pos], remote_small_quad_positions) neighbor_ranks = map(ghost_ids) do ghost_id return findfirst(r -> proc_offsets[r] <= ghost_id < proc_offsets[r + 1], @@ -464,28 +462,26 @@ function init_neighbor_rank_connectivity_iter_face_inner(info, user_data) end # Determine global quad id of large element to determine global MPI mortar id # Furthermore, if large element is ghost, add its owner rank to neighbor_ranks - if sides[full_side].is.full.is_ghost == true - ghost_id = sides[full_side].is.full.quadid + if sides_pw[full_side].is.full.is_ghost[] == true + ghost_id = sides_pw[full_side].is.full.quadid[] large_quad_owner_rank = findfirst(r -> proc_offsets[r] <= ghost_id < proc_offsets[r + 1], 1:mpi_nranks()) - 1 # MPI ranks are 0-based push!(neighbor_ranks, large_quad_owner_rank) - offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, - large_quad_owner_rank + 1) # one-based indexing + offset = mesh.p4est.global_first_quadrant[large_quad_owner_rank + 1] # one-based indexing large_quad_id = offset + - unsafe_load(sides[full_side].is.full.quad.p.piggy3.local_num) + sides_pw[full_side].is.full.quad.p.piggy3.local_num[] else - offset = unsafe_load(unsafe_load(mesh.p4est).global_first_quadrant, - mpi_rank() + 1) # one-based indexing - large_quad_id = offset + trees[full_side].quadrants_offset + - sides[full_side].is.full.quadid + offset = mesh.p4est.global_first_quadrant[mpi_rank() + 1] # one-based indexing + large_quad_id = offset + trees_pw[full_side].quadrants_offset[] + + sides_pw[full_side].is.full.quadid[] end neighbor_ranks_mortar[mortar_id] = neighbor_ranks # Global mortar id is the globally unique quadrant id of the large quadrant multiplied by the # number of faces per quadrant plus face global_mortar_ids[mortar_id] = 2 * ndims(mesh) * large_quad_id + - sides[full_side].face + sides_pw[full_side].face[] user_data.mortar_id += 1 end From eb4c91a6ba3c1fe1c1f0bbb9c332ab0068171113 Mon Sep 17 00:00:00 2001 From: David Knapp Date: Mon, 19 Jun 2023 10:48:56 +0200 Subject: [PATCH 053/163] Added a dispatchable constructur for DG with TensorProductWedges (#1389) * Added a dispatchable constructur for DG with TensorProductWedges Adapted the timestepping for DGMulti to use the minimal polynomial degree of the tensorproduct * Use the maximal poly-degree * Update src/solvers/dgmulti/types.jl Co-authored-by: Hendrik Ranocha * Change DGMulti-call for Tensorwedges * Adding comment about the 
polydeg-choice * Adapt constructor * Add an example for TensorWedges * Update example * Update examples/dgmulti_3d/elixir_euler_tensorWedge.jl Co-authored-by: Hendrik Ranocha * Update examples/dgmulti_3d/elixir_euler_tensorWedge.jl Co-authored-by: Hendrik Ranocha * Update examples/dgmulti_3d/elixir_euler_tensorWedge.jl Co-authored-by: Hendrik Ranocha * Readd Gauss Accidentaly deleted during Merge-Conflict resolvement online * Remove explicit load of StartUpDG in elixir * Update max_dt * Apply suggestions from code review Co-authored-by: Hendrik Ranocha * Update comments * Add tensorWedge-test * Update src/solvers/dgmulti/dg.jl Co-authored-by: Jesse Chan <1156048+jlchan@users.noreply.github.com> * Renamed tensorwedge elixir * Update testset title * Remove typos * temporal push * fix max_dt dispatch * Update TensorProductWedges-test * Update test * Address CI-Warning * Update dt_polydeg_scaling * Correct dt_polydeg_scaling A previous commit changed it from (polydeg + 1) to polydeg * Update DGMulti-call * Rename file * Change Path in test-file * Apply suggestions from code review Co-authored-by: Hendrik Ranocha * Update examples/dgmulti_3d/elixir_advection_tensor_wedge.jl Co-authored-by: Hendrik Ranocha * Reformat src/Trixi.jl * Format solvers/dgmulti/dg.jl * Formatting --------- Co-authored-by: Knapp Co-authored-by: Hendrik Ranocha Co-authored-by: Jesse Chan <1156048+jlchan@users.noreply.github.com> Co-authored-by: Jesse Chan --- .../elixir_advection_tensor_wedge.jl | 56 +++++++++++++++++++ src/Trixi.jl | 3 +- src/solvers/dgmulti/dg.jl | 11 ++-- src/solvers/dgmulti/types.jl | 15 +++++ test/test_dgmulti_3d.jl | 6 ++ 5 files changed, 86 insertions(+), 5 deletions(-) create mode 100644 examples/dgmulti_3d/elixir_advection_tensor_wedge.jl diff --git a/examples/dgmulti_3d/elixir_advection_tensor_wedge.jl b/examples/dgmulti_3d/elixir_advection_tensor_wedge.jl new file mode 100644 index 00000000000..4f43f2571a3 --- /dev/null +++ b/examples/dgmulti_3d/elixir_advection_tensor_wedge.jl @@ -0,0 +1,56 @@ +using OrdinaryDiffEq +using Trixi +using LinearAlgebra + +############################################################################### +equations = LinearScalarAdvectionEquation3D(1.0, 1.0, 1.0) + +initial_condition = initial_condition_convergence_test + +# Define the polynomial degrees for the polynoms of the triangular base and the line +# of the tensor-prism +tensor_polydeg = (3, 4) + +dg = DGMulti(element_type = Wedge(), + approximation_type = Polynomial(), + surface_flux = flux_lax_friedrichs, + polydeg = tensor_polydeg) + + +cells_per_dimension = (8, 8, 8) +mesh = DGMultiMesh(dg, + cells_per_dimension, + coordinates_min = (-1.0, -1.0, -1.0), + coordinates_max = (1.0, 1.0, 1.0), + periodicity = true) + + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, dg, + boundary_conditions=boundary_condition_periodic) + +############################################################################### +# ODE solvers, callbacks etc. 
+ +tspan = (0.0, 5.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval, uEltype=real(dg)) + +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step +stepsize_callback = StepsizeCallback(cfl=1.0) + +callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, stepsize_callback) + + +############################################################################### +# run the simulation + +sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false), dt = 1.0, + save_everystep=false, callback=callbacks); + +summary_callback() # print the timer summary \ No newline at end of file diff --git a/src/Trixi.jl b/src/Trixi.jl index 86e349c7dad..66878f4b459 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -83,7 +83,8 @@ import SummationByPartsOperators: integrate, semidiscretize, upwind_operators # DGMulti solvers -@reexport using StartUpDG: StartUpDG, Polynomial, Gauss, SBP, Line, Tri, Quad, Hex, Tet +@reexport using StartUpDG: StartUpDG, Polynomial, Gauss, TensorProductWedge, SBP, Line, Tri, + Quad, Hex, Tet, Wedge using StartUpDG: RefElemData, MeshData, AbstractElemShape # TODO: include_optimized diff --git a/src/solvers/dgmulti/dg.jl b/src/solvers/dgmulti/dg.jl index d51c7cabf9d..bc76aa1a9d2 100644 --- a/src/solvers/dgmulti/dg.jl +++ b/src/solvers/dgmulti/dg.jl @@ -226,6 +226,11 @@ function estimate_dt(mesh::DGMultiMesh, dg::DGMulti) return StartUpDG.estimate_h(rd, mesh.md) / StartUpDG.inverse_trace_constant(rd) end +dt_polydeg_scaling(dg::DGMulti) = inv(dg.basis.N + 1) +function dt_polydeg_scaling(dg::DGMulti{3, <:Wedge, <:TensorProductWedge}) + inv(maximum(dg.basis.N) + 1) +end + # for the stepsize callback function max_dt(u, t, mesh::DGMultiMesh, constant_speed::False, equations, dg::DGMulti{NDIMS}, @@ -247,8 +252,7 @@ function max_dt(u, t, mesh::DGMultiMesh, # `polydeg+1`. This is because `nnodes(dg)` returns the total number of # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns # the number of 1D nodes for `DGSEM` solvers. - polydeg = rd.N - return 2 * dt_min / (polydeg + 1) + return 2 * dt_min * dt_polydeg_scaling(dg) end function max_dt(u, t, mesh::DGMultiMesh, @@ -270,8 +274,7 @@ function max_dt(u, t, mesh::DGMultiMesh, # `polydeg+1`. This is because `nnodes(dg)` returns the total number of # multi-dimensional nodes for DGMulti solver types, while `nnodes(dg)` returns # the number of 1D nodes for `DGSEM` solvers. - polydeg = rd.N - return 2 * dt_min / (polydeg + 1) + return 2 * dt_min * dt_polydeg_scaling(dg) end # interpolates from solution coefficients to face quadrature points diff --git a/src/solvers/dgmulti/types.jl b/src/solvers/dgmulti/types.jl index c225e334e8e..f1f7b158dec 100644 --- a/src/solvers/dgmulti/types.jl +++ b/src/solvers/dgmulti/types.jl @@ -96,6 +96,21 @@ function DGMulti(; polydeg = nothing, polydeg = polydeg, kwargs...) end +# dispatchable constructor for DGMulti using a TensorProductWedge +function DGMulti(element_type::Wedge, + approximation_type, + volume_integral, + surface_integral; + polydeg::Tuple, + kwargs...) + factor_a = RefElemData(Tri(), approximation_type, polydeg[1]; kwargs...) + factor_b = RefElemData(Line(), approximation_type, polydeg[2]; kwargs...) + + tensor = TensorProductWedge(factor_a, factor_b) + rd = RefElemData(element_type, tensor; kwargs...) 
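+    # Note: with a tuple degree such as `polydeg = (3, 4)`, the resulting
+    # `rd.N` (i.e., `dg.basis.N`) carries both degrees. This is why the
+    # `dt_polydeg_scaling` specialization above takes `maximum(dg.basis.N)`:
+    # the time step is scaled by the most restrictive degree,
+    # here 1 / (4 + 1) = 1 / 5.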
+ return DG(rd, nothing, surface_integral, volume_integral) +end + # dispatchable constructor for DGMulti to allow for specialization function DGMulti(element_type::AbstractElemShape, approximation_type, diff --git a/test/test_dgmulti_3d.jl b/test/test_dgmulti_3d.jl index 22c0a0fd3ba..68fa1d13304 100644 --- a/test/test_dgmulti_3d.jl +++ b/test/test_dgmulti_3d.jl @@ -135,6 +135,12 @@ isdir(outdir) && rm(outdir, recursive=true) ) end + @trixi_testset "elixir_advection_tensor_wedge.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_tensor_wedge.jl"), + l2 = [2.30487910e-04] , + linf = [6.31795281e-04] ) + end + end # Clean up afterwards: delete Trixi.jl output directory From a837dd7726f25a88c7bab8bed2dc6955f20534bf Mon Sep 17 00:00:00 2001 From: Andrew Winters Date: Mon, 19 Jun 2023 14:50:27 +0200 Subject: [PATCH 054/163] Update test values for 1D discontinuous examples (#1511) * set true discontinuous initial conditions for 1D * update Burgers tests * update MHD tests in 1D * update structured_1d_dgsem/elixir_advection_shockcapturing elixir and test * remove workaround in elixir_shallowwater_ec.jl and update test * accidentally removed adjusted tolerance in structured mesh test * remove workaround in elixir_shallowwater_shockcapturing.jl and update test * remove workaround in elixir_shallowwater_well_balanced.jl and update tests * bug fix in energy_total computation for ShallowWaterTwoLayerEquations1D * remove workaround in elixir_shallowwater_twolayer_dam_break.jl and update test * update docs test for adding a nonconservative equation * update Nonconservative terms in 1D (linear advection) test * update all necessary IdealGlmMhdMulticomponentEquations1D tests * update all necessary CompressibleEulerMulticomponentEquations1D tests * update necessary CompressibleEuler1D tests except for neural network shock capturing * Update test values for NN-based SC * add short description to the NEWS.md * add extra test to avoid coverage drop * Update src/solvers/dg.jl Co-authored-by: Hendrik Ranocha --------- Co-authored-by: Michael Schlottke-Lakemper Co-authored-by: Hendrik Ranocha --- NEWS.md | 1 + .../files/adding_nonconservative_equation.jl | 2 +- .../elixir_advection_shockcapturing.jl | 6 +- .../tree_1d_dgsem/elixir_burgers_shock.jl | 6 +- ..._eulermulti_two_interacting_blast_waves.jl | 2 +- .../elixir_mhd_briowu_shock_tube.jl | 2 +- examples/tree_1d_dgsem/elixir_mhdmulti_ec.jl | 4 +- examples/tree_1d_dgsem/elixir_mhdmulti_es.jl | 4 +- .../tree_1d_dgsem/elixir_shallowwater_ec.jl | 71 ++++++--------- .../elixir_shallowwater_shock_capturing.jl | 81 +++++++---------- .../elixir_shallowwater_twolayer_dam_break.jl | 77 +++++++---------- .../elixir_shallowwater_well_balanced.jl | 56 +++--------- src/equations/shallow_water_two_layer_1d.jl | 86 +++++++++---------- src/solvers/dg.jl | 9 ++ test/test_structured_1d.jl | 4 +- test/test_tree_1d.jl | 2 +- test/test_tree_1d_burgers.jl | 8 +- test/test_tree_1d_euler.jl | 44 +++++----- test/test_tree_1d_eulermulti.jl | 32 ++++--- test/test_tree_1d_mhd.jl | 12 +-- test/test_tree_1d_mhdmulti.jl | 48 +++++------ test/test_tree_1d_shallowwater.jl | 28 +++--- test/test_tree_1d_shallowwater_twolayer.jl | 6 +- 23 files changed, 267 insertions(+), 324 deletions(-) diff --git a/NEWS.md b/NEWS.md index 9b46ba565fe..35c7039b2ef 100644 --- a/NEWS.md +++ b/NEWS.md @@ -9,6 +9,7 @@ for human readability. #### Added - Experimental support for 3D parabolic diffusion terms has been added. +- Capability to set truly discontinuous initial conditions in 1D. 
#### Changed diff --git a/docs/literate/src/files/adding_nonconservative_equation.jl b/docs/literate/src/files/adding_nonconservative_equation.jl index 08dd631058e..110fa486070 100644 --- a/docs/literate/src/files/adding_nonconservative_equation.jl +++ b/docs/literate/src/files/adding_nonconservative_equation.jl @@ -147,7 +147,7 @@ plot(sol) # above. error_1 = analysis_callback(sol).l2 |> first -@test isapprox(error_1, 0.0002961027497) #src +@test isapprox(error_1, 0.00029609575838969394) #src # Next, we increase the grid resolution by one refinement level and run the # simulation again. diff --git a/examples/structured_1d_dgsem/elixir_advection_shockcapturing.jl b/examples/structured_1d_dgsem/elixir_advection_shockcapturing.jl index 9a81acfe51c..313812fe08d 100644 --- a/examples/structured_1d_dgsem/elixir_advection_shockcapturing.jl +++ b/examples/structured_1d_dgsem/elixir_advection_shockcapturing.jl @@ -9,7 +9,9 @@ advection_velocity = 1.0 """ initial_condition_composite(x, t, equations::LinearScalarAdvectionEquation1D) -Slight simplification of +Wave form that is a combination of a Gaussian pulse, a square wave, a triangle wave, +and half an ellipse with periodic boundary conditions. +Slight simplification from - Jiang, Shu (1996) Efficient Implementation of Weighted ENO Schemes [DOI: 10.1006/jcph.1996.0130](https://doi.org/10.1006/jcph.1996.0130) @@ -60,7 +62,7 @@ volume_integral = VolumeIntegralShockCapturingHG(indicator_sc; solver = DGSEM(basis, surface_flux, volume_integral) # Create curved mesh -cells_per_dimension = (125,) +cells_per_dimension = (120,) coordinates_min = (-1.0,) # minimum coordinate coordinates_max = (1.0,) # maximum coordinate mesh = StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max, diff --git a/examples/tree_1d_dgsem/elixir_burgers_shock.jl b/examples/tree_1d_dgsem/elixir_burgers_shock.jl index 987fb320ad6..00b5314e19f 100644 --- a/examples/tree_1d_dgsem/elixir_burgers_shock.jl +++ b/examples/tree_1d_dgsem/elixir_burgers_shock.jl @@ -21,7 +21,7 @@ surface_flux = flux_lax_friedrichs volume_integral = VolumeIntegralShockCapturingHG(indicator_sc; volume_flux_dg=surface_flux, volume_flux_fv=surface_flux) - + solver = DGSEM(basis, surface_flux, volume_integral) coordinate_min = 0.0 @@ -59,7 +59,7 @@ end boundary_conditions = (x_neg=boundary_condition_inflow, x_pos=boundary_condition_outflow) - + initial_condition = initial_condition_shock semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, @@ -79,7 +79,7 @@ analysis_callback = AnalysisCallback(semi, interval=analysis_interval) alive_callback = AliveCallback(analysis_interval=analysis_interval) -stepsize_callback = StepsizeCallback(cfl=0.9) +stepsize_callback = StepsizeCallback(cfl=0.85) callbacks = CallbackSet(summary_callback, diff --git a/examples/tree_1d_dgsem/elixir_eulermulti_two_interacting_blast_waves.jl b/examples/tree_1d_dgsem/elixir_eulermulti_two_interacting_blast_waves.jl index 353093e5f70..81966194180 100644 --- a/examples/tree_1d_dgsem/elixir_eulermulti_two_interacting_blast_waves.jl +++ b/examples/tree_1d_dgsem/elixir_eulermulti_two_interacting_blast_waves.jl @@ -88,7 +88,7 @@ ode = semidiscretize(semi, tspan) summary_callback = SummaryCallback() -analysis_interval = 100 +analysis_interval = 1000 analysis_callback = AnalysisCallback(semi, interval=analysis_interval) diff --git a/examples/tree_1d_dgsem/elixir_mhd_briowu_shock_tube.jl b/examples/tree_1d_dgsem/elixir_mhd_briowu_shock_tube.jl index 1c07fc4fdde..c5727109d92 100644 --- 
a/examples/tree_1d_dgsem/elixir_mhd_briowu_shock_tube.jl +++ b/examples/tree_1d_dgsem/elixir_mhd_briowu_shock_tube.jl @@ -94,7 +94,7 @@ amr_callback = AMRCallback(semi, amr_controller, adapt_initial_condition=true, adapt_initial_condition_only_refine=true) -stepsize_callback = StepsizeCallback(cfl=0.8) +stepsize_callback = StepsizeCallback(cfl=0.65) callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, diff --git a/examples/tree_1d_dgsem/elixir_mhdmulti_ec.jl b/examples/tree_1d_dgsem/elixir_mhdmulti_ec.jl index 34fdce6634e..69ea0551bed 100644 --- a/examples/tree_1d_dgsem/elixir_mhdmulti_ec.jl +++ b/examples/tree_1d_dgsem/elixir_mhdmulti_ec.jl @@ -4,8 +4,8 @@ using Trixi ############################################################################### # semidiscretization of the ideal MHD equations -equations = IdealGlmMhdMulticomponentEquations1D(gammas = (2.0, 2.0, 2.0), - gas_constants = (2.0, 2.0, 2.0)) +equations = IdealGlmMhdMulticomponentEquations1D(gammas = (2.0, 2.0, 2.0), + gas_constants = (2.0, 2.0, 2.0)) initial_condition = initial_condition_weak_blast_wave diff --git a/examples/tree_1d_dgsem/elixir_mhdmulti_es.jl b/examples/tree_1d_dgsem/elixir_mhdmulti_es.jl index 8ca32194b9e..93cf3e0fdb2 100644 --- a/examples/tree_1d_dgsem/elixir_mhdmulti_es.jl +++ b/examples/tree_1d_dgsem/elixir_mhdmulti_es.jl @@ -4,8 +4,8 @@ using Trixi ############################################################################### # semidiscretization of the ideal MHD equations -equations = IdealGlmMhdMulticomponentEquations1D(gammas = (2.0, 2.0, 2.0), - gas_constants = (2.0, 2.0, 2.0)) +equations = IdealGlmMhdMulticomponentEquations1D(gammas = (2.0, 2.0, 2.0), + gas_constants = (2.0, 2.0, 2.0)) initial_condition = initial_condition_weak_blast_wave diff --git a/examples/tree_1d_dgsem/elixir_shallowwater_ec.jl b/examples/tree_1d_dgsem/elixir_shallowwater_ec.jl index be6a2cb166c..1469afec1ca 100644 --- a/examples/tree_1d_dgsem/elixir_shallowwater_ec.jl +++ b/examples/tree_1d_dgsem/elixir_shallowwater_ec.jl @@ -8,9 +8,34 @@ using Trixi equations = ShallowWaterEquations1D(gravity_constant=9.81) -# Note, this initial condition is used to compute errors in the analysis callback but the initialization is -# overwritten by `initial_condition_ec_discontinuous_bottom` below. -initial_condition = initial_condition_weak_blast_wave +# Initial condition with a truly discontinuous water height, velocity, and bottom +# topography function as an academic testcase for entropy conservation. +# The errors from the analysis callback are not important but `∑∂S/∂U ⋅ Uₜ` should +# be around machine roundoff. +# Works as intended for TreeMesh1D with `initial_refinement_level=4`. If the mesh +# refinement level is changed the initial condition below may need changed as well to +# ensure that the discontinuities lie on an element interface. 
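+# For a uniform TreeMesh1D with 2^level elements, the alignment assumption can
+# be checked as a sketch: a jump at `x_jump` lies on an element interface iff
+#
+#   dx = (coordinates_max - coordinates_min) / 2^level
+#   isinteger((x_jump - coordinates_min) / dx)
+#
+# which is what restricts the admissible refinement levels mentioned above.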
+function initial_condition_ec_discontinuous_bottom(x, t, equations::ShallowWaterEquations1D) + # Set the background values + H = 4.25 + v = 0.0 + b = sin(x[1]) # arbitrary continuous function + + # Setup the discontinuous water height and velocity + if x[1] >= 0.125 && x[1] <= 0.25 + H = 5.0 + v = 0.1882 + end + + # Setup a discontinuous bottom topography + if x[1] >= -0.25 && x[1] <= -0.125 + b = 2.0 + 0.5 * sin(2.0 * pi * x[1]) + end + + return prim2cons(SVector(H, v, b), equations) +end + +initial_condition = initial_condition_ec_discontinuous_bottom ############################################################################### # Get the DG approximation space @@ -37,46 +62,6 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) tspan = (0.0, 2.0) ode = semidiscretize(semi, tspan) -############################################################################### -# Workaround to set a discontinuous bottom topography and initial condition for debugging and testing. - -# alternative version of the initial conditinon used to setup a truly discontinuous -# bottom topography function and initial condition for this academic testcase of entropy conservation. -# The errors from the analysis callback are not important but `∑∂S/∂U ⋅ Uₜ` should be around machine roundoff -# In contrast to the usual signature of initial conditions, this one get passed the -# `element_id` explicitly. In particular, this initial conditions works as intended -# only for the TreeMesh1D with `initial_refinement_level=4`. -function initial_condition_ec_discontinuous_bottom(x, t, element_id, equations::ShallowWaterEquations1D) - # Set the background values - H = 4.25 - v = 0.0 - b = sin(x[1]) # arbitrary continuous function - - # setup the discontinuous water height and velocity - if element_id == 10 - H = 5.0 - v = 0.1882 - end - - # Setup a discontinuous bottom topography using the element id number - if element_id == 7 - b = 2.0 + 0.5 * sin(2.0 * pi * x[1]) - end - - return prim2cons(SVector(H, v, b), equations) -end - -# point to the data we want to augment -u = Trixi.wrap_array(ode.u0, semi) -# reset the initial condition -for element in eachelement(semi.solver, semi.cache) - for i in eachnode(semi.solver) - x_node = Trixi.get_node_coords(semi.cache.elements.node_coordinates, equations, semi.solver, i, element) - u_node = initial_condition_ec_discontinuous_bottom(x_node, first(tspan), element, equations) - Trixi.set_node_vars!(u, u_node, equations, semi.solver, i, element) - end -end - ############################################################################### # Callbacks diff --git a/examples/tree_1d_dgsem/elixir_shallowwater_shock_capturing.jl b/examples/tree_1d_dgsem/elixir_shallowwater_shock_capturing.jl index 50241126a28..62346d7b5ab 100644 --- a/examples/tree_1d_dgsem/elixir_shallowwater_shock_capturing.jl +++ b/examples/tree_1d_dgsem/elixir_shallowwater_shock_capturing.jl @@ -7,24 +7,37 @@ using Trixi equations = ShallowWaterEquations1D(gravity_constant=9.812, H0=1.75) -function initial_condition_stone_throw(x, t, equations::ShallowWaterEquations1D) - # Set up polar coordinates - inicenter = 0.15 - x_norm = x[1] - inicenter[1] - r = abs(x_norm) +# Initial condition with a truly discontinuous velocity and bottom topography. +# Works as intended for TreeMesh1D with `initial_refinement_level=3`. If the mesh +# refinement level is changed the initial condition below may need changed as well to +# ensure that the discontinuities lie on an element interface. 
+function initial_condition_stone_throw_discontinuous_bottom(x, t, equations::ShallowWaterEquations1D) + + # Calculate primitive variables + + # flat lake + H = equations.H0 + + # Discontinuous velocity + v = 0.0 + if x[1] >= -0.75 && x[1] <= 0.0 + v = -1.0 + elseif x[1] >= 0.0 && x[1] <= 0.75 + v = 1.0 + end - # Calculate primitive variables - H = equations.H0 - # v = 0.0 # for well-balanced test - v = r < 0.6 ? 1.75 : 0.0 # for stone throw + b = ( 1.5 / exp( 0.5 * ((x[1] - 1.0)^2 ) ) + + 0.75 / exp( 0.5 * ((x[1] + 1.0)^2 ) ) ) - b = ( 1.5 / exp( 0.5 * ((x[1] - 1.0)^2 ) ) - + 0.75 / exp( 0.5 * ((x[1] + 1.0)^2 ) ) ) + # Force a discontinuous bottom topography + if x[1] >= -1.5 && x[1] <= 0.0 + b = 0.5 + end - return prim2cons(SVector(H, v, b), equations) + return prim2cons(SVector(H, v, b), equations) end -initial_condition = initial_condition_stone_throw +initial_condition = initial_condition_stone_throw_discontinuous_bottom boundary_condition = boundary_condition_slip_wall @@ -62,49 +75,13 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, boundary_conditions = boundary_condition) ############################################################################### -# ODE solvers, callbacks etc. +# ODE solver tspan = (0.0, 3.0) ode = semidiscretize(semi, tspan) -# Hack in a discontinuous bottom for a more interesting test -function initial_condition_stone_throw_discontinuous_bottom(x, t, element_id, equations::ShallowWaterEquations1D) - - inicenter = 0.15 - x_norm = x[1] - inicenter[1] - r = abs(x_norm) - - # Calculate primitive variables - H = equations.H0 # flat lake - # Discontinuous velocity set via element id number - v = 0.0 - if element_id == 4 - v = -1.0 - elseif element_id == 5 - v = 1.0 - end - - b = ( 1.5 / exp( 0.5 * ((x[1] - 1.0)^2 ) ) - + 0.75 / exp( 0.5 * ((x[1] + 1.0)^2 ) ) ) - - # Setup a discontinuous bottom topography using the element id number - if element_id == 3 || element_id == 4 - b = 0.5 - end - - return prim2cons(SVector(H, v, b), equations) -end - -# point to the data we want to augment -u = Trixi.wrap_array(ode.u0, semi) -# reset the initial condition -for element in eachelement(semi.solver, semi.cache) - for i in eachnode(semi.solver) - x_node = Trixi.get_node_coords(semi.cache.elements.node_coordinates, equations, semi.solver, i, element) - u_node = initial_condition_stone_throw_discontinuous_bottom(x_node, first(tspan), element, equations) - Trixi.set_node_vars!(u, u_node, equations, semi.solver, i, element) - end -end +############################################################################### +# Callbacks summary_callback = SummaryCallback() diff --git a/examples/tree_1d_dgsem/elixir_shallowwater_twolayer_dam_break.jl b/examples/tree_1d_dgsem/elixir_shallowwater_twolayer_dam_break.jl index 67c1098b178..60770d158fa 100644 --- a/examples/tree_1d_dgsem/elixir_shallowwater_twolayer_dam_break.jl +++ b/examples/tree_1d_dgsem/elixir_shallowwater_twolayer_dam_break.jl @@ -3,20 +3,34 @@ using OrdinaryDiffEq using Trixi ############################################################################### -# Semidiscretization of the two-layer shallow water equations for a dam break test with a -# discontinuous bottom topography function to test entropy conservation +# Semidiscretization of the two-layer shallow water equations for a dam break +# test with a discontinuous bottom topography function to test entropy conservation equations = ShallowWaterTwoLayerEquations1D(gravity_constant=9.81, H0=2.0, rho_upper=0.9, rho_lower=1.0) 
-############################################################################### -# Workaround to set a discontinuous bottom topography and initial condition. +# Initial condition of a dam break with a discontinuous water heights and bottom topography. +# Works as intended for TreeMesh1D with `initial_refinement_level=5`. If the mesh +# refinement level is changed the initial condition below may need changed as well to +# ensure that the discontinuities lie on an element interface. +function initial_condition_dam_break(x, t, equations::ShallowWaterTwoLayerEquations1D) + v1_upper = 0.0 + v1_lower = 0.0 + + # Set the discontinuity + if x[1] <= 10.0 + H_lower = 2.0 + H_upper = 4.0 + b = 0.0 + else + H_lower = 1.5 + H_upper = 3.0 + b = 0.5 + end + + return prim2cons(SVector(H_upper, v1_upper, H_lower, v1_lower, b), equations) +end -# This test case uses a special work around to setup a truly discontinuous bottom topography -# function and initial condition for this academic testcase of entropy conservation. First, a -# dummy initial condition is introduced to create the semidiscretization. Then the initial condition -# is reset with the true discontinuous values from initial_condition_dam_break. Note, that this -# initial condition only works for TreeMesh1D with `initial_refinement_level=5`. -initial_condition = initial_condition_convergence_test +initial_condition = initial_condition_dam_break ############################################################################### # Get the DG approximation space @@ -25,7 +39,6 @@ volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal) solver = DGSEM(polydeg=3, surface_flux=(flux_fjordholm_etal, flux_nonconservative_fjordholm_etal), volume_integral=VolumeIntegralFluxDifferencing(volume_flux)) - ############################################################################### # Get the TreeMesh and setup a non-periodic mesh @@ -39,54 +52,22 @@ mesh = TreeMesh(coordinates_min, coordinates_max, boundary_condition = boundary_condition_slip_wall # create the semidiscretization object -semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, boundary_conditions=boundary_condition) ############################################################################### -# ODE solvers, callbacks etc. 
+# ODE solvers tspan = (0.0,0.4) ode = semidiscretize(semi, tspan) -# Initial conditions dam break test case -function initial_condition_dam_break(x, t, element_id, equations::ShallowWaterTwoLayerEquations1D) - v1_upper = 0.0 - v1_lower = 0.0 - - # Set the discontinuity - if element_id <= 16 - H_lower = 2.0 - H_upper = 4.0 - b = 0.0 - else - H_lower = 1.5 - H_upper = 3.0 - b = 0.5 - end - - return prim2cons(SVector(H_upper, v1_upper, H_lower, v1_lower, b), equations) -end - - -# point to the data we want to augment -u = Trixi.wrap_array(ode.u0, semi) -# reset the initial condition -for element in eachelement(semi.solver, semi.cache) - for i in eachnode(semi.solver) - x_node = Trixi.get_node_coords(semi.cache.elements.node_coordinates, - equations, semi.solver, i, element) - u_node = initial_condition_dam_break(x_node, first(tspan), element, equations) - Trixi.set_node_vars!(u, u_node, equations, semi.solver, i, element) - end -end - - - +############################################################################### +# Callbacks summary_callback = SummaryCallback() analysis_interval = 500 -analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=false, +analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=false, extra_analysis_integrals=(energy_total, energy_kinetic, energy_internal,)) stepsize_callback = StepsizeCallback(cfl=1.0) diff --git a/examples/tree_1d_dgsem/elixir_shallowwater_well_balanced.jl b/examples/tree_1d_dgsem/elixir_shallowwater_well_balanced.jl index 83835656839..e07bc04d76a 100644 --- a/examples/tree_1d_dgsem/elixir_shallowwater_well_balanced.jl +++ b/examples/tree_1d_dgsem/elixir_shallowwater_well_balanced.jl @@ -8,21 +8,28 @@ using Trixi equations = ShallowWaterEquations1D(gravity_constant=9.81, H0=3.25) -# An initial condition with constant total water height and zero velocities to test well-balancedness. -# Note, this routine is used to compute errors in the analysis callback but the initialization is -# overwritten by `initial_condition_discontinuous_well_balancedness` below. -function initial_condition_well_balancedness(x, t, equations::ShallowWaterEquations1D) +# Setup a truly discontinuous bottom topography function for this academic +# testcase of well-balancedness. The errors from the analysis callback are +# not important but the error for this lake-at-rest test case +# `∑|H0-(h+b)|` should be around machine roundoff. +# Works as intended for TreeMesh1D with `initial_refinement_level=3`. If the mesh +# refinement level is changed the initial condition below may need changed as well to +# ensure that the discontinuities lie on an element interface. 
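+# Usage note (a sketch): the same lake-at-rest error can also be monitored
+# during the run by adding `lake_at_rest_error` to the analysis callback,
+# assuming it is available for this equation set, e.g.
+#
+#   analysis_callback = AnalysisCallback(semi, interval = analysis_interval,
+#                                        extra_analysis_integrals = (lake_at_rest_error,))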
+function initial_condition_discontinuous_well_balancedness(x, t, equations::ShallowWaterEquations1D) # Set the background values H = equations.H0 v = 0.0 + b = 0.0 - # bottom topography inspired by from Pond.control in [HOHQMesh](https://github.com/trixi-framework/HOHQMesh) - b = (1.5 / exp( 0.5 * ((x[1] - 1.0)^2) )+ 0.75 / exp(0.5 * ((x[1] + 1.0)^2))) + # Setup a discontinuous bottom topography + if x[1] >= 0.5 && x[1] <= 0.75 + b = 2.0 + 0.5 * sin(2.0 * pi * x[1]) + end return prim2cons(SVector(H, v, b), equations) end -initial_condition = initial_condition_well_balancedness +initial_condition = initial_condition_discontinuous_well_balancedness ############################################################################### # Get the DG approximation space @@ -50,41 +57,6 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) tspan = (0.0, 100.0) ode = semidiscretize(semi, tspan) -############################################################################### -# Workaround to set a discontinuous bottom topography and initial condition for debugging and testing. - -# alternative version of the initial conditinon used to setup a truly discontinuous -# bottom topography function for this academic testcase of well-balancedness. -# The errors from the analysis callback are not important but the error for this lake at rest test case -# `∑|H0-(h+b)|` should be around machine roundoff. -# In contrast to the usual signature of initial conditions, this one get passed the -# `element_id` explicitly. In particular, this initial conditions works as intended -# only for the TreeMesh1D with `initial_refinement_level=3`. -function initial_condition_discontinuous_well_balancedness(x, t, element_id, equations::ShallowWaterEquations1D) - # Set the background values - H = equations.H0 - v = 0.0 - b = 0.0 - - # Setup a discontinuous bottom topography using the element id number - if element_id == 7 - b = 2.0 + 0.5 * sin(2.0 * pi * x[1]) - end - - return prim2cons(SVector(H, v, b), equations) -end - -# point to the data we want to augment -u = Trixi.wrap_array(ode.u0, semi) -# reset the initial condition -for element in eachelement(semi.solver, semi.cache) - for i in eachnode(semi.solver) - x_node = Trixi.get_node_coords(semi.cache.elements.node_coordinates, equations, semi.solver, i, element) - u_node = initial_condition_discontinuous_well_balancedness(x_node, first(tspan), element, equations) - Trixi.set_node_vars!(u, u_node, equations, semi.solver, i, element) - end -end - ############################################################################### # Callbacks diff --git a/src/equations/shallow_water_two_layer_1d.jl b/src/equations/shallow_water_two_layer_1d.jl index edf7d5e32ff..02899171509 100644 --- a/src/equations/shallow_water_two_layer_1d.jl +++ b/src/equations/shallow_water_two_layer_1d.jl @@ -11,28 +11,28 @@ Two-Layer Shallow Water equations (2LSWE) in one space dimension. 
The equations are given by
```math
\begin{alignat*}{4}
-&\frac{\partial}{\partial t}h_{upper} 
-&&+ \frac{\partial}{\partial x}\left(h_{upper} v_{1,upper}\right) 
+&\frac{\partial}{\partial t}h_{upper}
+&&+ \frac{\partial}{\partial x}\left(h_{upper} v_{1,upper}\right)
&&= 0 \\
-&\frac{\partial}{\partial t}\left(h_{upper}v_{1,upper}\right) 
-&&+ \frac{\partial}{\partial x}\left(h_{upper}v_{1,upper}^2 + \dfrac{gh_{upper}^2}{2}\right) 
+&\frac{\partial}{\partial t}\left(h_{upper}v_{1,upper}\right)
+&&+ \frac{\partial}{\partial x}\left(h_{upper}v_{1,upper}^2 + \dfrac{gh_{upper}^2}{2}\right)
&&= -gh_{upper}\frac{\partial}{\partial x}\left(b+h_{lower}\right)\\
-&\frac{\partial}{\partial t}h_{lower} 
-&&+ \frac{\partial}{\partial x}\left(h_{lower}v_{1,lower}\right) 
+&\frac{\partial}{\partial t}h_{lower}
+&&+ \frac{\partial}{\partial x}\left(h_{lower}v_{1,lower}\right)
&&= 0 \\
-&\frac{\partial}{\partial t}\left(h_{lower}v_{1,lower}\right) 
-&&+ \frac{\partial}{\partial x}\left(h_{lower}v_{1,lower}^2 + \dfrac{gh_{lower}^2}{2}\right) 
+&\frac{\partial}{\partial t}\left(h_{lower}v_{1,lower}\right)
+&&+ \frac{\partial}{\partial x}\left(h_{lower}v_{1,lower}^2 + \dfrac{gh_{lower}^2}{2}\right)
&&= -gh_{lower}\frac{\partial}{\partial x}\left(b+\dfrac{\rho_{upper}}{\rho_{lower}}h_{upper}\right).
\end{alignat*}
```
-The unknown quantities of the 2LSWE are the water heights of the {lower} layer ``h_{lower}`` and the 
-{upper} layer ``h_{upper}`` with respective velocities ``v_{1,upper}`` and ``v_{1,lower}``. The gravitational constant is 
-denoted by `g`, the layer densitites by ``\rho_{upper}``and ``\rho_{lower}`` and the (possibly) variable 
-bottom topography function ``b(x)``. The conservative variable water height ``h_{lower}`` is measured 
-from the bottom topography ``b`` and ``h_{upper}`` relative to ``h_{lower}``, therefore one also defines the 
+The unknown quantities of the 2LSWE are the water heights of the lower layer ``h_{lower}`` and the
+upper layer ``h_{upper}`` with respective velocities ``v_{1,upper}`` and ``v_{1,lower}``. The gravitational constant is
+denoted by `g`, the layer densities by ``\rho_{upper}`` and ``\rho_{lower}`` and the (possibly) variable
+bottom topography function ``b(x)``. The conservative variable water height ``h_{lower}`` is measured
+from the bottom topography ``b`` and ``h_{upper}`` relative to ``h_{lower}``, therefore one also defines the
 total water heights as ``H_{upper} = h_{upper} + h_{lower} + b`` and ``H_{lower} = h_{lower} + b``.
-The densities must be chosen such that ``\rho_{upper} < \rho_{lower}``, to make sure that the heavier fluid 
+The densities must be chosen such that ``\rho_{upper} < \rho_{lower}``, to make sure that the heavier fluid
``\rho_{lower}`` is in the bottom layer and the lighter fluid ``\rho_{upper}`` in the upper layer.

The additional quantity ``H_0`` is also available to store a reference value for the total water
@@ -41,13 +41,13 @@ height that is useful to set initial conditions or test the "lake-at-rest" well-
The bottom topography function ``b(x)`` is set inside the initial condition routine
for a particular problem setup.

-In addition to the unknowns, Trixi currently stores the bottom topography values at the 
-approximation points despite being fixed in time.
This is done for convenience of computing the -bottom topography gradients on the fly during the approximation as well as computing auxiliary +In addition to the unknowns, Trixi currently stores the bottom topography values at the +approximation points despite being fixed in time. This is done for convenience of computing the +bottom topography gradients on the fly during the approximation as well as computing auxiliary quantities like the total water height ``H`` or the entropy variables. This affects the implementation and use of these equations in various ways: * The flux values corresponding to the bottom topography must be zero. -* The bottom topography values must be included when defining initial conditions, boundary +* The bottom topography values must be included when defining initial conditions, boundary conditions or source terms. * [`AnalysisCallback`](@ref) analyzes this variable. * Trixi's visualization tools will visualize the bottom topography by default. @@ -101,7 +101,7 @@ end initial_condition_convergence_test(x, t, equations::ShallowWaterTwoLayerEquations1D) A smooth initial condition used for convergence tests in combination with -[`source_terms_convergence_test`](@ref) (and +[`source_terms_convergence_test`](@ref) (and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ function initial_condition_convergence_test(x, t, @@ -121,9 +121,9 @@ end """ source_terms_convergence_test(u, x, t, equations::ShallowWaterTwoLayerEquations1D) -Source terms used for convergence tests in combination with -[`initial_condition_convergence_test`](@ref) -(and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) +Source terms used for convergence tests in combination with +[`initial_condition_convergence_test`](@ref) +(and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). """ @inline function source_terms_convergence_test(u, x, t, @@ -167,8 +167,8 @@ the internal value. For details see Section 9.2.5 of the book: - Eleuterio F. Toro (2001) - Shock-Capturing Methods for Free-Surface Shallow Flows - 1st edition + Shock-Capturing Methods for Free-Surface Shallow Flows + 1st edition ISBN 0471987662 """ @inline function boundary_condition_slip_wall(u_inner, orientation_or_normal, direction, @@ -219,7 +219,7 @@ end Non-symmetric two-point volume flux discretizing the nonconservative (source) term that contains the gradient of the bottom topography [`ShallowWaterTwoLayerEquations2D`](@ref) and an -additional term that couples the momentum of both layers. This is a slightly modified version +additional term that couples the momentum of both layers. This is a slightly modified version to account for the additional source term compared to the standard SWE described in the paper. Further details are available in the paper: @@ -238,7 +238,7 @@ Further details are available in the paper: z = zero(eltype(u_ll)) - # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, + # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, # 0, g*h_lower*(b+r*h_upper)_x, 0) f = SVector(z, equations.gravity * h_upper_ll * (b_rr + h_lower_rr), @@ -254,9 +254,9 @@ end !!! warning "Experimental code" This numerical flux is experimental and may change in any future release. 
- -Non-symmetric two-point surface flux discretizing the nonconservative (source) term that contains -the gradients of the bottom topography and an additional term that couples the momentum of both + +Non-symmetric two-point surface flux discretizing the nonconservative (source) term that contains +the gradients of the bottom topography and an additional term that couples the momentum of both layers [`ShallowWaterTwoLayerEquations2D`](@ref). Further details are available in the paper: @@ -286,7 +286,7 @@ formulation. z = zero(eltype(u_ll)) - # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, + # Bottom gradient nonconservative term: (0, g*h_upper*(b+h_lower)_x, # 0, g*h_lower*(b+r*h_upper)_x, 0) f = SVector(z, g * h_upper_ll * (b_ll + h_lower_ll) + @@ -303,13 +303,13 @@ end flux_fjordholm_etal(u_ll, u_rr, orientation, equations::ShallowWaterTwoLayerEquations1D) -Total energy conservative (mathematical entropy for shallow water equations). When the bottom -topography is nonzero this should only be used as a surface flux otherwise the scheme will not be +Total energy conservative (mathematical entropy for shallow water equations). When the bottom +topography is nonzero this should only be used as a surface flux otherwise the scheme will not be well-balanced. For well-balancedness in the volume flux use [`flux_wintermeyer_etal`](@ref). Details are available in Eq. (4.1) in the paper: - Ulrik S. Fjordholm, Siddhartha Mishra and Eitan Tadmor (2011) - Well-balanced and energy stable schemes for the shallow water equations with discontinuous + Well-balanced and energy stable schemes for the shallow water equations with discontinuous topography [DOI: 10.1016/j.jcp.2011.03.042](https://doi.org/10.1016/j.jcp.2011.03.042) and the application to two layers is shown in the paper: - Ulrik Skre Fjordholm (2012) @@ -348,11 +348,11 @@ end """ flux_wintermeyer_etal(u_ll, u_rr, orientation, equations::ShallowWaterTwoLayerEquations1D) - + Total energy conservative (mathematical entropy for two-layer shallow water equations) split form. When the bottom topography is nonzero this scheme will be well-balanced when used as a `volume_flux`. The `surface_flux` should still use, e.g., [`flux_fjordholm_etal`](@ref). To obtain the flux for the -two-layer shallow water equations the flux that is described in the paper for the normal shallow +two-layer shallow water equations the flux that is described in the paper for the normal shallow water equations is used within each layer. Further details are available in Theorem 1 of the paper: @@ -391,8 +391,8 @@ end flux_es_fjordholm_etal(u_ll, u_rr, orientation, equations::ShallowWaterTwoLayerEquations1D) -Entropy stable surface flux for the two-layer shallow water equations. Uses the entropy -conservative flux_fjordholm_etal and adds a Lax-Friedrichs type dissipation dependent on the jump +Entropy stable surface flux for the two-layer shallow water equations. Uses the entropy +conservative flux_fjordholm_etal and adds a Lax-Friedrichs type dissipation dependent on the jump of entropy variables. Further details are available in the paper: @@ -460,10 +460,10 @@ formulation. end # Calculate approximation for maximum wave speed for local Lax-Friedrichs-type dissipation as the -# maximum velocity magnitude plus the maximum speed of sound. This function uses approximate -# eigenvalues using the speed of the barotropic mode as there is no simple way to calculate them +# maximum velocity magnitude plus the maximum speed of sound. 
This function uses approximate +# eigenvalues using the speed of the barotropic mode as there is no simple way to calculate them # analytically. -# +# # A good overview of the derivation is given in: # - Jonas Nycander, Andrew McC. Hogg, Leela M. Frankcombe (2008) # Open boundary conditions for nonlinear channel Flows @@ -488,7 +488,7 @@ end return (max(abs(v_m_ll) + c_ll, abs(v_m_rr) + c_rr)) end -# Specialized `DissipationLocalLaxFriedrichs` to avoid spurious dissipation in the bottom +# Specialized `DissipationLocalLaxFriedrichs` to avoid spurious dissipation in the bottom # topography @inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, orientation_or_normal_direction, @@ -530,7 +530,7 @@ end end # Convert conservative variables to entropy variables -# Note, only the first four are the entropy variables, the fifth entry still just carries the +# Note, only the first four are the entropy variables, the fifth entry still just carries the # bottom topography values for convenience @inline function cons2entropy(u, equations::ShallowWaterTwoLayerEquations1D) h_upper, _, h_lower, _, b = u @@ -567,7 +567,7 @@ end # Calculate total energy for a conservative state `cons` @inline function energy_total(cons, equations::ShallowWaterTwoLayerEquations1D) - h_upper, h_lower, h_v1_upper, h_v2_lower, b = cons + h_upper, h_v1_upper, h_lower, h_v2_lower, b = cons # Set new variables for better readability g = equations.gravity rho_upper = equations.rho_upper diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index 838fa2d5819..2536cfe0bf2 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -628,6 +628,15 @@ function compute_coefficients!(u, func, t, mesh::AbstractMesh{1}, equations, dg: for i in eachnode(dg) x_node = get_node_coords(cache.elements.node_coordinates, equations, dg, i, element) + # Changing the node positions passed to the initial condition by the minimum + # amount possible with the current type of floating point numbers allows setting + # discontinuous initial data in a simple way. In particular, a check like `if x < x_jump` + # works if the jump location `x_jump` is at the position of an interface. 
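+            # For example, a jump at `x_jump = 10.0` is then sampled at
+            # `prevfloat(10.0)` by the element left of the jump, where
+            # `x[1] <= 10.0` still holds, and at `nextfloat(10.0)` by the
+            # element right of it, where it does not, so the two coincident
+            # interface nodes receive the two different states.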
+ if i == 1 + x_node = SVector(nextfloat(x_node[1])) + elseif i == nnodes(dg) + x_node = SVector(prevfloat(x_node[1])) + end u_node = func(x_node, t, equations) set_node_vars!(u, u_node, equations, dg, i, element) end diff --git a/test/test_structured_1d.jl b/test/test_structured_1d.jl index a27d3c219e1..ec8c7a138d5 100644 --- a/test/test_structured_1d.jl +++ b/test/test_structured_1d.jl @@ -27,8 +27,8 @@ isdir(outdir) && rm(outdir, recursive=true) @trixi_testset "elixir_advection_shockcapturing.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_shockcapturing.jl"), - l2 = [0.08004076716881656], - linf = [0.6342577638501385], + l2 = [0.08015029105233593], + linf = [0.610709468736576], atol = 1.0e-5) end diff --git a/test/test_tree_1d.jl b/test/test_tree_1d.jl index e37e1efc3e6..7737a93a15a 100644 --- a/test/test_tree_1d.jl +++ b/test/test_tree_1d.jl @@ -271,7 +271,7 @@ end sol = solve(ode, Tsit5(), abstol=1.0e-6, reltol=1.0e-6, save_everystep=false, callback=callbacks); - @test analysis_callback(sol).l2 ≈ [0.00029610274971929974, 5.573684084938363e-6] + @test analysis_callback(sol).l2 ≈ [0.00029609575838969394, 5.5681704039507985e-6] end diff --git a/test/test_tree_1d_burgers.jl b/test/test_tree_1d_burgers.jl index 788c7ab4199..8c4cfaa406d 100644 --- a/test/test_tree_1d_burgers.jl +++ b/test/test_tree_1d_burgers.jl @@ -22,14 +22,14 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_burgers_shock.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_burgers_shock.jl"), - l2 = [0.4407585104869119], - linf = [1.000000000000001]) + l2 = [0.4422505602587537], + linf = [1.0000000000000009]) end @trixi_testset "elixir_burgers_rarefaction.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_burgers_rarefaction.jl"), - l2 = [0.40287062735307044], - linf = [1.0042992585765542]) + l2 = [0.4038224690923722], + linf = [1.0049201454652736]) end end diff --git a/test/test_tree_1d_euler.jl b/test/test_tree_1d_euler.jl index 40f2a38b0e1..5fb74b80bce 100644 --- a/test/test_tree_1d_euler.jl +++ b/test/test_tree_1d_euler.jl @@ -22,8 +22,8 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_euler_density_wave.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_density_wave.jl"), - l2 = [0.0011482554820185795, 0.00011482554830363504, 5.741277417754598e-6], - linf = [0.004090978306820037, 0.00040909783134346345, 2.0454891732413216e-5]) + l2 = [0.0011482554820217855, 0.00011482554830323462, 5.741277429325267e-6], + linf = [0.004090978306812376, 0.0004090978313582294, 2.045489210189544e-5]) end @trixi_testset "elixir_euler_density_wave.jl with initial_condition_constant" begin @@ -41,14 +41,14 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_euler_ec.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_ec.jl"), - l2 = [0.11915540925414216, 0.15489191247295198, 0.44543052524765375], - linf = [0.2751485868543495, 0.2712764982000735, 0.9951407418216425]) + l2 = [0.11821957357197649, 0.15330089521538678, 0.4417674632047301], + linf = [0.24280567569982958, 0.29130548795961936, 0.8847009003152442]) end @trixi_testset "elixir_euler_ec.jl with flux_kennedy_gruber" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_ec.jl"), - l2 = [0.07905582221868049, 0.10180958900546237, 0.29596551476711125], - linf = [0.23515297345769826, 0.2958208108392532, 0.8694224308790321], + l2 = [0.07803455838661963, 0.10032577312032283, 0.29228156303827935], + linf = 
[0.2549869853794955, 0.3376472164661263, 0.9650477546553962], maxiters = 10, surface_flux = flux_kennedy_gruber, volume_flux = flux_kennedy_gruber) @@ -56,8 +56,8 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_euler_ec.jl with flux_shima_etal" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_ec.jl"), - l2 = [0.07909267609417114, 0.1018246500951966, 0.2959649187481973], - linf = [0.23631829743146504, 0.2977756307879202, 0.8642794698697331], + l2 = [0.07800654460172655, 0.10030365573277883, 0.2921481199111959], + linf = [0.25408579350400395, 0.3388657679031271, 0.9776486386921928], maxiters = 10, surface_flux = flux_shima_etal, volume_flux = flux_shima_etal) @@ -65,8 +65,8 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_euler_ec.jl with flux_chandrashekar" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_ec.jl"), - l2 = [0.07905306555214126, 0.10181180378499956, 0.2959171937479504], - linf = [0.24057642004451651, 0.29691454643616433, 0.886425723870524], + l2 = [0.07801923089205756, 0.10039557434912669, 0.2922210399923278], + linf = [0.2576521982607225, 0.3409717926625057, 0.9772961936567048], maxiters = 10, surface_flux = flux_chandrashekar, volume_flux = flux_chandrashekar) @@ -74,8 +74,8 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_euler_ec.jl with flux_hll" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_ec.jl"), - l2 = [0.07959780803600519, 0.10342491934977621, 0.2978851659149904], - linf = [0.19228754121840885, 0.2524152253292552, 0.725604944702432], + l2 = [0.07852272782240548, 0.10209790867523805, 0.293873048809011], + linf = [0.19244768908604093, 0.2515941686151897, 0.7258000837553769], maxiters = 10, surface_flux = flux_hll, volume_flux = flux_ranocha) @@ -83,8 +83,8 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_euler_shockcapturing.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_shockcapturing.jl"), - l2 = [0.11665968950973675, 0.15105507394693413, 0.43503082674771115], - linf = [0.1867400345208743, 0.24621854448555328, 0.703826406555577]) + l2 = [0.11606096465319675, 0.15028768943458806, 0.4328230323046703], + linf = [0.18031710091067965, 0.2351582421501841, 0.6776805692092567]) end @trixi_testset "elixir_euler_sedov_blast_wave.jl" begin @@ -96,8 +96,8 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_euler_sedov_blast_wave_pure_fv.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov_blast_wave_pure_fv.jl"), - l2 = [1.075075094036344, 0.06766902169711514, 0.9221426570128292], - linf = [3.3941512671408542, 0.16862631133303882, 2.6572394126490315], + l2 = [1.0735456065491455, 0.07131078703089379, 0.9205739468590453], + linf = [3.4296365168219216, 0.17635583964559245, 2.6574584326179505], # Let this test run longer to cover some lines in flux_hllc coverage_override = (maxiters=10^5, tspan=(0.0, 0.1))) end @@ -129,22 +129,22 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_euler_blast_wave.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_blast_wave.jl"), - l2 = [0.21651329948737183, 0.28091709900008616, 0.5580778880050432], - linf = [1.513525457073142, 1.5328754303137992, 2.0467706106669556], + l2 = [0.21934822867340323, 0.28131919126002686, 0.554361702716662], + linf = [1.5180897390290355, 1.3967085956620369, 2.0663825294019595], maxiters = 30) end @trixi_testset 
"elixir_euler_blast_wave_neuralnetwork_perssonperaire.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_blast_wave_neuralnetwork_perssonperaire.jl"), - l2 = [2.13605618e-01, 2.79953055e-01, 5.54424459e-01], - linf = [1.55151701e+00, 1.55696782e+00, 2.05525953e+00], + l2 = [0.21814833203212694, 0.2818328665444332, 0.5528379124720818], + linf = [1.5548653877320868, 1.4474018998129738, 2.071919577393772], maxiters = 30) end @trixi_testset "elixir_euler_blast_wave_neuralnetwork_rayhesthaven.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_blast_wave_neuralnetwork_rayhesthaven.jl"), - l2 = [2.18148857e-01, 2.83182959e-01, 5.59096194e-01], - linf = [1.62706876e+00, 1.61680275e+00, 2.05876517e+00], + l2 = [0.22054468879127423, 0.2828269190680846, 0.5542369885642424], + linf = [1.5623359741479623, 1.4290121654488288, 2.1040405133123072], maxiters = 30) end end diff --git a/test/test_tree_1d_eulermulti.jl b/test/test_tree_1d_eulermulti.jl index eac54a9372c..e880f98e2d0 100644 --- a/test/test_tree_1d_eulermulti.jl +++ b/test/test_tree_1d_eulermulti.jl @@ -11,39 +11,47 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_eulermulti_ec.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_eulermulti_ec.jl"), - l2 = [1.54891912e-01, 4.45430525e-01, 1.70222013e-02, 3.40444026e-02, 6.80888053e-02], - linf = [2.71276498e-01, 9.95140742e-01, 3.93069410e-02, 7.86138820e-02, 1.57227764e-01]) + l2 = [0.15330089521538684, 0.4417674632047301, 0.016888510510282385, 0.03377702102056477, + 0.06755404204112954], + linf = [0.29130548795961864, 0.8847009003152357, 0.034686525099975274, 0.06937305019995055, + 0.1387461003999011]) end @trixi_testset "elixir_eulermulti_es.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_eulermulti_es.jl"), - l2 = [1.53387916e-01, 4.41585576e-01, 3.93605635e-02, 7.87211270e-02], - linf = [2.49632117e-01, 7.21088064e-01, 6.38328770e-02, 1.27665754e-01]) + l2 = [0.1522380497572071, 0.43830846465313206, 0.03907262116499431, 0.07814524232998862], + linf = [0.24939193075537294, 0.7139395740052739, 0.06324208768391237, 0.12648417536782475]) end @trixi_testset "elixir_eulermulti_convergence_ec.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_eulermulti_convergence_ec.jl"), - l2 = [8.57523604e-05, 1.63878043e-04, 1.94126993e-05, 3.88253986e-05], - linf = [3.05932773e-04, 6.24480393e-04, 7.25312144e-05, 1.45062429e-04]) + l2 = [8.575236038539227e-5, 0.00016387804318585358, 1.9412699303977585e-5, 3.882539860795517e-5], + linf = [0.00030593277277124464, 0.0006244803933350696, 7.253121435135679e-5, 0.00014506242870271358]) end @trixi_testset "elixir_eulermulti_convergence_es.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_eulermulti_convergence_es.jl"), - l2 = [1.8983933794407234e-5, 6.207744299844731e-5, 1.5466205761868047e-6, 3.0932411523736094e-6, 6.186482304747219e-6, 1.2372964609494437e-5], - linf = [0.00012014372605895218, 0.0003313207215800418, 6.50836791016296e-6, 1.301673582032592e-5, 2.603347164065184e-5, 5.206694328130368e-5]) + l2 = [1.8983933794407234e-5, 6.207744299844731e-5, 1.5466205761868047e-6, 3.0932411523736094e-6, + 6.186482304747219e-6, 1.2372964609494437e-5], + linf = [0.00012014372605895218, 0.0003313207215800418, 6.50836791016296e-6, 1.301673582032592e-5, + 2.603347164065184e-5, 5.206694328130368e-5]) end @trixi_testset "elixir_eulermulti_convergence_es.jl with flux_chandrashekar" begin @test_trixi_include(joinpath(EXAMPLES_DIR, 
"elixir_eulermulti_convergence_es.jl"), - l2 = [1.88845048e-05, 5.49106005e-05, 9.42673716e-07, 1.88534743e-06, 3.77069486e-06, 7.54138973e-06], - linf = [1.16223512e-04, 3.07922197e-04, 3.21774233e-06, 6.43548465e-06, 1.28709693e-05, 2.57419386e-05], + l2 = [1.888450477353845e-5, 5.4910600482795386e-5, 9.426737161533622e-7, 1.8853474323067245e-6, + 3.770694864613449e-6, 7.541389729226898e-6], + linf = [0.00011622351152063004, 0.0003079221967086099, 3.2177423254231563e-6, 6.435484650846313e-6, + 1.2870969301692625e-5, 2.574193860338525e-5], volume_flux = flux_chandrashekar) end @trixi_testset "elixir_eulermulti_two_interacting_blast_waves.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_eulermulti_two_interacting_blast_waves.jl"), - l2 = [1.28886761e+00, 8.27133526e+01, 3.50680272e-03, 1.36987844e-02, 1.91795185e-02], - linf = [2.96413045e+01, 1.32258448e+03, 9.19191937e-02, 3.10929710e-01, 4.41798976e-01], + l2 = [1.288867611915533, 82.71335258388848, 0.00350680272313187, 0.013698784353152794, + 0.019179518517518084], + linf = [29.6413044707026, 1322.5844802186496, 0.09191919374782143, 0.31092970966717925, + 0.4417989757182038], tspan = (0.0, 0.0001)) end diff --git a/test/test_tree_1d_mhd.jl b/test/test_tree_1d_mhd.jl index 938959831c1..e3a0cda3250 100644 --- a/test/test_tree_1d_mhd.jl +++ b/test/test_tree_1d_mhd.jl @@ -32,14 +32,14 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_mhd_ec.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_ec.jl"), - l2 = [5.86009540e-02, 8.16048158e-02, 5.46791194e-02, 5.46791194e-02, 1.54509265e-01, 4.13046273e-17, 5.47637521e-02, 5.47637521e-02], - linf = [1.10014999e-01, 1.81982581e-01, 9.13611439e-02, 9.13611439e-02, 4.23831370e-01, 1.11022302e-16, 9.93731761e-02, 9.93731761e-02]) + l2 = [0.05815183849746399, 0.08166807325621023, 0.054659228513541165, 0.054659228513541165, 0.15578125987042743, 4.130462730494e-17, 0.05465258887150046, 0.05465258887150046], + linf = [0.12165312668363826, 0.1901920742264952, 0.10059813883022554, 0.10059813883022554, 0.44079257431070706, 1.1102230246251565e-16, 0.10528911365809579, 0.10528911365809579]) end @trixi_testset "elixir_mhd_briowu_shock_tube.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_briowu_shock_tube.jl"), - l2 = [0.17764301067932906, 0.19693621875378622, 0.3635136528288642, 0.0, 0.3757321708837591, 8.593007507325741e-16, 0.36473438378159656, 0.0], - linf = [0.5601530250396535, 0.43867368105486537, 1.0960903616351099, 0.0, 1.0551794137886303, 4.107825191113079e-15, 1.5374410890043144, 0.0], + l2 = [0.17477712356961989, 0.19489623595086944, 0.3596546157640463, 0.0, 0.3723215736814466, 1.2060075775846403e-15, 0.36276754492568164, 0.0], + linf = [0.5797109945880677, 0.4372991899547103, 1.0906536287185835, 0.0, 1.0526758874956808, 5.995204332975845e-15, 1.5122922036932964, 0.0], coverage_override = (maxiters=6,)) end @@ -51,8 +51,8 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_mhd_ryujones_shock_tube.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_ryujones_shock_tube.jl"), - l2 = [2.34809441e-01, 3.92255943e-01, 8.23575546e-02, 1.75599624e-01, 9.61613519e-01, 6.60825891e-17, 2.15346454e-01, 1.07006529e-01], - linf = [6.40732148e-01, 9.44889516e-01, 3.54932707e-01, 8.54060243e-01, 2.07757711e+00, 1.11022302e-16, 4.92584725e-01, 2.49526561e-01], + l2 = [0.23469781891518154, 0.3916675299696121, 0.08245195301016353, 0.1745346945706147, 0.9606363432904367, 6.608258910237605e-17, 
0.21542929107153735, 0.10705457908737925], + linf = [0.6447951791685409, 0.9461857095377463, 0.35074627554617605, 0.8515177411529542, 2.0770652030507053, 1.1102230246251565e-16, 0.49670855513788204, 0.24830199967863564], tspan = (0.0, 0.1)) end diff --git a/test/test_tree_1d_mhdmulti.jl b/test/test_tree_1d_mhdmulti.jl index 2985e6d5663..5214ed26d38 100644 --- a/test/test_tree_1d_mhdmulti.jl +++ b/test/test_tree_1d_mhdmulti.jl @@ -11,33 +11,33 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_mhdmulti_ec.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhdmulti_ec.jl"), - l2 = [0.08160481582829862, 0.05467911944326103, 0.05467911944326103, 0.15450926504459692, - 4.130462730494e-17, 0.054763752050210085, 0.054763752050210085, 0.008371564857135208, - 0.016743129714270416, 0.03348625942854083], - linf = [0.18198258075330706, 0.09136114386311774, 0.09136114386311774, 0.423831369951313, - 1.1102230246251565e-16, 0.09937317613143604, 0.09937317613143604, 0.0157164284712992, - 0.0314328569425984, 0.0628657138851968]) + l2 = [0.08166807325620999, 0.054659228513541616, 0.054659228513541616, 0.15578125987042812, + 4.130462730494e-17, 0.054652588871500665, 0.054652588871500665, 0.008307405499637766, + 0.01661481099927553, 0.03322962199855106], + linf = [0.19019207422649645, 0.10059813883022888, 0.10059813883022888, 0.4407925743107146, + 1.1102230246251565e-16, 0.10528911365809623, 0.10528911365809623, 0.01737901809766182, + 0.03475803619532364, 0.06951607239064728]) end @trixi_testset "elixir_mhdmulti_ec.jl with flux_derigs_etal" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhdmulti_ec.jl"), - l2 = [0.08153372259925547, 0.05464109003345891, 0.05464109003345891, 0.1540576724164453, - 4.130462730494e-17, 0.054734930802131036, 0.054734930802131036, 0.008391254781284321, - 0.016782509562568642, 0.033565019125137284], - linf = [0.17492544007323832, 0.09029632168248182, 0.09029632168248182, 0.40798609353896564, - 1.1102230246251565e-16, 0.09872923637833075, 0.09872923637833075, 0.01609818847160674, - 0.03219637694321348, 0.06439275388642696], + l2 = [0.08151404166186461, 0.054640238302693274, 0.054640238302693274, 0.15536125426328573, + 4.130462730494e-17, 0.054665489963920275, 0.054665489963920275, 0.008308349501359825, + 0.01661669900271965, 0.0332333980054393], + linf = [0.1824424257860952, 0.09734687137001484, 0.09734687137001484, 0.4243089502087325, + 1.1102230246251565e-16, 0.09558639591092555, 0.09558639591092555, 0.017364773041550624, + 0.03472954608310125, 0.0694590921662025], volume_flux = flux_derigs_etal) end @trixi_testset "elixir_mhdmulti_es.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhdmulti_es.jl"), - l2 = [0.07968782477167513, 0.05398115008116676, 0.05398115008116676, 0.15015281822439228, - 4.130462730494e-17, 0.053629890024921495, 0.053629890024921495, 0.008279068245579706, - 0.016558136491159413, 0.033116272982318826], - linf = [0.14118014632124837, 0.07820697032983395, 0.07820697032983395, 0.3390558674728652, - 1.1102230246251565e-16, 0.06998787893467828, 0.06998787893467828, 0.014943825414763745, - 0.02988765082952749, 0.05977530165905498]) + l2 = [0.07994082660130175, 0.053940174914031976, 0.053940174914031976, 0.15165513559250643, + 4.130462730494e-17, 0.05363207135290325, 0.05363207135290325, 0.008258265884659555, + 0.01651653176931911, 0.03303306353863822], + linf = [0.14101014428198477, 0.07762441749521025, 0.07762441749521025, 0.3381334453289866, + 1.1102230246251565e-16, 0.07003646400675223, 
0.07003646400675223, 0.014962483760600165, + 0.02992496752120033, 0.05984993504240066]) end @trixi_testset "elixir_mhdmulti_convergence.jl" begin @@ -52,12 +52,12 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_mhdmulti_briowu_shock_tube.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhdmulti_briowu_shock_tube.jl"), - l2 = [0.1946577804333822, 0.3591196215528672, 0.0, 0.36875476066849383, - 4.7644020131827105e-16, 0.36668249926193885, 0.0, 0.05775369214541893, - 0.11550738429083786], - linf = [0.4345551123140612, 1.0874941615375844, 0.0, 1.0493729052116585, - 3.219646771412954e-15, 1.5160434573973656, 0.0, 0.18616213071936066, - 0.3723242614387213], + l2 = [0.1877830835572639, 0.3455841730726793, 0.0, 0.35413123388836687, + 8.745556626531982e-16, 0.3629920109231055, 0.0, 0.05329005553971236, + 0.10658011107942472], + linf = [0.4288187627971754, 1.0386547815614993, 0.0, 0.9541678878162702, + 5.773159728050814e-15, 1.4595119339458051, 0.0, 0.18201910908829552, + 0.36403821817659104], coverage_override = (maxiters=6,)) end diff --git a/test/test_tree_1d_shallowwater.jl b/test/test_tree_1d_shallowwater.jl index f8901a3dcb6..1c3bac1fab6 100644 --- a/test/test_tree_1d_shallowwater.jl +++ b/test/test_tree_1d_shallowwater.jl @@ -10,22 +10,30 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @testset "Shallow Water" begin @trixi_testset "elixir_shallowwater_ec.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_ec.jl"), - l2 = [0.8122354510732459, 1.01586214815876, 0.43404255061704217], - linf = [1.4883285368551107, 3.8717508164234276, 1.7711213427919539], + l2 = [0.244729018751225, 0.8583565222389505, 0.07330427577586297], + linf = [2.1635021283528504, 3.8717508164234453, 1.7711213427919539], + tspan = (0.0, 0.25)) + end + + @trixi_testset "elixir_shallowwater_ec.jl with initial_condition_weak_blast_wave" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_ec.jl"), + l2 = [0.39464782107209717, 2.03880864210846, 4.1623084150546725e-10], + linf = [0.778905801278281, 3.2409883402608273, 7.419800190922032e-10], + initial_condition=initial_condition_weak_blast_wave, tspan = (0.0, 0.25)) end @trixi_testset "elixir_shallowwater_well_balanced.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_well_balanced.jl"), - l2 = [1.2427984842961743, 1.0332499675061871e-14, 1.2427984842961741], - linf = [1.619041478244762, 1.266865149831811e-14, 1.6190414782447629], + l2 = [0.10416666834254829, 1.4352935256803184e-14, 0.10416666834254838], + linf = [1.9999999999999996, 3.248036646353028e-14, 2.0], tspan = (0.0, 0.25)) end @trixi_testset "elixir_shallowwater_well_balanced.jl with FluxHydrostaticReconstruction" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_well_balanced.jl"), - l2 = [1.2427984842961743, 1.2663646513352053e-14, 1.2427984842961741], - linf = [1.619041478244762, 2.4566658711604395e-14, 1.6190414782447629], + l2 = [0.10416666834254835, 1.1891029971551825e-14, 0.10416666834254838], + linf = [2.0000000000000018, 2.4019608337954543e-14, 2.0], surface_flux=(FluxHydrostaticReconstruction(flux_lax_friedrichs, hydrostatic_reconstruction_audusse_etal), flux_nonconservative_audusse_etal), tspan = (0.0, 0.25)) end @@ -59,14 +67,14 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") tspan = (0.0, 0.025)) end - @trixi_testset "elixir_shallowwater_well_balanced_nonperiodic.jl with dirichlet boundary" begin + @trixi_testset 
"elixir_shallowwater_well_balanced_nonperiodic.jl with Dirichlet boundary" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_well_balanced_nonperiodic.jl"), l2 = [1.725964362045055e-8, 5.0427180314307505e-16, 1.7259643530442137e-8], linf = [3.844551077492042e-8, 3.469453422316143e-15, 3.844551077492042e-8], tspan = (0.0, 0.25)) end - @trixi_testset "elixir_shallowwater_well_nonperiodic.jl with wall boundary" begin + @trixi_testset "elixir_shallowwater_well_balanced_nonperiodic.jl with wall boundary" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_well_balanced_nonperiodic.jl"), l2 = [1.7259643614361866e-8, 3.5519018243195145e-16, 1.7259643530442137e-8], linf = [3.844551010878661e-8, 9.846474508971374e-16, 3.844551077492042e-8], @@ -76,8 +84,8 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_shallowwater_shock_capturing.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_shock_capturing.jl"), - l2 = [0.2884024818919076, 0.5252262013521178, 0.2890348477852955], - linf = [0.7565706154863958, 2.076621603471687, 0.8646939843534258], + l2 = [0.07424140641160326, 0.2148642632748155, 0.0372579849000542], + linf = [1.1209754279344226, 1.3230788645853582, 0.8646939843534251], tspan = (0.0, 0.05)) end end diff --git a/test/test_tree_1d_shallowwater_twolayer.jl b/test/test_tree_1d_shallowwater_twolayer.jl index 6c0ad2941cc..0d8a83806f9 100644 --- a/test/test_tree_1d_shallowwater_twolayer.jl +++ b/test/test_tree_1d_shallowwater_twolayer.jl @@ -38,9 +38,9 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @trixi_testset "elixir_shallowwater_twolayer_dam_break.jl with flux_lax_friedrichs" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_twolayer_dam_break.jl"), - l2 = [0.35490827242437256, 1.6715402155795918, 0.6960264969949427, - 0.9351481433409805, 0.7938172946965545], - linf = [0.6417127471419837, 1.9742107034120873, 1.135774587483082, 1.236125279347084, 1.1], + l2 = [0.10010269243463918, 0.5668733957648654, 0.08759617327649398, + 0.4538443183566172, 0.013638618139749523], + linf = [0.5854202777756559, 2.1278930820498934, 0.5193686074348809, 1.8071213168086229, 0.5], surface_flux = (flux_lax_friedrichs, flux_nonconservative_fjordholm_etal), tspan = (0.0, 0.25)) end From 9ea37cce8f510dcc135ad568280fd770a693ba10 Mon Sep 17 00:00:00 2001 From: David Knapp Date: Mon, 19 Jun 2023 16:16:48 +0200 Subject: [PATCH 055/163] Add a pre-commit script and an adapted formatting file (#1534) * Add a pre-commit script and an adapted formatting file * Formatting I am aware of the irony * Fix typo * Added documentation * Fix Typo --- docs/src/styleguide.md | 11 ++++++++++ utils/pre-commit | 40 ++++++++++++++++++++++++++++++++++++ utils/trixi-format-file.jl | 42 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+) create mode 100755 utils/pre-commit create mode 100755 utils/trixi-format-file.jl diff --git a/docs/src/styleguide.md b/docs/src/styleguide.md index de367c086cc..9e6b6a8c265 100644 --- a/docs/src/styleguide.md +++ b/docs/src/styleguide.md @@ -65,3 +65,14 @@ utils/trixi-format.jl ``` You can get more information about using the convenience script by running it with the `--help`/`-h` flag. + +### Checking formatting before committing +It can be convenient to check the formatting of source code automatically before each commit. +We use git-hooks for it and provide a `pre-commit` script in the `utils` folder. 
The script uses
+[JuliaFormatter.jl](https://github.com/domluna/JuliaFormatter.jl) just like the formatting script that
+runs over the whole Trixi.jl directory.
+You can copy the `pre-commit`-script into `.git/hooks/pre-commit` and it will check your formatting
+before each commit. If errors are found, the commit is aborted and you can add the corrections via
+```shell
+git add -p
+```
\ No newline at end of file
diff --git a/utils/pre-commit b/utils/pre-commit
new file mode 100755
index 00000000000..2977b9a200b
--- /dev/null
+++ b/utils/pre-commit
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+# Copy this file into .git/hooks/pre-commit to execute before each commit.
+# It checks and corrects the format for each file.
+# If incorrect formatting is found you can add the correction via git add -p
+
+echo "Checking format before committing"
+
+if git rev-parse --verify HEAD >/dev/null 2>&1
+then
+  against=HEAD
+else
+  # Initial commit: diff against an empty tree object
+  against=280fc57fade28e35046c3e884e587ffef05d3867
+fi
+
+# Redirect output to stderr.
+exec 1>&2
+
+# Create a list of files to format.
+files=()
+
+for file in `git diff --cached --name-only`
+do
+  # only indent existing files, this is necessary since if we rename or delete
+  # a file it is added to the committed files and we thus would try to indent a
+  # nonexisting file.
+  if [ ! -e $file ]
+  then
+    continue
+  fi
+  # We only indent .jl files
+  FILE_ENDING="${file##*.}"
+  if [ $FILE_ENDING = "jl" ]
+  then
+    files+=($file)
+  fi
+done
+
+julia utils/trixi-format-file.jl "${files[@]}"
diff --git a/utils/trixi-format-file.jl b/utils/trixi-format-file.jl
new file mode 100755
index 00000000000..c4d8e7c9032
--- /dev/null
+++ b/utils/trixi-format-file.jl
@@ -0,0 +1,42 @@
+#!/usr/bin/env julia
+
+using Pkg
+Pkg.activate(; temp = true, io = devnull)
+Pkg.add("JuliaFormatter"; preserve = PRESERVE_ALL, io = devnull)
+
+using JuliaFormatter: format_file
+
+function main()
+    # Show help
+    if "-h" in ARGS || "--help" in ARGS
+        println("usage: trixi-format-file.jl PATH [PATH...]")
+        println()
+        println("positional arguments:")
+        println()
+        println("  PATH  One or more paths (directories or files) to format. Default: '.'")
+        return nothing
+    end
+
+    file_list = ARGS
+    if isempty(ARGS)
+        exit(0)
+    end
+    non_formatted_files = Vector{String}()
+    for file in file_list
+        println("Checking file " * file)
+        if !format_file(file)
+            push!(non_formatted_files, file)
+        end
+    end
+    if isempty(non_formatted_files)
+        exit(0)
+    else
+        @error "Some files have not been formatted! Formatting has been applied, run 'git add -p' to update changes."
+        for file in non_formatted_files
+            println(file)
+        end
+        exit(1)
+    end
+end
+
+main()
From 11f6fa786340534c10f6356886e19d6291cf618a Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Mon, 19 Jun 2023 16:57:26 +0200
Subject: [PATCH 056/163] set version to v0.5.29

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 9d51e4dcffc..303e97d6324 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. 
Winters ", "Jesse Chan "]
-version = "0.5.29-pre"
+version = "0.5.29"

[deps]
CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
From e84946146c022588ef2d4260e4fa4c34b1ab2d9d Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Mon, 19 Jun 2023 16:57:48 +0200
Subject: [PATCH 057/163] set development version to v0.5.30-pre

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 303e97d6324..d3983262591 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "]
-version = "0.5.29"
+version = "0.5.30-pre"

[deps]
CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
From 9519c26b2fb88398ab12ddc4f3c6acc62c02a426 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 19 Jun 2023 21:24:37 +0200
Subject: [PATCH 058/163] Bump crate-ci/typos from 1.15.0 to 1.15.1 (#1537)

Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.15.0 to 1.15.1.
- [Release notes](https://github.com/crate-ci/typos/releases)
- [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md)
- [Commits](https://github.com/crate-ci/typos/compare/v1.15.0...v1.15.1)

---
updated-dependencies:
- dependency-name: crate-ci/typos
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/SpellCheck.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml
index bc324c689bc..75886465f85 100644
--- a/.github/workflows/SpellCheck.yml
+++ b/.github/workflows/SpellCheck.yml
@@ -10,4 +10,4 @@ jobs:
     - name: Checkout Actions Repository
       uses: actions/checkout@v3
     - name: Check spelling
-      uses: crate-ci/typos@v1.15.0
+      uses: crate-ci/typos@v1.15.1
From 830d1d7a5fcc76d9ee205c72abff15f328372d02 Mon Sep 17 00:00:00 2001
From: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com>
Date: Tue, 20 Jun 2023 09:02:05 +0200
Subject: [PATCH 059/163] Fix pre-commit (#1536)

* fix pre-commit

* consistent indent
---
 utils/pre-commit | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/utils/pre-commit b/utils/pre-commit
index 2977b9a200b..73ad061baef 100755
--- a/utils/pre-commit
+++ b/utils/pre-commit
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash

# Copy this file into .git/hooks/pre-commit to execute before each commit.
# It checks and corrects the format for each file.
@@ -8,10 +8,10 @@ echo "Checking format before committing"

if git rev-parse --verify HEAD >/dev/null 2>&1
then
-  against=HEAD
+    against=HEAD
else
-  # Initial commit: diff against an empty tree object
-  against=280fc57fade28e35046c3e884e587ffef05d3867
+    # Initial commit: diff against an empty tree object
+    against=280fc57fade28e35046c3e884e587ffef05d3867
fi

# Redirect output to stderr.
@@ -22,19 +22,19 @@ files=()

for file in `git diff --cached --name-only`
do
-  # only indent existing files, this is necessary since if we rename or delete
-  # a file it is added to the committed files and we thus would try to indent a
-  # nonexisting file.
-  if [ !
-e $file ] - then - continue - fi - # We only indent .jl files - FILE_ENDING="${file##*.}" - if [ $FILE_ENDING = "jl" ] - then - files+=($file) - fi + # only indent existing files, this is necessary since if we rename or delete + # a file it is added to the committed files and we thus would try to indent a + # nonexisting file. + if [ ! -e $file ] + then + continue + fi + # We only indent .jl files + FILE_ENDING="${file##*.}" + if [ $FILE_ENDING = "jl" ] + then + files+=($file) + fi done julia utils/trixi-format-file.jl "${files[@]}" From 196f139069370bd1d450b1d3865170c447c2b74f Mon Sep 17 00:00:00 2001 From: Johannes Markert <10619309+jmark@users.noreply.github.com> Date: Tue, 20 Jun 2023 09:12:02 +0200 Subject: [PATCH 060/163] Update styleguide.md (#1538) Added missing apostrophe. --- docs/src/styleguide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/styleguide.md b/docs/src/styleguide.md index 9e6b6a8c265..c2562a0c651 100644 --- a/docs/src/styleguide.md +++ b/docs/src/styleguide.md @@ -55,7 +55,7 @@ julia -e 'using Pkg; Pkg.add("JuliaFormatter")' ``` You can then recursively format all Julia files in the Trixi.jl repo by executing ```shell -julia -e 'using JuliaFormatter; format(".") +julia -e 'using JuliaFormatter; format(".")' ``` from inside the Trixi.jl repository. For convenience, there is also a script you can directly run from your terminal shell, which will automatically install JuliaFormatter in a @@ -75,4 +75,4 @@ You can copy the `pre-commit`-script into `.git/hooks/pre-commit` and it will ch before each commit. If errors are found the commit is aborted and you can add the corrections via ```shell git add -p -``` \ No newline at end of file +``` From 3303ed8c0b8af262a16e96f2ad6dcd84034bbe7b Mon Sep 17 00:00:00 2001 From: Johannes Markert <10619309+jmark@users.noreply.github.com> Date: Tue, 20 Jun 2023 11:45:41 +0200 Subject: [PATCH 061/163] Update styleguide.md (#1540) * Update styleguide.md Updated the formatting command line fixing the issue https://github.com/trixi-framework/Trixi.jl/issues/1539 * Update styleguide.md Removed superfluous whitespace. --- docs/src/styleguide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/styleguide.md b/docs/src/styleguide.md index c2562a0c651..60e227204ca 100644 --- a/docs/src/styleguide.md +++ b/docs/src/styleguide.md @@ -53,9 +53,9 @@ of your PR), you need to install JuliaFormatter.jl first by running ```shell julia -e 'using Pkg; Pkg.add("JuliaFormatter")' ``` -You can then recursively format all Julia files in the Trixi.jl repo by executing +You can then recursively format the core Julia files in the Trixi.jl repo by executing ```shell -julia -e 'using JuliaFormatter; format(".")' +julia -e 'using JuliaFormatter; format(["benchmark", "ext", "src", "utils"])' ``` from inside the Trixi.jl repository. 
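To check formatting without modifying any files, a sketch along the following lines should work (assuming JuliaFormatter.jl is installed; `overwrite = false` is a JuliaFormatter option that leaves the files untouched, and `format` returns whether everything was already formatted):
```julia
using JuliaFormatter

# Dry run over the same directories as above: returns `true` if all files
# are already formatted according to the repository's formatter settings.
is_formatted = format(["benchmark", "ext", "src", "utils"]; overwrite = false)
println(is_formatted ? "All files formatted." : "Some files need formatting.")
```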
For convenience, there is also a script you can directly run from your terminal shell, which will automatically install JuliaFormatter in a From 414dafa310738414d43d16d0393ca13588b2c101 Mon Sep 17 00:00:00 2001 From: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com> Date: Wed, 21 Jun 2023 11:31:35 +0200 Subject: [PATCH 062/163] Fix some typos in docs (#1541) * fix some typos in docs * fix copy mistake * update link --- docs/src/github-git.md | 4 ++-- src/auxiliary/auxiliary.jl | 10 +++++----- src/auxiliary/math.jl | 6 +++++- src/auxiliary/mpi.jl | 4 ++-- src/equations/acoustic_perturbation_2d.jl | 2 +- src/equations/compressible_euler_2d.jl | 2 +- src/equations/compressible_euler_3d.jl | 2 +- src/equations/compressible_euler_multicomponent_1d.jl | 8 ++++---- src/equations/compressible_euler_multicomponent_2d.jl | 8 ++++---- src/equations/compressible_navier_stokes_2d.jl | 8 ++++---- src/equations/compressible_navier_stokes_3d.jl | 4 ++-- src/equations/hyperbolic_diffusion_2d.jl | 2 +- src/equations/hyperbolic_diffusion_3d.jl | 2 +- src/equations/ideal_glm_mhd_3d.jl | 2 +- src/equations/shallow_water_two_layer_1d.jl | 2 +- src/equations/shallow_water_two_layer_2d.jl | 3 ++- src/meshes/structured_mesh.jl | 4 ++-- src/solvers/dgmulti/types.jl | 6 +++--- src/solvers/dgsem_tree/indicators.jl | 2 +- 19 files changed, 43 insertions(+), 38 deletions(-) diff --git a/docs/src/github-git.md b/docs/src/github-git.md index ad5991d87af..57b63073e79 100644 --- a/docs/src/github-git.md +++ b/docs/src/github-git.md @@ -112,7 +112,7 @@ branch, and the corresponding pull request will be updated automatically. Please note that a review has nothing to do with the lack of experience of the person developing changes: We try to review all code before it gets added to `main`, even from the most experienced developers. This is good practice and -helps to keep the error rate low while ensuring the the code is developed in a +helps to keep the error rate low while ensuring that the code is developed in a consistent fashion. Furthermore, do not take criticism of your code personally - we just try to keep Trixi.jl as accessible and easy to use for everyone. @@ -121,7 +121,7 @@ Once your branch is reviewed and declared ready for merging by the reviewer, make sure that all the latest changes have been pushed. Then, one of the developers will merge your PR. If you are one of the developers, you can also go to the pull request page on GitHub and and click on **Merge pull request**. -Voilá, you are done! Your branch will have been merged to +Voilà, you are done! Your branch will have been merged to `main` and the source branch will have been deleted in the GitHub repository (if you are not working in your own fork). diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index 115d055c0ca..1f7d30d6aa8 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -132,7 +132,7 @@ end default_example() Return the path to an example elixir that can be used to quickly see Trixi.jl in action on a -[`TreeMesh`]@(ref). See also [`examples_dir`](@ref) and [`get_examples`](@ref). +[`TreeMesh`](@ref). See also [`examples_dir`](@ref) and [`get_examples`](@ref). """ function default_example() joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_basic.jl") @@ -142,7 +142,7 @@ end default_example_unstructured() Return the path to an example elixir that can be used to quickly see Trixi.jl in action on an -[`UnstructuredMesh2D`]@(ref). 
This simulation is run on the example curved, unstructured mesh +[`UnstructuredMesh2D`](@ref). This simulation is run on the example curved, unstructured mesh given in the Trixi.jl documentation regarding unstructured meshes. """ function default_example_unstructured() @@ -155,7 +155,7 @@ end Return the default options for OrdinaryDiffEq's `solve`. Pass `ode_default_options()...` to `solve` to only return the solution at the final time and enable **MPI aware** error-based step size control, whenever MPI is used. -For example, use `solve(ode, alg; ode_default_options()...)` +For example, use `solve(ode, alg; ode_default_options()...)`. """ function ode_default_options() if mpi_isparallel() @@ -213,8 +213,8 @@ might be provided by other packages such as [Polyester.jl](https://github.com/Ju This macro does not necessarily work for general `for` loops. For example, it does not necessarily support general iterables such as `eachline(filename)`. -Some discussion can be found at https://discourse.julialang.org/t/overhead-of-threads-threads/53964 -and https://discourse.julialang.org/t/threads-threads-with-one-thread-how-to-remove-the-overhead/58435. +Some discussion can be found at [https://discourse.julialang.org/t/overhead-of-threads-threads/53964](https://discourse.julialang.org/t/overhead-of-threads-threads/53964) +and [https://discourse.julialang.org/t/threads-threads-with-one-thread-how-to-remove-the-overhead/58435](https://discourse.julialang.org/t/threads-threads-with-one-thread-how-to-remove-the-overhead/58435). """ macro threaded(expr) # Use `esc(quote ... end)` for nested macro calls as suggested in diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl index 27c1bed5ca4..4ecf7dd3fcc 100644 --- a/src/auxiliary/math.jl +++ b/src/auxiliary/math.jl @@ -51,7 +51,7 @@ Given ε = 1.0e-4, we use the following algorithm. - Agner Fog. Lists of instruction latencies, throughputs and micro-operation breakdowns for Intel, AMD, and VIA CPUs. - https://www.agner.org/optimize/instruction_tables.pdf + [https://www.agner.org/optimize/instruction_tables.pdf](https://www.agner.org/optimize/instruction_tables.pdf) """ @inline function ln_mean(x, y) epsilon_f2 = 1.0e-4 @@ -166,8 +166,10 @@ checks necessary in the presence of `NaN`s (or signed zeros). # Examples +```jldoctest julia> max(2, 5, 1) 5 +``` """ @inline max(args...) = @fastmath max(args...) @@ -183,8 +185,10 @@ checks necessary in the presence of `NaN`s (or signed zeros). # Examples +```jldoctest julia> min(2, 5, 1) 1 +``` """ @inline min(args...) = @fastmath min(args...) diff --git a/src/auxiliary/mpi.jl b/src/auxiliary/mpi.jl index 2c485b4832c..c85c23670b0 100644 --- a/src/auxiliary/mpi.jl +++ b/src/auxiliary/mpi.jl @@ -72,7 +72,7 @@ You must pass this function as a keyword argument to OrdinaryDiffEq.jl's `solve` when using error-based step size control with MPI parallel execution of Trixi.jl. -See the "Advanced Adaptive Stepsize Control" section of the [documentation](https://docs.sciml.ai/DiffEqDocs/stable/basics/common_solver_opts/) +See the "Advanced Adaptive Stepsize Control" section of the [documentation](https://docs.sciml.ai/DiffEqDocs/stable/basics/common_solver_opts/). """ ode_norm(u::Number, t) = @fastmath abs(u) function ode_norm(u::AbstractArray, t) @@ -125,6 +125,6 @@ You should pass this function as a keyword argument to OrdinaryDiffEq.jl's `solve` when using error-based step size control with MPI parallel execution of Trixi.jl. 
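A minimal sketch of how this is meant to be used (the time integration method and tolerances are placeholders, and `ode` and `callbacks` are assumed to be set up as in the usual elixirs):
```julia
using OrdinaryDiffEq

# Pass the MPI-aware norm (and the MPI-aware unstable check, see below)
# explicitly to `solve` when running with error-based step size control.
sol = solve(ode, RDPK3SpFSAL49();
            abstol = 1.0e-8, reltol = 1.0e-8,
            internalnorm = ode_norm,
            unstable_check = ode_unstable_check,
            save_everystep = false, callback = callbacks)
```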
-See the "Miscellaneous" section of the [documentation](https://docs.sciml.ai/DiffEqDocs/stable/basics/common_solver_opts/) +See the "Miscellaneous" section of the [documentation](https://docs.sciml.ai/DiffEqDocs/stable/basics/common_solver_opts/). """ ode_unstable_check(dt, u, semi, t) = isnan(dt) diff --git a/src/equations/acoustic_perturbation_2d.jl b/src/equations/acoustic_perturbation_2d.jl index 786630a14c7..f4ce770e1e9 100644 --- a/src/equations/acoustic_perturbation_2d.jl +++ b/src/equations/acoustic_perturbation_2d.jl @@ -145,7 +145,7 @@ function initial_condition_convergence_test(x, t, end """ - source_terms_convergence_test(u, x, t, equations::AcousticPerturbationEquations2D) + source_terms_convergence_test(u, x, t, equations::AcousticPerturbationEquations2D) Source terms used for convergence tests in combination with [`initial_condition_convergence_test`](@ref). diff --git a/src/equations/compressible_euler_2d.jl b/src/equations/compressible_euler_2d.jl index 66e3c7bff84..89f04ef1e05 100644 --- a/src/equations/compressible_euler_2d.jl +++ b/src/equations/compressible_euler_2d.jl @@ -31,7 +31,7 @@ The compressible Euler equations ``` for an ideal gas with ratio of specific heats `gamma` in two space dimensions. -Here, ``\rho`` is the density, ``v_1``,`v_2` the velocities, ``e`` the specific total energy **rather than** specific internal energy, and +Here, ``\rho`` is the density, ``v_1``, ``v_2`` the velocities, ``e`` the specific total energy **rather than** specific internal energy, and ```math p = (\gamma - 1) \left( \rho e - \frac{1}{2} \rho (v_1^2+v_2^2) \right) ``` diff --git a/src/equations/compressible_euler_3d.jl b/src/equations/compressible_euler_3d.jl index c16a454b176..cd081cfc42a 100644 --- a/src/equations/compressible_euler_3d.jl +++ b/src/equations/compressible_euler_3d.jl @@ -36,7 +36,7 @@ The compressible Euler equations ``` for an ideal gas with ratio of specific heats `gamma` in three space dimensions. -Here, ``\rho`` is the density, ``v_1``,`v_2`, `v_3` the velocities, ``e`` the specific total energy **rather than** specific internal energy, and +Here, ``\rho`` is the density, ``v_1``, ``v_2``, ``v_3`` the velocities, ``e`` the specific total energy **rather than** specific internal energy, and ```math p = (\gamma - 1) \left( \rho e - \frac{1}{2} \rho (v_1^2+v_2^2+v_3^2) \right) ``` diff --git a/src/equations/compressible_euler_multicomponent_1d.jl b/src/equations/compressible_euler_multicomponent_1d.jl index 4a50d60471a..23ac222b976 100644 --- a/src/equations/compressible_euler_multicomponent_1d.jl +++ b/src/equations/compressible_euler_multicomponent_1d.jl @@ -44,8 +44,8 @@ specific heat capacity at constant volume of component ``i``. In case of more than one component, the specific heat ratios `gammas` and the gas constants `gas_constants` should be passed as tuples, e.g., `gammas=(1.4, 1.667)`. -The remaining variables like the specific heats at constant volume 'cv' or the specific heats at -constant pressure 'cp' are then calculated considering a calorically perfect gas. +The remaining variables like the specific heats at constant volume `cv` or the specific heats at +constant pressure `cp` are then calculated considering a calorically perfect gas. 
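As a sketch of the tuple-based construction described above (the numeric values are illustrative and not taken from a particular setup):
```julia
using Trixi

# Two-component mixture: `gammas` and `gas_constants` carry one entry per
# component, as explained in the docstring above.
equations = CompressibleEulerMulticomponentEquations1D(gammas = (1.4, 1.667),
                                                       gas_constants = (0.287, 1.578))
```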
""" struct CompressibleEulerMulticomponentEquations1D{NVARS, NCOMP, RealT <: Real} <: AbstractCompressibleEulerMulticomponentEquations{1, NVARS, NCOMP} @@ -247,8 +247,8 @@ end Entropy conserving two-point flux by - Ayoub Gouasmi, Karthik Duraisamy (2020) - "Formulation of Entropy-Stable schemes for the multicomponent compressible Euler equations"" - arXiv:1904.00972v3 [math.NA] 4 Feb 2020 + "Formulation of Entropy-Stable schemes for the multicomponent compressible Euler equations" + [arXiv:1904.00972v3](https://arxiv.org/abs/1904.00972) [math.NA] 4 Feb 2020 """ @inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerMulticomponentEquations1D) diff --git a/src/equations/compressible_euler_multicomponent_2d.jl b/src/equations/compressible_euler_multicomponent_2d.jl index 5a015777cb1..7b437f4a1b4 100644 --- a/src/equations/compressible_euler_multicomponent_2d.jl +++ b/src/equations/compressible_euler_multicomponent_2d.jl @@ -48,8 +48,8 @@ specific heat capacity at constant volume of component ``i``. In case of more than one component, the specific heat ratios `gammas` and the gas constants `gas_constants` in [kJ/(kg*K)] should be passed as tuples, e.g., `gammas=(1.4, 1.667)`. -The remaining variables like the specific heats at constant volume 'cv' or the specific heats at -constant pressure 'cp' are then calculated considering a calorically perfect gas. +The remaining variables like the specific heats at constant volume `cv` or the specific heats at +constant pressure `cp` are then calculated considering a calorically perfect gas. """ struct CompressibleEulerMulticomponentEquations2D{NVARS, NCOMP, RealT <: Real} <: AbstractCompressibleEulerMulticomponentEquations{2, NVARS, NCOMP} @@ -275,8 +275,8 @@ end Adaption of the entropy conserving two-point flux by - Ayoub Gouasmi, Karthik Duraisamy (2020) - "Formulation of Entropy-Stable schemes for the multicomponent compressible Euler equations"" - arXiv:1904.00972v3 [math.NA] 4 Feb 2020 + "Formulation of Entropy-Stable schemes for the multicomponent compressible Euler equations" + [arXiv:1904.00972v3](https://arxiv.org/abs/1904.00972) [math.NA] 4 Feb 2020 """ @inline function flux_chandrashekar(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerMulticomponentEquations2D) diff --git a/src/equations/compressible_navier_stokes_2d.jl b/src/equations/compressible_navier_stokes_2d.jl index 33badba15d9..9b06e0b5abf 100644 --- a/src/equations/compressible_navier_stokes_2d.jl +++ b/src/equations/compressible_navier_stokes_2d.jl @@ -73,8 +73,8 @@ where w_2 = \frac{\rho v_1}{p},\, w_3 = \frac{\rho v_2}{p},\, w_4 = -\frac{\rho}{p} ``` -#!!! warning "Experimental code" -# This code is experimental and may be changed or removed in any future release. +!!! warning "Experimental code" + This code is experimental and may be changed or removed in any future release. """ struct CompressibleNavierStokesDiffusion2D{GradientVariables, RealT <: Real, E <: AbstractCompressibleEulerEquations{2}} <: @@ -94,8 +94,8 @@ struct CompressibleNavierStokesDiffusion2D{GradientVariables, RealT <: Real, end """ -#!!! warning "Experimental code" -# This code is experimental and may be changed or removed in any future release. +!!! warning "Experimental code" + This code is experimental and may be changed or removed in any future release. `GradientVariablesPrimitive` and `GradientVariablesEntropy` are gradient variable type parameters for `CompressibleNavierStokesDiffusion2D`. 
By default, the gradient variables are set to be diff --git a/src/equations/compressible_navier_stokes_3d.jl b/src/equations/compressible_navier_stokes_3d.jl index 8930489295d..0b770dff1ca 100644 --- a/src/equations/compressible_navier_stokes_3d.jl +++ b/src/equations/compressible_navier_stokes_3d.jl @@ -73,8 +73,8 @@ where w_2 = \frac{\rho v_1}{p},\, w_3 = \frac{\rho v_2}{p},\, w_4 = \frac{\rho v_3}{p},\, w_5 = -\frac{\rho}{p} ``` -#!!! warning "Experimental code" -# This code is experimental and may be changed or removed in any future release. +!!! warning "Experimental code" + This code is experimental and may be changed or removed in any future release. """ struct CompressibleNavierStokesDiffusion3D{GradientVariables, RealT <: Real, E <: AbstractCompressibleEulerEquations{3}} <: diff --git a/src/equations/hyperbolic_diffusion_2d.jl b/src/equations/hyperbolic_diffusion_2d.jl index 25536a060f8..511d1b8935d 100644 --- a/src/equations/hyperbolic_diffusion_2d.jl +++ b/src/equations/hyperbolic_diffusion_2d.jl @@ -10,7 +10,7 @@ The linear hyperbolic diffusion equations in two space dimensions. A description of this system can be found in Sec. 2.5 of the book "I Do Like CFD, Too: Vol 1". -The book is freely available at http://www.cfdbooks.com/ and further analysis can be found in +The book is freely available at [http://www.cfdbooks.com/](http://www.cfdbooks.com/) and further analysis can be found in the paper by Nishikawa [DOI: 10.1016/j.jcp.2007.07.029](https://doi.org/10.1016/j.jcp.2007.07.029) """ struct HyperbolicDiffusionEquations2D{RealT <: Real} <: diff --git a/src/equations/hyperbolic_diffusion_3d.jl b/src/equations/hyperbolic_diffusion_3d.jl index bf6a00140d4..ed807511b67 100644 --- a/src/equations/hyperbolic_diffusion_3d.jl +++ b/src/equations/hyperbolic_diffusion_3d.jl @@ -10,7 +10,7 @@ The linear hyperbolic diffusion equations in three space dimensions. A description of this system can be found in Sec. 2.5 of the book "I Do Like CFD, Too: Vol 1". -The book is freely available at http://www.cfdbooks.com/ and further analysis can be found in +The book is freely available at [http://www.cfdbooks.com/](http://www.cfdbooks.com/) and further analysis can be found in the paper by Nishikawa [DOI: 10.1016/j.jcp.2007.07.029](https://doi.org/10.1016/j.jcp.2007.07.029) """ struct HyperbolicDiffusionEquations3D{RealT <: Real} <: diff --git a/src/equations/ideal_glm_mhd_3d.jl b/src/equations/ideal_glm_mhd_3d.jl index 401fcd2daf1..2e149d2849f 100644 --- a/src/equations/ideal_glm_mhd_3d.jl +++ b/src/equations/ideal_glm_mhd_3d.jl @@ -41,7 +41,7 @@ end # Set initial conditions at physical location `x` for time `t` """ -initial_condition_constant(x, t, equations::IdealGlmMhdEquations3D) + initial_condition_constant(x, t, equations::IdealGlmMhdEquations3D) A constant initial condition to test free-stream preservation. """ diff --git a/src/equations/shallow_water_two_layer_1d.jl b/src/equations/shallow_water_two_layer_1d.jl index 02899171509..e126eec7c25 100644 --- a/src/equations/shallow_water_two_layer_1d.jl +++ b/src/equations/shallow_water_two_layer_1d.jl @@ -392,7 +392,7 @@ end equations::ShallowWaterTwoLayerEquations1D) Entropy stable surface flux for the two-layer shallow water equations. Uses the entropy -conservative flux_fjordholm_etal and adds a Lax-Friedrichs type dissipation dependent on the jump +conservative [`flux_fjordholm_etal`](@ref) and adds a Lax-Friedrichs type dissipation dependent on the jump of entropy variables. 
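As a usage sketch mirroring the two-layer shallow water elixirs (the polynomial degree and the flux pairing are the customary choices there, not requirements of this patch), such an entropy-stable surface flux is combined with an entropy-conservative volume flux:

```julia
using Trixi

# Pair the conservative flux with its nonconservative companion
surface_flux = (flux_es_fjordholm_etal, flux_nonconservative_fjordholm_etal)
volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
solver = DGSEM(polydeg = 3, surface_flux = surface_flux,
               volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
```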
Further details are available in the paper: diff --git a/src/equations/shallow_water_two_layer_2d.jl b/src/equations/shallow_water_two_layer_2d.jl index b5e52d636e4..a54831c711f 100644 --- a/src/equations/shallow_water_two_layer_2d.jl +++ b/src/equations/shallow_water_two_layer_2d.jl @@ -695,8 +695,9 @@ end """ flux_es_fjordholm_etal(u_ll, u_rr, orientation_or_normal_direction, equations::ShallowWaterTwoLayerEquations1D) + Entropy stable surface flux for the two-layer shallow water equations. Uses the entropy conservative -flux_fjordholm_etal and adds a Lax-Friedrichs type dissipation dependent on the jump of entropy +[`flux_fjordholm_etal`](@ref) and adds a Lax-Friedrichs type dissipation dependent on the jump of entropy variables. Further details are available in the paper: diff --git a/src/meshes/structured_mesh.jl b/src/meshes/structured_mesh.jl index 5872681933a..df067db833d 100644 --- a/src/meshes/structured_mesh.jl +++ b/src/meshes/structured_mesh.jl @@ -33,7 +33,7 @@ Create a StructuredMesh of the given size and shape that uses `RealT` as coordin the reference mesh to the physical domain. If no `mapping_as_string` is defined, this function must be defined with the name `mapping` to allow for restarts. - This will be changed in the future, see https://github.com/trixi-framework/Trixi.jl/issues/541. + This will be changed in the future, see [https://github.com/trixi-framework/Trixi.jl/issues/541](https://github.com/trixi-framework/Trixi.jl/issues/541). - `RealT::Type`: the type that should be used for coordinates. - `periodicity`: either a `Bool` deciding if all of the boundaries are periodic or an `NTuple{NDIMS, Bool}` deciding for each dimension if the boundaries in this dimension are periodic. @@ -41,7 +41,7 @@ Create a StructuredMesh of the given size and shape that uses `RealT` as coordin - `mapping_as_string::String`: the code that defines the `mapping`. If `CodeTracking` can't find the function definition, it can be passed directly here. The code string must define the mapping function with the name `mapping`. - This will be changed in the future, see https://github.com/trixi-framework/Trixi.jl/issues/541. + This will be changed in the future, see [https://github.com/trixi-framework/Trixi.jl/issues/541](https://github.com/trixi-framework/Trixi.jl/issues/541). """ function StructuredMesh(cells_per_dimension, mapping; RealT = Float64, periodicity = true, unsaved_changes = true, diff --git a/src/solvers/dgmulti/types.jl b/src/solvers/dgmulti/types.jl index f1f7b158dec..fe6510856b0 100644 --- a/src/solvers/dgmulti/types.jl +++ b/src/solvers/dgmulti/types.jl @@ -180,9 +180,9 @@ GeometricTermsType(mesh_type::Curved, element_type::AbstractElemShape) = NonAffi # other potential mesh types to add later: Polynomial{polydeg_geo}? """ - DGMultiMesh(dg::DGMulti{NDIMS}, vertex_coordinates, EToV; - is_on_boundary=nothing, - periodicity=ntuple(_->false, NDIMS)) where {NDIMS} + DGMultiMesh(dg::DGMulti{NDIMS}, vertex_coordinates, EToV; + is_on_boundary=nothing, + periodicity=ntuple(_->false, NDIMS)) where {NDIMS} - `dg::DGMulti` contains information associated with to the reference element (e.g., quadrature, basis evaluation, differentiation, etc). diff --git a/src/solvers/dgsem_tree/indicators.jl b/src/solvers/dgsem_tree/indicators.jl index 2eb0af87148..b8f8a796f2b 100644 --- a/src/solvers/dgsem_tree/indicators.jl +++ b/src/solvers/dgsem_tree/indicators.jl @@ -159,7 +159,7 @@ and `basis` if this indicator should be used for shock capturing. 
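For orientation, a typical shock-capturing setup that passes `equations` and `basis` directly looks as follows; the parameter values are the common defaults used in the Trixi.jl elixirs, not requirements:

```julia
using Trixi

equations = CompressibleEulerEquations2D(1.4)
basis = LobattoLegendreBasis(3)
indicator_sc = IndicatorHennemannGassner(equations, basis,
                                         alpha_max = 0.5,
                                         alpha_min = 0.001,
                                         alpha_smooth = true,
                                         variable = density_pressure)
volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
                                                 volume_flux_dg = flux_ranocha,
                                                 volume_flux_fv = flux_lax_friedrichs)
```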
- Löhner (1987) "An adaptive finite element scheme for transient problems in CFD" [doi: 10.1016/0045-7825(87)90098-3](https://doi.org/10.1016/0045-7825(87)90098-3) -- http://flash.uchicago.edu/site/flashcode/user_support/flash4_ug_4p62/node59.html#SECTION05163100000000000000 +- [https://flash.rochester.edu/site/flashcode/user_support/flash4_ug_4p62/node59.html#SECTION05163100000000000000](https://flash.rochester.edu/site/flashcode/user_support/flash4_ug_4p62/node59.html#SECTION05163100000000000000) """ struct IndicatorLöhner{RealT <: Real, Variable, Cache} <: AbstractIndicator f_wave::RealT # TODO: Taal documentation From 054a917a09127570dabc458f1350550f2ddb6a09 Mon Sep 17 00:00:00 2001 From: Andrew Winters Date: Wed, 21 Jun 2023 17:56:07 +0200 Subject: [PATCH 063/163] update links to Flash manual (#1544) Co-authored-by: Hendrik Ranocha --- examples/p4est_2d_dgsem/elixir_euler_sedov.jl | 4 ++-- examples/p4est_3d_dgsem/elixir_euler_sedov.jl | 4 ++-- .../elixir_eulergravity_jeans_instability.jl | 6 +++--- .../elixir_eulergravity_sedov_blast_wave.jl | 10 +++++----- examples/structured_1d_dgsem/elixir_euler_sedov.jl | 8 ++++---- examples/structured_2d_dgsem/elixir_euler_sedov.jl | 10 +++++----- examples/structured_3d_dgsem/elixir_euler_sedov.jl | 10 +++++----- examples/tree_1d_dgsem/elixir_euler_positivity.jl | 4 ++-- .../tree_1d_dgsem/elixir_euler_sedov_blast_wave.jl | 4 ++-- .../elixir_euler_sedov_blast_wave_pure_fv.jl | 4 ++-- examples/tree_2d_dgsem/elixir_euler_positivity.jl | 4 ++-- .../tree_2d_dgsem/elixir_euler_sedov_blast_wave.jl | 4 ++-- ...er_sedov_blast_wave_neuralnetwork_perssonperaire.jl | 4 ++-- .../tree_3d_dgsem/elixir_euler_sedov_blast_wave.jl | 6 +++--- examples/unstructured_2d_dgsem/elixir_euler_sedov.jl | 4 ++-- 15 files changed, 43 insertions(+), 43 deletions(-) diff --git a/examples/p4est_2d_dgsem/elixir_euler_sedov.jl b/examples/p4est_2d_dgsem/elixir_euler_sedov.jl index 9f5247e8c4d..d5d8e0c78bf 100644 --- a/examples/p4est_2d_dgsem/elixir_euler_sedov.jl +++ b/examples/p4est_2d_dgsem/elixir_euler_sedov.jl @@ -11,7 +11,7 @@ equations = CompressibleEulerEquations2D(1.4) initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations2D) The Sedov blast wave setup based on Flash -- http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 """ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations2D) # Set up polar coordinates @@ -20,7 +20,7 @@ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEq y_norm = x[2] - inicenter[2] r = sqrt(x_norm^2 + y_norm^2) - # Setup based on http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 + # Setup based on https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 r0 = 0.21875 # = 3.5 * smallest dx (for domain length=4 and max-ref=6) E = 1.0 p0_inner = 3 * (equations.gamma - 1) * E / (3 * pi * r0^2) diff --git a/examples/p4est_3d_dgsem/elixir_euler_sedov.jl b/examples/p4est_3d_dgsem/elixir_euler_sedov.jl index 00da4132851..6fa285b5565 100644 --- a/examples/p4est_3d_dgsem/elixir_euler_sedov.jl +++ b/examples/p4est_3d_dgsem/elixir_euler_sedov.jl @@ -11,7 +11,7 @@ equations = CompressibleEulerEquations3D(1.4) initial_condition_medium_sedov_blast_wave(x, t, 
equations::CompressibleEulerEquations3D) The Sedov blast wave setup based on Flash -- http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 with smaller strength of the initial discontinuity. """ function initial_condition_medium_sedov_blast_wave(x, t, equations::CompressibleEulerEquations3D) @@ -22,7 +22,7 @@ function initial_condition_medium_sedov_blast_wave(x, t, equations::Compressible z_norm = x[3] - inicenter[3] r = sqrt(x_norm^2 + y_norm^2 + z_norm^2) - # Setup based on http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 + # Setup based on https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 r0 = 0.21875 # = 3.5 * smallest dx (for domain length=4 and max-ref=6) E = 1.0 p0_inner = 3 * (equations.gamma - 1) * E / (4 * pi * r0^2) diff --git a/examples/paper_self_gravitating_gas_dynamics/elixir_eulergravity_jeans_instability.jl b/examples/paper_self_gravitating_gas_dynamics/elixir_eulergravity_jeans_instability.jl index 1774e39513d..fb445616cd4 100644 --- a/examples/paper_self_gravitating_gas_dynamics/elixir_eulergravity_jeans_instability.jl +++ b/examples/paper_self_gravitating_gas_dynamics/elixir_eulergravity_jeans_instability.jl @@ -15,7 +15,7 @@ The classical Jeans instability taken from - Dominik Derigs, Andrew R. Winters, Gregor J. Gassner, Stefanie Walch (2016) A Novel High-Order, Entropy Stable, 3D AMR MHD Solver with Guaranteed Positive Pressure [arXiv: 1605.03572](https://arxiv.org/abs/1605.03572) -- Flash manual https://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel.pdf +- Flash manual https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node189.html#SECTION010131000000000000000 in CGS (centimeter, gram, second) units. 
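For background on this setup (a standard result, not part of this patch): a small perturbation with wave number ``k`` in a self-gravitating, initially homogeneous gas obeys the Jeans dispersion relation
```math
\omega^2 = c_s^2 k^2 - 4 \pi G \rho_0 ,
```
so modes with ``k^2 < 4 \pi G \rho_0 / c_s^2``, i.e., wavelengths above the Jeans length ``\lambda_J = c_s \sqrt{\pi / (G \rho_0)}``, have ``\omega^2 < 0`` and grow exponentially.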
""" function initial_condition_jeans_instability(x, t, @@ -32,7 +32,7 @@ function initial_condition_jeans_instability(x, t, pres0 = 1.5e7 # dyn/cm^2 delta0 = 1e-3 # set wave vector values for perturbation (units 1/cm) - # see FLASH manual: https://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel.pdf + # see FLASH manual: https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node189.html#SECTION010131000000000000000 kx = 2.0*pi/0.5 # 2π/λ_x, λ_x = 0.5 ky = 0.0 # 2π/λ_y, λ_y = 1e10 k_dot_x = kx*x[1] + ky*x[2] @@ -49,7 +49,7 @@ function initial_condition_jeans_instability(x, t, equations::HyperbolicDiffusionEquations2D) # gravity equation: -Δϕ = -4πGρ # Constants taken from the FLASH manual - # https://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel.pdf + # https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node189.html#SECTION010131000000000000000 rho0 = 1.5e7 delta0 = 1e-3 diff --git a/examples/paper_self_gravitating_gas_dynamics/elixir_eulergravity_sedov_blast_wave.jl b/examples/paper_self_gravitating_gas_dynamics/elixir_eulergravity_sedov_blast_wave.jl index f7bb5bbb01c..8933224a2c7 100644 --- a/examples/paper_self_gravitating_gas_dynamics/elixir_eulergravity_sedov_blast_wave.jl +++ b/examples/paper_self_gravitating_gas_dynamics/elixir_eulergravity_sedov_blast_wave.jl @@ -15,14 +15,14 @@ Adaptation of the Sedov blast wave with self-gravity taken from A purely hyperbolic discontinuous Galerkin approach for self-gravitating gas dynamics [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) based on -- http://flash.uchicago.edu/site/flashcode/user_support/flash4_ug_4p62/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114100000000000000 Should be used together with [`boundary_condition_sedov_self_gravity`](@ref). """ function initial_condition_sedov_self_gravity(x, t, equations::CompressibleEulerEquations2D) # Set up polar coordinates r = sqrt(x[1]^2 + x[2]^2) - # Setup based on http://flash.uchicago.edu/site/flashcode/user_support/flash4_ug_4p62/node184.html#SECTION010114000000000000000 + # Setup based on https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114100000000000000 r0 = 0.125 # = 4.0 * smallest dx (for domain length=8 and max-ref=8) E = 1.0 p_inner = (equations.gamma - 1) * E / (pi * r0^2) @@ -59,7 +59,7 @@ Adaptation of the Sedov blast wave with self-gravity taken from A purely hyperbolic discontinuous Galerkin approach for self-gravitating gas dynamics [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) based on -- http://flash.uchicago.edu/site/flashcode/user_support/flash4_ug_4p62/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114100000000000000 Should be used together with [`initial_condition_sedov_self_gravity`](@ref). 
""" function boundary_condition_sedov_self_gravity(u_inner, orientation, direction, x, t, @@ -122,7 +122,7 @@ Adaptation of the Sedov blast wave with self-gravity taken from A purely hyperbolic discontinuous Galerkin approach for self-gravitating gas dynamics [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) based on -- http://flash.uchicago.edu/site/flashcode/user_support/flash4_ug_4p62/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114100000000000000 Should be used together with [`boundary_condition_sedov_self_gravity`](@ref). """ function initial_condition_sedov_self_gravity(x, t, equations::HyperbolicDiffusionEquations2D) @@ -143,7 +143,7 @@ Adaptation of the Sedov blast wave with self-gravity taken from A purely hyperbolic discontinuous Galerkin approach for self-gravitating gas dynamics [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) based on -- http://flash.uchicago.edu/site/flashcode/user_support/flash4_ug_4p62/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114100000000000000 Should be used together with [`initial_condition_sedov_self_gravity`](@ref). """ function boundary_condition_sedov_self_gravity(u_inner, orientation, direction, x, t, diff --git a/examples/structured_1d_dgsem/elixir_euler_sedov.jl b/examples/structured_1d_dgsem/elixir_euler_sedov.jl index ee950b3aaaa..9d7be21a5c1 100644 --- a/examples/structured_1d_dgsem/elixir_euler_sedov.jl +++ b/examples/structured_1d_dgsem/elixir_euler_sedov.jl @@ -11,7 +11,7 @@ equations = CompressibleEulerEquations1D(1.4) initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations1D) The Sedov blast wave setup based on Flash -- http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 """ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations1D) # Set up polar coordinates @@ -19,7 +19,7 @@ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEq x_norm = x[1] - inicenter[1] r = abs(x_norm) - # Setup based on http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 + # Setup based on https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 r0 = 0.21875 # = 3.5 * smallest dx (for domain length=4 and max-ref=6) # r0 = 0.5 # = more reasonable setup E = 1.0 @@ -78,8 +78,8 @@ save_solution = SaveSolutionCallback(interval=100, stepsize_callback = StepsizeCallback(cfl=0.5) -callbacks = CallbackSet(summary_callback, - analysis_callback, +callbacks = CallbackSet(summary_callback, + analysis_callback, alive_callback, save_solution, stepsize_callback) diff --git a/examples/structured_2d_dgsem/elixir_euler_sedov.jl b/examples/structured_2d_dgsem/elixir_euler_sedov.jl index ed1bfab3be2..efc3b6627c0 100644 --- a/examples/structured_2d_dgsem/elixir_euler_sedov.jl +++ b/examples/structured_2d_dgsem/elixir_euler_sedov.jl @@ -11,7 +11,7 @@ equations = CompressibleEulerEquations2D(1.4) initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations2D) The Sedov blast wave setup based on Flash -- http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 
+- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 """ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations2D) # Set up polar coordinates @@ -20,7 +20,7 @@ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEq y_norm = x[2] - inicenter[2] r = sqrt(x_norm^2 + y_norm^2) - # Setup based on http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 + # Setup based on https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 r0 = 0.21875 # = 3.5 * smallest dx (for domain length=4 and max-ref=6) E = 1.0 p0_inner = 3 * (equations.gamma - 1) * E / (3 * pi * r0^2) @@ -59,12 +59,12 @@ function mapping(xi, eta) y = eta + 0.125 * (cos(1.5 * pi * xi) * cos(0.5 * pi * eta)) x = xi + 0.125 * (cos(0.5 * pi * xi) * cos(2 * pi * y)) - + return SVector(x, y) end - + cells_per_dimension = (16, 16) - + mesh = StructuredMesh(cells_per_dimension, mapping, periodicity=true) # create the semidiscretization diff --git a/examples/structured_3d_dgsem/elixir_euler_sedov.jl b/examples/structured_3d_dgsem/elixir_euler_sedov.jl index 8f428495b4f..e0595437c99 100644 --- a/examples/structured_3d_dgsem/elixir_euler_sedov.jl +++ b/examples/structured_3d_dgsem/elixir_euler_sedov.jl @@ -11,7 +11,7 @@ equations = CompressibleEulerEquations3D(1.4) initial_condition_medium_sedov_blast_wave(x, t, equations::CompressibleEulerEquations3D) The Sedov blast wave setup based on Flash -- http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 with smaller strength of the initial discontinuity. 
""" function initial_condition_medium_sedov_blast_wave(x, t, equations::CompressibleEulerEquations3D) @@ -22,11 +22,11 @@ function initial_condition_medium_sedov_blast_wave(x, t, equations::Compressible z_norm = x[3] - inicenter[3] r = sqrt(x_norm^2 + y_norm^2 + z_norm^2) - # Setup based on http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 + # Setup based on https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 r0 = 0.21875 # = 3.5 * smallest dx (for domain length=4 and max-ref=6) E = 1.0 p0_inner = 3 * (equations.gamma - 1) * E / (4 * pi * r0^2) - p0_outer = 1.0e-3 + p0_outer = 1.0e-3 # Calculate primitive variables rho = 1.0 @@ -52,8 +52,8 @@ indicator_sc = IndicatorHennemannGassner(equations, basis, volume_integral = VolumeIntegralShockCapturingHG(indicator_sc; volume_flux_dg=volume_flux, volume_flux_fv=surface_flux) - -solver = DGSEM(polydeg=polydeg, surface_flux=surface_flux, volume_integral=volume_integral) + +solver = DGSEM(polydeg=polydeg, surface_flux=surface_flux, volume_integral=volume_integral) # Mapping as described in https://arxiv.org/abs/2012.12040 function mapping(xi, eta, zeta) diff --git a/examples/tree_1d_dgsem/elixir_euler_positivity.jl b/examples/tree_1d_dgsem/elixir_euler_positivity.jl index 7942937151a..966661e8894 100644 --- a/examples/tree_1d_dgsem/elixir_euler_positivity.jl +++ b/examples/tree_1d_dgsem/elixir_euler_positivity.jl @@ -11,7 +11,7 @@ equations = CompressibleEulerEquations1D(1.4) initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations1D) The Sedov blast wave setup based on Flash -- http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 """ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations1D) # Set up polar coordinates @@ -19,7 +19,7 @@ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEq x_norm = x[1] - inicenter[1] r = abs(x_norm) - # Setup based on http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 + # Setup based on https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 r0 = 0.21875 # = 3.5 * smallest dx (for domain length=4 and max-ref=6) # r0 = 0.5 # = more reasonable setup E = 1.0 diff --git a/examples/tree_1d_dgsem/elixir_euler_sedov_blast_wave.jl b/examples/tree_1d_dgsem/elixir_euler_sedov_blast_wave.jl index 746a7cf1bac..106ccacf4f5 100644 --- a/examples/tree_1d_dgsem/elixir_euler_sedov_blast_wave.jl +++ b/examples/tree_1d_dgsem/elixir_euler_sedov_blast_wave.jl @@ -11,7 +11,7 @@ equations = CompressibleEulerEquations1D(1.4) initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations1D) The Sedov blast wave setup based on Flash -- http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 """ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations1D) # Set up polar coordinates @@ -19,7 +19,7 @@ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEq x_norm = x[1] - inicenter[1] r = abs(x_norm) - # Setup based on 
http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 + # Setup based on https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 r0 = 0.21875 # = 3.5 * smallest dx (for domain length=4 and max-ref=6) # r0 = 0.5 # = more reasonable setup E = 1.0 diff --git a/examples/tree_1d_dgsem/elixir_euler_sedov_blast_wave_pure_fv.jl b/examples/tree_1d_dgsem/elixir_euler_sedov_blast_wave_pure_fv.jl index 00b80dbae92..ebe8fa7cebf 100644 --- a/examples/tree_1d_dgsem/elixir_euler_sedov_blast_wave_pure_fv.jl +++ b/examples/tree_1d_dgsem/elixir_euler_sedov_blast_wave_pure_fv.jl @@ -11,7 +11,7 @@ equations = CompressibleEulerEquations1D(1.4) initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations1D) The Sedov blast wave setup based on Flash -- http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 """ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations1D) # Set up polar coordinates @@ -19,7 +19,7 @@ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEq x_norm = x[1] - inicenter[1] r = abs(x_norm) - # Setup based on http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 + # Setup based on https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 r0 = 0.21875 # = 3.5 * smallest dx (for domain length=4 and max-ref=6) # r0 = 0.5 # = more reasonable setup E = 1.0 diff --git a/examples/tree_2d_dgsem/elixir_euler_positivity.jl b/examples/tree_2d_dgsem/elixir_euler_positivity.jl index e40dc3b47af..4c7dd7eb6cf 100644 --- a/examples/tree_2d_dgsem/elixir_euler_positivity.jl +++ b/examples/tree_2d_dgsem/elixir_euler_positivity.jl @@ -11,7 +11,7 @@ equations = CompressibleEulerEquations2D(gamma) initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations2D) The Sedov blast wave setup based on Flash -- http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 """ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations2D) # Set up polar coordinates @@ -20,7 +20,7 @@ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEq y_norm = x[2] - inicenter[2] r = sqrt(x_norm^2 + y_norm^2) - # Setup based on http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 + # Setup based on https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 r0 = 0.21875 # = 3.5 * smallest dx (for domain length=4 and max-ref=6) # r0 = 0.5 # = more reasonable setup E = 1.0 diff --git a/examples/tree_2d_dgsem/elixir_euler_sedov_blast_wave.jl b/examples/tree_2d_dgsem/elixir_euler_sedov_blast_wave.jl index da7e1d55c91..512e5822374 100644 --- a/examples/tree_2d_dgsem/elixir_euler_sedov_blast_wave.jl +++ b/examples/tree_2d_dgsem/elixir_euler_sedov_blast_wave.jl @@ -11,7 +11,7 @@ equations = CompressibleEulerEquations2D(gamma) initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations2D) The Sedov blast wave setup based 
on Flash -- http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 """ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations2D) # Set up polar coordinates @@ -20,7 +20,7 @@ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEq y_norm = x[2] - inicenter[2] r = sqrt(x_norm^2 + y_norm^2) - # Setup based on http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 + # Setup based on https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 r0 = 0.21875 # = 3.5 * smallest dx (for domain length=4 and max-ref=6) # r0 = 0.5 # = more reasonable setup E = 1.0 diff --git a/examples/tree_2d_dgsem/elixir_euler_sedov_blast_wave_neuralnetwork_perssonperaire.jl b/examples/tree_2d_dgsem/elixir_euler_sedov_blast_wave_neuralnetwork_perssonperaire.jl index 56715789377..5fd32da2e5c 100644 --- a/examples/tree_2d_dgsem/elixir_euler_sedov_blast_wave_neuralnetwork_perssonperaire.jl +++ b/examples/tree_2d_dgsem/elixir_euler_sedov_blast_wave_neuralnetwork_perssonperaire.jl @@ -23,7 +23,7 @@ equations = CompressibleEulerEquations2D(gamma) initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations2D) The Sedov blast wave setup based on Flash -- http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 """ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations2D) # Set up polar coordinates @@ -32,7 +32,7 @@ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEq y_norm = x[2] - inicenter[2] r = sqrt(x_norm^2 + y_norm^2) - # Setup based on http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 + # Setup based on https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 r0 = 0.21875 # = 3.5 * smallest dx (for domain length=4 and max-ref=6) # r0 = 0.5 # = more reasonable setup E = 1.0 diff --git a/examples/tree_3d_dgsem/elixir_euler_sedov_blast_wave.jl b/examples/tree_3d_dgsem/elixir_euler_sedov_blast_wave.jl index 336c09e9212..3641878149a 100644 --- a/examples/tree_3d_dgsem/elixir_euler_sedov_blast_wave.jl +++ b/examples/tree_3d_dgsem/elixir_euler_sedov_blast_wave.jl @@ -15,14 +15,14 @@ Adaptation of the Sedov blast wave with self-gravity taken from A purely hyperbolic discontinuous Galerkin approach for self-gravitating gas dynamics [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) based on -- http://flash.uchicago.edu/site/flashcode/user_support/flash4_ug_4p62/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 Should be used together with [`boundary_condition_sedov_self_gravity`](@ref). 
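Because the initial and boundary conditions are designed as a pair, both enter the semidiscretization together. A minimal sketch, assuming `mesh`, `equations`, and `solver` have been set up as in the corresponding elixirs:

```julia
using Trixi

semi = SemidiscretizationHyperbolic(mesh, equations,
                                    initial_condition_sedov_self_gravity, solver,
                                    boundary_conditions = boundary_condition_sedov_self_gravity)
```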
""" function initial_condition_sedov_self_gravity(x, t, equations::CompressibleEulerEquations3D) # Calculate radius as distance from origin r = sqrt(x[1]^2 + x[2]^2 + x[3]^2) - # Setup based on http://flash.uchicago.edu/site/flashcode/user_support/flash4_ug_4p62/node184.html#SECTION010114000000000000000 + # Setup based on https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 r0 = 0.25 # = 4.0 * smallest dx (for domain length=8 and max-ref=7) E = 1.0 p_inner = (equations.gamma - 1) * E / (4/3 * pi * r0^3) @@ -60,7 +60,7 @@ Adaptation of the Sedov blast wave with self-gravity taken from A purely hyperbolic discontinuous Galerkin approach for self-gravitating gas dynamics [arXiv: 2008.10593](https://arxiv.org/abs/2008.10593) based on -- http://flash.uchicago.edu/site/flashcode/user_support/flash4_ug_4p62/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 Should be used together with [`initial_condition_sedov_self_gravity`](@ref). """ function boundary_condition_sedov_self_gravity(u_inner, orientation, direction, x, t, diff --git a/examples/unstructured_2d_dgsem/elixir_euler_sedov.jl b/examples/unstructured_2d_dgsem/elixir_euler_sedov.jl index 3d5a391bd90..570a2084691 100644 --- a/examples/unstructured_2d_dgsem/elixir_euler_sedov.jl +++ b/examples/unstructured_2d_dgsem/elixir_euler_sedov.jl @@ -11,7 +11,7 @@ equations = CompressibleEulerEquations2D(1.4) initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations2D) The Sedov blast wave setup based on Flash -- http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 +- https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 """ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations2D) # Set up polar coordinates @@ -20,7 +20,7 @@ function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEq y_norm = x[2] - inicenter[2] r = sqrt(x_norm^2 + y_norm^2) - # Setup based on http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 + # Setup based on https://flash.rochester.edu/site/flashcode/user_support/flash_ug_devel/node187.html#SECTION010114000000000000000 r0 = 0.21875 # = 3.5 * smallest dx (for domain length=4 and max-ref=6) E = 1.0 p0_inner = 3 * (equations.gamma - 1) * E / (3 * pi * r0^2) From bea4bfed27ce896edcfa3dbe79861a4faef4dec7 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Thu, 22 Jun 2023 17:06:19 +0200 Subject: [PATCH 064/163] `splitting_lax_friedrichs` for `LinearScalarAdvection1D` (#1546) * splitting_lax_friedrichs for LinearScalarAdvection1D * Update src/equations/linear_scalar_advection_1d.jl Co-authored-by: Andrew Winters --------- Co-authored-by: Andrew Winters --- .../tree_1d_fdsbp/elixir_advection_upwind.jl | 57 +++++++++++++++++++ src/equations/inviscid_burgers_1d.jl | 2 +- src/equations/linear_scalar_advection_1d.jl | 38 +++++++++++++ test/test_tree_1d_fdsbp.jl | 18 ++++++ 4 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 examples/tree_1d_fdsbp/elixir_advection_upwind.jl diff --git a/examples/tree_1d_fdsbp/elixir_advection_upwind.jl b/examples/tree_1d_fdsbp/elixir_advection_upwind.jl new file mode 100644 index 00000000000..5c50e1a6c64 --- /dev/null +++ b/examples/tree_1d_fdsbp/elixir_advection_upwind.jl @@ 
-0,0 +1,57 @@ +# !!! warning "Experimental implementation (upwind SBP)" +# This is an experimental feature and may change in future releases. + +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the linear scalar advection equation + +equations = LinearScalarAdvectionEquation1D(1.0) + +function initial_condition_sin(x, t, equation::LinearScalarAdvectionEquation1D) + return SVector(sinpi(x[1] - equation.advection_velocity[1] * t)) +end + +D_upw = upwind_operators(SummationByPartsOperators.Mattsson2017, + derivative_order = 1, + accuracy_order = 4, + xmin = -1.0, xmax = 1.0, + N = 16) +flux_splitting = splitting_lax_friedrichs +solver = FDSBP(D_upw, + surface_integral = SurfaceIntegralUpwind(flux_splitting), + volume_integral = VolumeIntegralUpwind(flux_splitting)) + +coordinates_min = -1.0 +coordinates_max = 1.0 +mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level = 4, + n_cells_max = 10_000) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_sin, solver) + + +############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 2.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 1000 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +callbacks = CallbackSet(summary_callback, + analysis_callback, alive_callback) + + +############################################################################### +# run the simulation + +sol = solve(ode, RDPK3SpFSAL49(); abstol=1.0e-6, reltol=1.0e-6, + ode_default_options()..., callback=callbacks); +summary_callback() # print the timer summary diff --git a/src/equations/inviscid_burgers_1d.jl index 8d4410b6ffe..6a2cfb6aa8e 100644 --- a/src/equations/inviscid_burgers_1d.jl +++ b/src/equations/inviscid_burgers_1d.jl @@ -132,7 +132,7 @@ end equations::InviscidBurgersEquation1D) Naive local Lax-Friedrichs style flux splitting of the form `f⁺ = 0.5 (f + λ u)` -and `f⁻ = 0.5 (f - λ u)` where λ = abs(u). +and `f⁻ = 0.5 (f - λ u)` where `λ = abs(u)`. Returns a tuple of the fluxes "minus" (associated with waves going into the negative axis direction) and "plus" (associated with waves going into the diff --git a/src/equations/linear_scalar_advection_1d.jl index 7769cb61fbf..6c6b9dd3721 100644 --- a/src/equations/linear_scalar_advection_1d.jl +++ b/src/equations/linear_scalar_advection_1d.jl @@ -172,6 +172,44 @@ end return abs.(equation.advection_velocity) end +""" + splitting_lax_friedrichs(u, orientation::Integer, + equations::LinearScalarAdvectionEquation1D) + splitting_lax_friedrichs(u, which::Union{Val{:minus}, Val{:plus}}, + orientation::Integer, + equations::LinearScalarAdvectionEquation1D) + +Naive local Lax-Friedrichs style flux splitting of the form `f⁺ = 0.5 (f + λ u)` +and `f⁻ = 0.5 (f - λ u)` where `λ` is the absolute value of the advection +velocity. + +Returns a tuple of the fluxes "minus" (associated with waves going into the +negative axis direction) and "plus" (associated with waves going into the +positive axis direction). If only one of the fluxes is required, use the +function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}()`. + +!!!
warning "Experimental implementation (upwind SBP)" + This is an experimental feature and may change in future releases. +""" +@inline function splitting_lax_friedrichs(u, orientation::Integer, + equations::LinearScalarAdvectionEquation1D) + fm = splitting_lax_friedrichs(u, Val{:minus}(), orientation, equations) + fp = splitting_lax_friedrichs(u, Val{:plus}(), orientation, equations) + return fm, fp +end + +@inline function splitting_lax_friedrichs(u, ::Val{:plus}, orientation::Integer, + equations::LinearScalarAdvectionEquation1D) + a = equations.advection_velocity[1] + return a > 0 ? flux(u, orientation, equations) : zero(u) +end + +@inline function splitting_lax_friedrichs(u, ::Val{:minus}, orientation::Integer, + equations::LinearScalarAdvectionEquation1D) + a = equations.advection_velocity[1] + return a < 0 ? flux(u, orientation, equations) : zero(u) +end + # Convert conservative variables to primitive @inline cons2prim(u, equation::LinearScalarAdvectionEquation1D) = u diff --git a/test/test_tree_1d_fdsbp.jl b/test/test_tree_1d_fdsbp.jl index a966b3836f3..118385c34b3 100644 --- a/test/test_tree_1d_fdsbp.jl +++ b/test/test_tree_1d_fdsbp.jl @@ -7,6 +7,24 @@ include("test_trixi.jl") EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_fdsbp") +@testset "Linear scalar advection" begin + @trixi_testset "elixir_advection_upwind.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_upwind.jl"), + l2 = [1.7735637157305526e-6], + linf = [1.0418854521951328e-5], + tspan = (0.0, 0.5)) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end + end +end + @testset "Inviscid Burgers" begin @trixi_testset "elixir_burgers_basic.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_burgers_basic.jl"), From 6160fe952bd1d6f619fb77627329520f0b586956 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Fri, 23 Jun 2023 07:50:05 +0200 Subject: [PATCH 065/163] fix typos in FDSBP elixir comments (#1548) --- examples/tree_2d_fdsbp/elixir_euler_convergence.jl | 3 ++- .../tree_2d_fdsbp/elixir_euler_kelvin_helmholtz_instability.jl | 3 ++- examples/tree_2d_fdsbp/elixir_euler_vortex.jl | 3 ++- examples/tree_3d_fdsbp/elixir_euler_convergence.jl | 1 - 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/tree_2d_fdsbp/elixir_euler_convergence.jl b/examples/tree_2d_fdsbp/elixir_euler_convergence.jl index 0843cece67e..2a6c291f0bf 100644 --- a/examples/tree_2d_fdsbp/elixir_euler_convergence.jl +++ b/examples/tree_2d_fdsbp/elixir_euler_convergence.jl @@ -5,7 +5,8 @@ using OrdinaryDiffEq using Trixi ############################################################################### -# semidiscretization of the linear advection equation +# semidiscretization of the compressible Euler equations + equations = CompressibleEulerEquations2D(1.4) initial_condition = initial_condition_convergence_test diff --git a/examples/tree_2d_fdsbp/elixir_euler_kelvin_helmholtz_instability.jl b/examples/tree_2d_fdsbp/elixir_euler_kelvin_helmholtz_instability.jl index 1e58badf47a..e63343852ab 100644 --- a/examples/tree_2d_fdsbp/elixir_euler_kelvin_helmholtz_instability.jl +++ b/examples/tree_2d_fdsbp/elixir_euler_kelvin_helmholtz_instability.jl @@ -5,7 +5,8 @@ using OrdinaryDiffEq using Trixi ############################################################################### -# semidiscretization of the linear advection equation +# 
semidiscretization of the compressible Euler equations + equations = CompressibleEulerEquations2D(1.4) function initial_condition_kelvin_helmholtz_instability(x, t, equations::CompressibleEulerEquations2D) diff --git a/examples/tree_2d_fdsbp/elixir_euler_vortex.jl b/examples/tree_2d_fdsbp/elixir_euler_vortex.jl index abaf3d494d4..c1bee8f9c4d 100644 --- a/examples/tree_2d_fdsbp/elixir_euler_vortex.jl +++ b/examples/tree_2d_fdsbp/elixir_euler_vortex.jl @@ -5,7 +5,8 @@ using OrdinaryDiffEq using Trixi ############################################################################### -# semidiscretization of the linear advection equation +# semidiscretization of the compressible Euler equations + equations = CompressibleEulerEquations2D(1.4) """ diff --git a/examples/tree_3d_fdsbp/elixir_euler_convergence.jl b/examples/tree_3d_fdsbp/elixir_euler_convergence.jl index 576a07e6aba..6aafa1b5cc1 100644 --- a/examples/tree_3d_fdsbp/elixir_euler_convergence.jl +++ b/examples/tree_3d_fdsbp/elixir_euler_convergence.jl @@ -6,7 +6,6 @@ using Trixi ############################################################################### # semidiscretization of the compressible Euler equations - equations = CompressibleEulerEquations3D(1.4) initial_condition = initial_condition_convergence_test From d4c556960d6307d6279c698203a9e741c2479c2e Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Fri, 23 Jun 2023 07:51:00 +0200 Subject: [PATCH 066/163] set version to v0.5.30 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index d3983262591..e015c90310f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.30-pre" +version = "0.5.30" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From 1b69182dc06bddbcb4dd693d7aedb576d68fabc0 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Fri, 23 Jun 2023 07:51:14 +0200 Subject: [PATCH 067/163] set development version to v0.5.31-pre --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index e015c90310f..0edba6b681c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.30" +version = "0.5.31-pre" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From b7be5856eba029a3d91257166be4ee62514ae0dd Mon Sep 17 00:00:00 2001 From: Jesse Chan <1156048+jlchan@users.noreply.github.com> Date: Fri, 23 Jun 2023 03:34:22 -0500 Subject: [PATCH 068/163] Add parabolic BCs for `P4estMesh{2}` (#1493) * generalize function signatures to P4estMesh * add specializations for P4estMesh d * add normals * add surface integrals * fix type ambiguity * generalizing `apply_jacobian!` to P4estMesh * resolving type ambiguity with apply_jacobian! d * `apply_jacobian!` -> `apply_jacobian_parabolic!` * `apply_jacobian!` -> `apply_jacobian_parabolic!` * switch to `apply_jacobian_parabolic!` * Update src/solvers/dgsem_tree/dg_1d_parabolic.jl Co-authored-by: Hendrik Ranocha * missed one * draft of prolong2interfaces and calc_interface_flux * cache -> cache_parabolic * adding prolong2boundaries! and calc_boundary_flux_gradients! 
back * remove todo * variable renaming * extending TreeMesh parabolic functions to P4estMesh * adding elixir * comments * add prolong2boundaries! (untested) * update test * initial commit * fix CI f * Update src/solvers/dgsem_p4est/dg_2d_parabolic.jl Co-authored-by: Hendrik Ranocha * Update src/solvers/dgsem_p4est/dg_2d_parabolic.jl Co-authored-by: Hendrik Ranocha * add "no mortars" check * add curved elixir * fix gradient bug * add curved test * Apply suggestions from code review Co-authored-by: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Co-authored-by: Michael Schlottke-Lakemper * add comment on mapping * reuse P4estMesh{2} code * fix += for muladd * Update examples/p4est_2d_dgsem/elixir_advection_diffusion_periodic_curved.jl Co-authored-by: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> * comment * comments + remove cruft * add BCs for parabolic P43st * add tests * Update examples/p4est_2d_dgsem/elixir_navierstokes_convergence.jl * formatting * fix CNS convergence elixir and add to tests * update test values --------- Co-authored-by: Hendrik Ranocha Co-authored-by: Erik Faulhaber <44124897+efaulhaber@users.noreply.github.com> Co-authored-by: Michael Schlottke-Lakemper --- ..._advection_diffusion_nonperiodic_curved.jl | 96 ++++++++ .../elixir_navierstokes_convergence.jl | 209 ++++++++++++++++++ .../elixir_navierstokes_lid_driven_cavity.jl | 82 +++++++ src/solvers/dgsem_p4est/dg_2d_parabolic.jl | 133 ++++++++++- test/test_parabolic_2d.jl | 32 +++ 5 files changed, 544 insertions(+), 8 deletions(-) create mode 100644 examples/p4est_2d_dgsem/elixir_advection_diffusion_nonperiodic_curved.jl create mode 100644 examples/p4est_2d_dgsem/elixir_navierstokes_convergence.jl create mode 100644 examples/p4est_2d_dgsem/elixir_navierstokes_lid_driven_cavity.jl diff --git a/examples/p4est_2d_dgsem/elixir_advection_diffusion_nonperiodic_curved.jl b/examples/p4est_2d_dgsem/elixir_advection_diffusion_nonperiodic_curved.jl new file mode 100644 index 00000000000..55682f73fce --- /dev/null +++ b/examples/p4est_2d_dgsem/elixir_advection_diffusion_nonperiodic_curved.jl @@ -0,0 +1,96 @@ +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the linear advection-diffusion equation + +diffusivity() = 5.0e-2 +advection_velocity = (1.0, 0.0) +equations = LinearScalarAdvectionEquation2D(advection_velocity) +equations_parabolic = LaplaceDiffusion2D(diffusivity(), equations) + +# Example setup taken from +# - Truman Ellis, Jesse Chan, and Leszek Demkowicz (2016). +# Robust DPG methods for transient convection-diffusion. +# In: Building bridges: connections and challenges in modern approaches +# to numerical partial differential equations. +# [DOI](https://doi.org/10.1007/978-3-319-41640-3_6). 
+function initial_condition_eriksson_johnson(x, t, equations) + l = 4 + epsilon = diffusivity() # TODO: this requires epsilon < .6 due to sqrt + lambda_1 = (-1 + sqrt(1 - 4 * epsilon * l)) / (-2 * epsilon) + lambda_2 = (-1 - sqrt(1 - 4 * epsilon * l)) / (-2 * epsilon) + r1 = (1 + sqrt(1 + 4 * pi^2 * epsilon^2)) / (2 * epsilon) + s1 = (1 - sqrt(1 + 4 * pi^2 * epsilon^2)) / (2 * epsilon) + u = exp(-l * t) * (exp(lambda_1 * x[1]) - exp(lambda_2 * x[1])) + + cos(pi * x[2]) * (exp(s1 * x[1]) - exp(r1 * x[1])) / (exp(-s1) - exp(-r1)) + return SVector{1}(u) +end +initial_condition = initial_condition_eriksson_johnson + +boundary_conditions = Dict(:x_neg => BoundaryConditionDirichlet(initial_condition), + :y_neg => BoundaryConditionDirichlet(initial_condition), + :y_pos => BoundaryConditionDirichlet(initial_condition), + :x_pos => boundary_condition_do_nothing) + +boundary_conditions_parabolic = Dict(:x_neg => BoundaryConditionDirichlet(initial_condition), + :x_pos => BoundaryConditionDirichlet(initial_condition), + :y_neg => BoundaryConditionDirichlet(initial_condition), + :y_pos => BoundaryConditionDirichlet(initial_condition)) + +# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs) + +coordinates_min = (-1.0, -0.5) +coordinates_max = ( 0.0, 0.5) + +# This maps the domain [-1, 1]^2 to [-1, 0] x [-0.5, 0.5] while also +# introducing a curved warping to interior nodes. +function mapping(xi, eta) + x = xi + 0.1 * sin(pi * xi) * sin(pi * eta) + y = eta + 0.1 * sin(pi * xi) * sin(pi * eta) + return SVector(0.5 * (1 + x) - 1, 0.5 * y) +end + +trees_per_dimension = (4, 4) +mesh = P4estMesh(trees_per_dimension, + polydeg=3, initial_refinement_level=2, + mapping=mapping, periodicity=(false, false)) + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), initial_condition, solver, + boundary_conditions = (boundary_conditions, boundary_conditions_parabolic)) + + +############################################################################### +# ODE solvers, callbacks etc. 
+ +# Create ODE problem with time span `tspan` +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan); + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + +# The AliveCallback prints short status information in regular intervals +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) + + +############################################################################### +# run the simulation + +# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +time_int_tol = 1.0e-11 +sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, + ode_default_options()..., callback=callbacks) + +# Print the timer summary +summary_callback() diff --git a/examples/p4est_2d_dgsem/elixir_navierstokes_convergence.jl b/examples/p4est_2d_dgsem/elixir_navierstokes_convergence.jl new file mode 100644 index 00000000000..8111df8251a --- /dev/null +++ b/examples/p4est_2d_dgsem/elixir_navierstokes_convergence.jl @@ -0,0 +1,209 @@ +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the ideal compressible Navier-Stokes equations + +prandtl_number() = 0.72 +mu() = 0.01 + +equations = CompressibleEulerEquations2D(1.4) +equations_parabolic = CompressibleNavierStokesDiffusion2D(equations, mu=mu(), Prandtl=prandtl_number(), + gradient_variables=GradientVariablesPrimitive()) + +# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs, + volume_integral=VolumeIntegralWeakForm()) + +coordinates_min = (-1.0, -1.0) # minimum coordinates (min(x), min(y)) +coordinates_max = ( 1.0, 1.0) # maximum coordinates (max(x), max(y)) + +trees_per_dimension = (4, 4) +mesh = P4estMesh(trees_per_dimension, + polydeg=3, initial_refinement_level=2, + coordinates_min=coordinates_min, coordinates_max=coordinates_max, + periodicity=(true, false)) + +# Note: the initial condition cannot be specialized to `CompressibleNavierStokesDiffusion2D` +# since it is called by both the parabolic solver (which passes in `CompressibleNavierStokesDiffusion2D`) +# and by the initial condition (which passes in `CompressibleEulerEquations2D`). +# This convergence test setup was originally derived by Andrew Winters (@andrewwinters5000) +function initial_condition_navier_stokes_convergence_test(x, t, equations) + # Amplitude and shift + A = 0.5 + c = 2.0 + + # convenience values for trig. 
functions + pi_x = pi * x[1] + pi_y = pi * x[2] + pi_t = pi * t + + rho = c + A * sin(pi_x) * cos(pi_y) * cos(pi_t) + v1 = sin(pi_x) * log(x[2] + 2.0) * (1.0 - exp(-A * (x[2] - 1.0)) ) * cos(pi_t) + v2 = v1 + p = rho^2 + + return prim2cons(SVector(rho, v1, v2, p), equations) +end + +@inline function source_terms_navier_stokes_convergence_test(u, x, t, equations) + y = x[2] + + # TODO: parabolic + # we currently need to hardcode these parameters until we fix the "combined equation" issue + # see also https://github.com/trixi-framework/Trixi.jl/pull/1160 + inv_gamma_minus_one = inv(equations.gamma - 1) + Pr = prandtl_number() + mu_ = mu() + + # Same settings as in `initial_condition` + # Amplitude and shift + A = 0.5 + c = 2.0 + + # convenience values for trig. functions + pi_x = pi * x[1] + pi_y = pi * x[2] + pi_t = pi * t + + # compute the manufactured solution and all necessary derivatives + rho = c + A * sin(pi_x) * cos(pi_y) * cos(pi_t) + rho_t = -pi * A * sin(pi_x) * cos(pi_y) * sin(pi_t) + rho_x = pi * A * cos(pi_x) * cos(pi_y) * cos(pi_t) + rho_y = -pi * A * sin(pi_x) * sin(pi_y) * cos(pi_t) + rho_xx = -pi * pi * A * sin(pi_x) * cos(pi_y) * cos(pi_t) + rho_yy = -pi * pi * A * sin(pi_x) * cos(pi_y) * cos(pi_t) + + v1 = sin(pi_x) * log(y + 2.0) * (1.0 - exp(-A * (y - 1.0))) * cos(pi_t) + v1_t = -pi * sin(pi_x) * log(y + 2.0) * (1.0 - exp(-A * (y - 1.0))) * sin(pi_t) + v1_x = pi * cos(pi_x) * log(y + 2.0) * (1.0 - exp(-A * (y - 1.0))) * cos(pi_t) + v1_y = sin(pi_x) * (A * log(y + 2.0) * exp(-A * (y - 1.0)) + (1.0 - exp(-A * (y - 1.0))) / (y + 2.0)) * cos(pi_t) + v1_xx = -pi * pi * sin(pi_x) * log(y + 2.0) * (1.0 - exp(-A * (y - 1.0))) * cos(pi_t) + v1_xy = pi * cos(pi_x) * (A * log(y + 2.0) * exp(-A * (y - 1.0)) + (1.0 - exp(-A * (y - 1.0))) / (y + 2.0)) * cos(pi_t) + v1_yy = (sin(pi_x) * ( 2.0 * A * exp(-A * (y - 1.0)) / (y + 2.0) + - A * A * log(y + 2.0) * exp(-A * (y - 1.0)) + - (1.0 - exp(-A * (y - 1.0))) / ((y + 2.0) * (y + 2.0))) * cos(pi_t)) + v2 = v1 + v2_t = v1_t + v2_x = v1_x + v2_y = v1_y + v2_xx = v1_xx + v2_xy = v1_xy + v2_yy = v1_yy + + p = rho * rho + p_t = 2.0 * rho * rho_t + p_x = 2.0 * rho * rho_x + p_y = 2.0 * rho * rho_y + p_xx = 2.0 * rho * rho_xx + 2.0 * rho_x * rho_x + p_yy = 2.0 * rho * rho_yy + 2.0 * rho_y * rho_y + + # Note this simplifies slightly because the ansatz assumes that v1 = v2 + E = p * inv_gamma_minus_one + 0.5 * rho * (v1^2 + v2^2) + E_t = p_t * inv_gamma_minus_one + rho_t * v1^2 + 2.0 * rho * v1 * v1_t + E_x = p_x * inv_gamma_minus_one + rho_x * v1^2 + 2.0 * rho * v1 * v1_x + E_y = p_y * inv_gamma_minus_one + rho_y * v1^2 + 2.0 * rho * v1 * v1_y + + # Some convenience constants + T_const = equations.gamma * inv_gamma_minus_one / Pr + inv_rho_cubed = 1.0 / (rho^3) + + # compute the source terms + # density equation + du1 = rho_t + rho_x * v1 + rho * v1_x + rho_y * v2 + rho * v2_y + + # x-momentum equation + du2 = ( rho_t * v1 + rho * v1_t + p_x + rho_x * v1^2 + + 2.0 * rho * v1 * v1_x + + rho_y * v1 * v2 + + rho * v1_y * v2 + + rho * v1 * v2_y + # stress tensor from x-direction + - 4.0 / 3.0 * v1_xx * mu_ + + 2.0 / 3.0 * v2_xy * mu_ + - v1_yy * mu_ + - v2_xy * mu_ ) + # y-momentum equation + du3 = ( rho_t * v2 + rho * v2_t + p_y + rho_x * v1 * v2 + + rho * v1_x * v2 + + rho * v1 * v2_x + + rho_y * v2^2 + + 2.0 * rho * v2 * v2_y + # stress tensor from y-direction + - v1_xy * mu_ + - v2_xx * mu_ + - 4.0 / 3.0 * v2_yy * mu_ + + 2.0 / 3.0 * v1_xy * mu_ ) + # total energy equation + du4 = ( E_t + v1_x * (E + p) + v1 * (E_x + p_x) + + v2_y * (E + p) + v2 
* (E_y + p_y)
+        # stress tensor and temperature gradient terms from x-direction
+        - 4.0 / 3.0 * v1_xx * v1 * mu_
+        + 2.0 / 3.0 * v2_xy * v1 * mu_
+        - 4.0 / 3.0 * v1_x * v1_x * mu_
+        + 2.0 / 3.0 * v2_y * v1_x * mu_
+        - v1_xy * v2 * mu_
+        - v2_xx * v2 * mu_
+        - v1_y * v2_x * mu_
+        - v2_x * v2_x * mu_
+        - T_const * inv_rho_cubed * ( p_xx * rho * rho
+                                      - 2.0 * p_x * rho * rho_x
+                                      + 2.0 * p * rho_x * rho_x
+                                      - p * rho * rho_xx ) * mu_
+        # stress tensor and temperature gradient terms from y-direction
+        - v1_yy * v1 * mu_
+        - v2_xy * v1 * mu_
+        - v1_y * v1_y * mu_
+        - v2_x * v1_y * mu_
+        - 4.0 / 3.0 * v2_yy * v2 * mu_
+        + 2.0 / 3.0 * v1_xy * v2 * mu_
+        - 4.0 / 3.0 * v2_y * v2_y * mu_
+        + 2.0 / 3.0 * v1_x * v2_y * mu_
+        - T_const * inv_rho_cubed * ( p_yy * rho * rho
+                                      - 2.0 * p_y * rho * rho_y
+                                      + 2.0 * p * rho_y * rho_y
+                                      - p * rho * rho_yy ) * mu_ )
+
+  return SVector(du1, du2, du3, du4)
+end
+
+initial_condition = initial_condition_navier_stokes_convergence_test
+
+# BC types
+velocity_bc_top_bottom = NoSlip((x, t, equations) -> initial_condition_navier_stokes_convergence_test(x, t, equations)[2:3])
+heat_bc_top_bottom = Adiabatic((x, t, equations) -> 0.0)
+boundary_condition_top_bottom = BoundaryConditionNavierStokesWall(velocity_bc_top_bottom, heat_bc_top_bottom)
+
+# define inviscid boundary conditions
+boundary_conditions = Dict(:y_neg => boundary_condition_slip_wall,
+                           :y_pos => boundary_condition_slip_wall)
+
+# define viscous boundary conditions
+boundary_conditions_parabolic = Dict(:y_neg => boundary_condition_top_bottom,
+                                     :y_pos => boundary_condition_top_bottom)
+
+semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), initial_condition, solver;
+                                             boundary_conditions=(boundary_conditions, boundary_conditions_parabolic),
+                                             source_terms=source_terms_navier_stokes_convergence_test)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+# Create ODE problem with time span `tspan`
+tspan = (0.0, 0.5)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+alive_callback = AliveCallback(alive_interval=10)
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi, interval=analysis_interval)
+callbacks = CallbackSet(summary_callback, alive_callback, analysis_callback)
+
+###############################################################################
+# run the simulation
+
+time_int_tol = 1e-8
+sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, dt = 1e-5,
+            ode_default_options()..., callback=callbacks)
+summary_callback() # print the timer summary
+
diff --git a/examples/p4est_2d_dgsem/elixir_navierstokes_lid_driven_cavity.jl b/examples/p4est_2d_dgsem/elixir_navierstokes_lid_driven_cavity.jl
new file mode 100644
index 00000000000..051f4defe54
--- /dev/null
+++ b/examples/p4est_2d_dgsem/elixir_navierstokes_lid_driven_cavity.jl
@@ -0,0 +1,82 @@
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the ideal compressible Navier-Stokes equations
+
+# TODO: parabolic; unify names of these accessor functions
+prandtl_number() = 0.72
+mu() = 0.001
+
+equations = CompressibleEulerEquations2D(1.4)
+equations_parabolic = CompressibleNavierStokesDiffusion2D(equations, mu=mu(),
+                                                          Prandtl=prandtl_number())
+
+# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
+solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs)
+
+coordinates_min = (-1.0, -1.0) # minimum coordinates (min(x), min(y))
+coordinates_max = ( 1.0,  1.0) # maximum coordinates (max(x), max(y))
+
+# Create a uniformly refined mesh
+trees_per_dimension = (4, 4)
+mesh = P4estMesh(trees_per_dimension,
+                 polydeg=3, initial_refinement_level=2,
+                 coordinates_min=coordinates_min, coordinates_max=coordinates_max,
+                 periodicity=(false, false))
+
+function initial_condition_cavity(x, t, equations::CompressibleEulerEquations2D)
+  Ma = 0.1
+  rho = 1.0
+  u, v = 0.0, 0.0
+  p = 1.0 / (Ma^2 * equations.gamma)
+  return prim2cons(SVector(rho, u, v, p), equations)
+end
+initial_condition = initial_condition_cavity

+# BC types
+velocity_bc_lid = NoSlip((x, t, equations) -> SVector(1.0, 0.0))
+velocity_bc_cavity = NoSlip((x, t, equations) -> SVector(0.0, 0.0))
+heat_bc = Adiabatic((x, t, equations) -> 0.0)
+boundary_condition_lid = BoundaryConditionNavierStokesWall(velocity_bc_lid, heat_bc)
+boundary_condition_cavity = BoundaryConditionNavierStokesWall(velocity_bc_cavity, heat_bc)
+
+# define inviscid boundary conditions
+boundary_conditions = Dict( :x_neg => boundary_condition_slip_wall,
+                            :y_neg => boundary_condition_slip_wall,
+                            :y_pos => boundary_condition_slip_wall,
+                            :x_pos => boundary_condition_slip_wall)
+
+boundary_conditions_parabolic = Dict( :x_neg => boundary_condition_cavity,
+                                      :y_neg => boundary_condition_cavity,
+                                      :y_pos => boundary_condition_lid,
+                                      :x_pos => boundary_condition_cavity)
+
+# A semidiscretization collects data structures and functions for the spatial discretization
+semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic),
+                                             initial_condition, solver;
+                                             boundary_conditions=(boundary_conditions,
+                                                                  boundary_conditions_parabolic))
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+# Create ODE problem with time span `tspan`
+tspan = (0.0, 25.0)
+ode = semidiscretize(semi, tspan);
+
+summary_callback = SummaryCallback()
+alive_callback = AliveCallback(alive_interval=100)
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi, interval=analysis_interval)
+callbacks = CallbackSet(summary_callback, alive_callback)
+
+###############################################################################
+# run the simulation
+
+time_int_tol = 1e-8
+sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol,
+            ode_default_options()..., callback=callbacks)
+summary_callback() # print the timer summary
+
+
diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl
index e73a8cda9b8..73ac47ed1e3 100644
--- a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl
+++ b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl
@@ -365,6 +365,7 @@ function prolong2interfaces!(cache_parabolic, flux_viscous,
     return nothing
 end
 
+# This version is used for divergence flux computations
 function calc_interface_flux!(surface_flux_values,
                               mesh::P4estMesh{2}, equations_parabolic,
                               dg::DG, cache_parabolic)
@@ -405,7 +406,7 @@ function calc_interface_flux!(surface_flux_values,
         end
 
         for node in eachnode(dg)
-            # We prolong the viscous flux dotted with respect the outward normal on the
+            # We prolong the viscous flux dotted with respect to the outward normal on the
             # primary element. We assume a BR-1 type of flux.
             viscous_flux_normal_ll, viscous_flux_normal_rr = get_surface_node_vars(cache_parabolic.interfaces.u,
                                                                                    equations_parabolic,
@@ -446,6 +447,7 @@ function prolong2boundaries!(cache_parabolic, flux_viscous,
         # a start value and a step size to get the correct face and orientation.
         element = boundaries.neighbor_ids[boundary]
         node_indices = boundaries.node_indices[boundary]
+        direction = indices2direction(node_indices)
 
         i_node_start, i_node_step = index_to_start_step_2d(node_indices[1], index_range)
         j_node_start, j_node_step = index_to_start_step_2d(node_indices[2], index_range)
@@ -454,15 +456,12 @@ function prolong2boundaries!(cache_parabolic, flux_viscous,
         j_node = j_node_start
         for i in eachnode(dg)
             # this is the outward normal direction on the primary element
-            normal_direction = get_normal_direction(primary_direction,
-                                                    contravariant_vectors,
-                                                    i_node, j_node, primary_element)
+            normal_direction = get_normal_direction(direction, contravariant_vectors,
+                                                    i_node, j_node, element)
 
             for v in eachvariable(equations_parabolic)
-                flux_viscous = SVector(flux_viscous_x[v, i_primary, j_primary,
-                                                      primary_element],
-                                       flux_viscous_y[v, i_primary, j_primary,
-                                                      primary_element])
+                flux_viscous = SVector(flux_viscous_x[v, i_node, j_node, element],
+                                       flux_viscous_y[v, i_node, j_node, element])
 
                 boundaries.u[v, i, boundary] = dot(flux_viscous, normal_direction)
             end
@@ -470,6 +469,124 @@ function prolong2boundaries!(cache_parabolic, flux_viscous,
             j_node += j_node_step
         end
     end
+    return nothing
+end
+
+function calc_boundary_flux_gradients!(cache, t,
+                                       boundary_condition::Union{BoundaryConditionPeriodic,
+                                                                 BoundaryConditionDoNothing
+                                                                 },
+                                       mesh::P4estMesh, equations, surface_integral, dg::DG)
+    @assert isempty(eachboundary(dg, cache))
+end
+
+# Function barrier for type stability
+function calc_boundary_flux_gradients!(cache, t, boundary_conditions, mesh::P4estMesh,
+                                       equations, surface_integral, dg::DG)
+    (; boundary_condition_types, boundary_indices) = boundary_conditions
+    calc_boundary_flux_by_type!(cache, t, boundary_condition_types, boundary_indices,
+                                Gradient(),
mesh, equations, surface_integral, dg) return nothing end + +function calc_boundary_flux_divergence!(cache, t, boundary_conditions, mesh::P4estMesh, + equations, surface_integral, dg::DG) + (; boundary_condition_types, boundary_indices) = boundary_conditions + + calc_boundary_flux_by_type!(cache, t, boundary_condition_types, boundary_indices, + Divergence(), mesh, equations, surface_integral, dg) + return nothing +end + +# Iterate over tuples of boundary condition types and associated indices +# in a type-stable way using "lispy tuple programming". +function calc_boundary_flux_by_type!(cache, t, BCs::NTuple{N, Any}, + BC_indices::NTuple{N, Vector{Int}}, + operator_type, + mesh::P4estMesh, + equations, surface_integral, dg::DG) where {N} + # Extract the boundary condition type and index vector + boundary_condition = first(BCs) + boundary_condition_indices = first(BC_indices) + # Extract the remaining types and indices to be processed later + remaining_boundary_conditions = Base.tail(BCs) + remaining_boundary_condition_indices = Base.tail(BC_indices) + + # process the first boundary condition type + calc_boundary_flux!(cache, t, boundary_condition, boundary_condition_indices, + operator_type, mesh, equations, surface_integral, dg) + + # recursively call this method with the unprocessed boundary types + calc_boundary_flux_by_type!(cache, t, remaining_boundary_conditions, + remaining_boundary_condition_indices, + operator_type, + mesh, equations, surface_integral, dg) + + return nothing +end + +# terminate the type-stable iteration over tuples +function calc_boundary_flux_by_type!(cache, t, BCs::Tuple{}, BC_indices::Tuple{}, + operator_type, mesh::P4estMesh, equations, + surface_integral, dg::DG) + nothing +end + +function calc_boundary_flux!(cache, t, + boundary_condition_parabolic, # works with Dict types + boundary_condition_indices, + operator_type, mesh::P4estMesh{2}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG) + (; boundaries) = cache + (; node_coordinates, surface_flux_values) = cache.elements + (; contravariant_vectors) = cache.elements + index_range = eachnode(dg) + + @threaded for local_index in eachindex(boundary_condition_indices) + # Use the local index to get the global boundary index from the pre-sorted list + boundary_index = boundary_condition_indices[local_index] + + # Get information on the adjacent element, compute the surface fluxes, + # and store them + element = boundaries.neighbor_ids[boundary_index] + node_indices = boundaries.node_indices[boundary_index] + direction_index = indices2direction(node_indices) + + i_node_start, i_node_step = index_to_start_step_2d(node_indices[1], index_range) + j_node_start, j_node_step = index_to_start_step_2d(node_indices[2], index_range) + + i_node = i_node_start + j_node = j_node_start + for node_index in eachnode(dg) + # Extract solution data from boundary container + u_inner = get_node_vars(boundaries.u, equations_parabolic, dg, node_index, + boundary_index) + + # Outward-pointing normal direction (not normalized) + normal_direction = get_normal_direction(direction_index, contravariant_vectors, + i_node, j_node, element) + + # TODO: revisit if we want more general boundary treatments. + # This assumes the gradient numerical flux at the boundary is the gradient variable, + # which is consistent with BR1, LDG. 
+ flux_inner = u_inner + + # Coordinates at boundary node + x = get_node_coords(node_coordinates, equations_parabolic, dg, i_node, j_node, + element) + + flux_ = boundary_condition_parabolic(flux_inner, u_inner, normal_direction, + x, t, operator_type, equations_parabolic) + + # Copy flux to element storage in the correct orientation + for v in eachvariable(equations_parabolic) + surface_flux_values[v, node_index, direction_index, element] = flux_[v] + end + + i_node += i_node_step + j_node += j_node_step + end + end +end diff --git a/test/test_parabolic_2d.jl b/test/test_parabolic_2d.jl index b0ac63d4ce9..471b976e990 100644 --- a/test/test_parabolic_2d.jl +++ b/test/test_parabolic_2d.jl @@ -200,6 +200,38 @@ isdir(outdir) && rm(outdir, recursive=true) ) end + @trixi_testset "P4estMesh2D: elixir_advection_diffusion_periodic_curved.jl" begin + @test_trixi_include(joinpath(examples_dir(), "p4est_2d_dgsem", "elixir_advection_diffusion_periodic_curved.jl"), + trees_per_dimension = (1, 1), initial_refinement_level = 2, tspan=(0.0, 0.5), + l2 = [0.012380458938507371], + linf = [0.10860506906472567] + ) + end + + @trixi_testset "P4estMesh2D: elixir_advection_diffusion_nonperiodic_curved.jl" begin + @test_trixi_include(joinpath(examples_dir(), "p4est_2d_dgsem", "elixir_advection_diffusion_nonperiodic_curved.jl"), + trees_per_dimension = (1, 1), initial_refinement_level = 2, tspan=(0.0, 0.5), + l2 = [0.04933902988507035], + linf = [0.2550261714590271] + ) + end + + @trixi_testset "P4estMesh2D: elixir_navierstokes_convergence.jl" begin + @test_trixi_include(joinpath(examples_dir(), "p4est_2d_dgsem", "elixir_navierstokes_convergence.jl"), + initial_refinement_level = 1, tspan=(0.0, 0.2), + l2 = [0.0003811978985836709, 0.0005874314969169538, 0.0009142898787923481, 0.0011613918899727263], + linf = [0.0021633623982135752, 0.009484348274135372, 0.004231572066492217, 0.011661660275365193] + ) + end + + @trixi_testset "P4estMesh2D: elixir_navierstokes_lid_driven_cavity.jl" begin + @test_trixi_include(joinpath(examples_dir(), "p4est_2d_dgsem", "elixir_navierstokes_lid_driven_cavity.jl"), + initial_refinement_level = 2, tspan=(0.0, 0.5), + l2 = [0.00028716166408816073, 0.08101204560401647, 0.02099595625377768, 0.05008149754143295], + linf = [0.014804500261322406, 0.9513271652357098, 0.7223919625994717, 1.4846907331004786] + ) + end + end # Clean up afterwards: delete Trixi.jl output directory From 87a16931fdcafd488b05d4009d476250f0b98a4f Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Mon, 26 Jun 2023 12:08:58 +0200 Subject: [PATCH 069/163] fix some typos in docstrings of flux splittings (#1550) --- src/equations/compressible_euler_1d.jl | 6 +++--- src/equations/compressible_euler_2d.jl | 6 +++--- src/equations/compressible_euler_3d.jl | 2 +- src/equations/inviscid_burgers_1d.jl | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/equations/compressible_euler_1d.jl b/src/equations/compressible_euler_1d.jl index f484f26a588..15f7a2cb4c4 100644 --- a/src/equations/compressible_euler_1d.jl +++ b/src/equations/compressible_euler_1d.jl @@ -374,7 +374,7 @@ Splitting of the compressible Euler flux of Steger and Warming. Returns a tuple of the fluxes "minus" (associated with waves going into the negative axis direction) and "plus" (associated with waves going into the positive axis direction). If only one of the fluxes is required, use the -function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. 
+function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}()`. !!! warning "Experimental implementation (upwind SBP)" This is an experimental feature and may change in future releases. @@ -462,7 +462,7 @@ it proved the most robust in practice. Returns a tuple of the fluxes "minus" (associated with waves going into the negative axis direction) and "plus" (associated with waves going into the positive axis direction). If only one of the fluxes is required, use the -function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. +function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}()`. !!! warning "Experimental implementation (upwind SBP)" This is an experimental feature and may change in future releases. @@ -555,7 +555,7 @@ are to handle flows at the low Mach number limit. Returns a tuple of the fluxes "minus" (associated with waves going into the negative axis direction) and "plus" (associated with waves going into the positive axis direction). If only one of the fluxes is required, use the -function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. +function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}()`. !!! warning "Experimental implementation (upwind SBP)" This is an experimental feature and may change in future releases. diff --git a/src/equations/compressible_euler_2d.jl b/src/equations/compressible_euler_2d.jl index 89f04ef1e05..05987c510b8 100644 --- a/src/equations/compressible_euler_2d.jl +++ b/src/equations/compressible_euler_2d.jl @@ -694,7 +694,7 @@ Splitting of the compressible Euler flux of Steger and Warming. Returns a tuple of the fluxes "minus" (associated with waves going into the negative axis direction) and "plus" (associated with waves going into the positive axis direction). If only one of the fluxes is required, use the -function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. +function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}()`. !!! warning "Experimental implementation (upwind SBP)" This is an experimental feature and may change in future releases. @@ -826,7 +826,7 @@ it proved the most robust in practice. Returns a tuple of the fluxes "minus" (associated with waves going into the negative axis direction) and "plus" (associated with waves going into the positive axis direction). If only one of the fluxes is required, use the -function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. +function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}()`. !!! warning "Experimental implementation (upwind SBP)" This is an experimental feature and may change in future releases. @@ -924,7 +924,7 @@ to Burgers' equation. Returns a tuple of the fluxes "minus" (associated with waves going into the negative axis direction) and "plus" (associated with waves going into the positive axis direction). If only one of the fluxes is required, use the -function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. +function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}()`. !!! warning "Experimental implementation (upwind SBP)" This is an experimental feature and may change in future releases. 
diff --git a/src/equations/compressible_euler_3d.jl b/src/equations/compressible_euler_3d.jl index cd081cfc42a..2085811f832 100644 --- a/src/equations/compressible_euler_3d.jl +++ b/src/equations/compressible_euler_3d.jl @@ -770,7 +770,7 @@ Splitting of the compressible Euler flux of Steger and Warming. Returns a tuple of the fluxes "minus" (associated with waves going into the negative axis direction) and "plus" (associated with waves going into the positive axis direction). If only one of the fluxes is required, use the -function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. +function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}()`. !!! warning "Experimental implementation (upwind SBP)" This is an experimental feature and may change in future releases. diff --git a/src/equations/inviscid_burgers_1d.jl b/src/equations/inviscid_burgers_1d.jl index 6a2cfb6aa8e..f2387f26ba7 100644 --- a/src/equations/inviscid_burgers_1d.jl +++ b/src/equations/inviscid_burgers_1d.jl @@ -137,7 +137,7 @@ and `f⁻ = 0.5 (f - λ u)` where `λ = abs(u)`. Returns a tuple of the fluxes "minus" (associated with waves going into the negative axis direction) and "plus" (associated with waves going into the positive axis direction). If only one of the fluxes is required, use the -function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}`. +function signature with argument `which` set to `Val{:minus}()` or `Val{:plus}()`. !!! warning "Experimental implementation (upwind SBP)" This is an experimental feature and may change in future releases. From fdccbb17aaddd31a935ee7560fd4ea506a6d93ad Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 27 Jun 2023 07:43:59 +0200 Subject: [PATCH 070/163] Bump crate-ci/typos from 1.15.1 to 1.15.6 (#1552) Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.15.1 to 1.15.6. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.15.1...v1.15.6) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/SpellCheck.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml index 75886465f85..93bee1ce4fc 100644 --- a/.github/workflows/SpellCheck.yml +++ b/.github/workflows/SpellCheck.yml @@ -10,4 +10,4 @@ jobs: - name: Checkout Actions Repository uses: actions/checkout@v3 - name: Check spelling - uses: crate-ci/typos@v1.15.1 + uses: crate-ci/typos@v1.15.6 From 13b26a3d5c27c6c8040b56a56cb35b66f0e3bb42 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 4 Jul 2023 03:25:15 +0200 Subject: [PATCH 071/163] Bump crate-ci/typos from 1.15.6 to 1.15.10 (#1559) Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.15.6 to 1.15.10. 
- [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.15.6...v1.15.10) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/SpellCheck.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml index 93bee1ce4fc..90bd9366b50 100644 --- a/.github/workflows/SpellCheck.yml +++ b/.github/workflows/SpellCheck.yml @@ -10,4 +10,4 @@ jobs: - name: Checkout Actions Repository uses: actions/checkout@v3 - name: Check spelling - uses: crate-ci/typos@v1.15.6 + uses: crate-ci/typos@v1.15.10 From 58fbab4df7bdfaf161fff16214f629a10426532b Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Wed, 5 Jul 2023 16:06:48 +0200 Subject: [PATCH 072/163] fix typo in docs (#1560) --- docs/literate/src/files/shock_capturing.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/literate/src/files/shock_capturing.jl b/docs/literate/src/files/shock_capturing.jl index b165f7ec8bd..afa34cbf06a 100644 --- a/docs/literate/src/files/shock_capturing.jl +++ b/docs/literate/src/files/shock_capturing.jl @@ -48,7 +48,7 @@ # with the total energy $\mathbb{E}=\max\big(\frac{m_N^2}{\sum_{j=0}^N m_j^2}, \frac{m_{N-1}^2}{\sum_{j=0}^{N-1} m_j^2}\big)$, # threshold $\mathbb{T}= 0.5 * 10^{-1.8*(N+1)^{1/4}}$ and parameter $s=ln\big(\frac{1-0.0001}{0.0001}\big)\approx 9.21024$. -# For computational efficiency, $\alpha_{min}$ is introduced und used for +# For computational efficiency, $\alpha_{min}$ is introduced and used for # ```math # \tilde{\alpha} = \begin{cases} # 0, & \text{if } \alpha<\alpha_{min}\\ From 3bd55515a03dac926446cbb8ee41edd21d9baec0 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Sat, 8 Jul 2023 06:16:24 +0200 Subject: [PATCH 073/163] set version to v0.5.31 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 0edba6b681c..81657e868db 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.31-pre" +version = "0.5.31" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From dc364ebc665c7f0ab74601ba0041618dddbabc18 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Sat, 8 Jul 2023 06:16:45 +0200 Subject: [PATCH 074/163] set development version to v0.5.32-pre --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 81657e868db..828f4778f74 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. 
Winters ", "Jesse Chan "] -version = "0.5.31" +version = "0.5.32-pre" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From 766e3f94465f48608c92d1fe91cd46db4c31c362 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 11 Jul 2023 07:14:30 +0200 Subject: [PATCH 075/163] Bump crate-ci/typos from 1.15.10 to 1.16.0 (#1563) Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.15.10 to 1.16.0. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.15.10...v1.16.0) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/SpellCheck.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml index 90bd9366b50..bb5a32f72ee 100644 --- a/.github/workflows/SpellCheck.yml +++ b/.github/workflows/SpellCheck.yml @@ -10,4 +10,4 @@ jobs: - name: Checkout Actions Repository uses: actions/checkout@v3 - name: Check spelling - uses: crate-ci/typos@v1.15.10 + uses: crate-ci/typos@v1.16.0 From 5ff677c1d246e7a500ab845201bc328d1d3bde92 Mon Sep 17 00:00:00 2001 From: Lars Christmann Date: Tue, 11 Jul 2023 18:53:25 +0200 Subject: [PATCH 076/163] Implement upwind flux for linearized Euler equations (#1557) * Enable input checks for LEE keyword constructor * Extend LEE implementation to curved meshes * Implement upwind flux for linearized Euler equations * Add upwind flux examples and tests * Fix comments in linearized Euler elixirs * Clarify LEE Gaussian source elixir * Rename `flux_upwind` to `flux_godunov` * Add parentheses around multiline expressions * Add consistency checks for LEE Godunov flux * Explain odd mean values in more detail * Use normalized normal vector to simplify flux * Add docstring for LEE upwind flux * Update examples/p4est_2d_dgsem/elixir_linearizedeuler_gaussian_source.jl Co-authored-by: Michael Schlottke-Lakemper --------- Co-authored-by: Michael Schlottke-Lakemper --- .../elixir_linearizedeuler_gaussian_source.jl | 89 ++++++++ .../elixir_linearizedeuler_gauss_wall.jl | 68 ++++++ src/equations/linearized_euler_2d.jl | 212 +++++++++++++++++- test/test_p4est_2d.jl | 5 + test/test_tree_2d_linearizedeuler.jl | 6 + test/test_unit.jl | 20 ++ 6 files changed, 399 insertions(+), 1 deletion(-) create mode 100644 examples/p4est_2d_dgsem/elixir_linearizedeuler_gaussian_source.jl create mode 100644 examples/tree_2d_dgsem/elixir_linearizedeuler_gauss_wall.jl diff --git a/examples/p4est_2d_dgsem/elixir_linearizedeuler_gaussian_source.jl b/examples/p4est_2d_dgsem/elixir_linearizedeuler_gaussian_source.jl new file mode 100644 index 00000000000..ba2ec827778 --- /dev/null +++ b/examples/p4est_2d_dgsem/elixir_linearizedeuler_gaussian_source.jl @@ -0,0 +1,89 @@ + +using OrdinaryDiffEq +using Trixi + +# Based on the TreeMesh example `elixir_acoustics_gaussian_source.jl`. 
+# The acoustic perturbation equations have been replaced with the linearized Euler +# equations and instead of the Cartesian `TreeMesh` a rotated `P4estMesh` is used + +# Oscillating Gaussian-shaped source terms +function source_terms_gauss(u, x, t, equations::LinearizedEulerEquations2D) + r = 0.1 + A = 1.0 + f = 2.0 + + # Velocity sources + s2 = 0.0 + s3 = 0.0 + # Density and pressure source + s1 = s4 = exp(-(x[1]^2 + x[2]^2) / (2 * r^2)) * A * sin(2 * pi * f * t) + + return SVector(s1, s2, s3, s4) +end + +initial_condition_zero(x, t, equations::LinearizedEulerEquations2D) = SVector(0.0, 0.0, 0.0, 0.0) + +############################################################################### +# semidiscretization of the linearized Euler equations + +# Create a domain that is a 30° rotated version of [-3, 3]^2 +c = cospi(2 * 30.0 / 360.0) +s = sinpi(2 * 30.0 / 360.0) +rot_mat = Trixi.SMatrix{2, 2}([c -s; s c]) +mapping(xi, eta) = rot_mat * SVector(3.0*xi, 3.0*eta) + +# Mean density and speed of sound are slightly off from 1.0 to allow proper verification of +# curved LEE implementation using this elixir (some things in the LEE cancel if both are 1.0) +equations = LinearizedEulerEquations2D(v_mean_global=Tuple(rot_mat * SVector(-0.5, 0.25)), + c_mean_global=1.02, rho_mean_global=1.01) + +initial_condition = initial_condition_zero + +# Create DG solver with polynomial degree = 3 and upwind flux as surface flux +solver = DGSEM(polydeg=3, surface_flux=flux_godunov) + +# Create a uniformly refined mesh with periodic boundaries +trees_per_dimension = (4, 4) +mesh = P4estMesh(trees_per_dimension, polydeg=1, + mapping=mapping, + periodicity=true, initial_refinement_level=2) + +# A semidiscretization collects data structures and functions for the spatial discretization +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, + source_terms=source_terms_gauss) + + +############################################################################### +# ODE solvers, callbacks etc. 
+
+# Create ODE problem with time span from 0.0 to 2.0
+tspan = (0.0, 2.0)
+ode = semidiscretize(semi, tspan)
+
+# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
+# and resets the timers
+summary_callback = SummaryCallback()
+
+# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
+analysis_callback = AnalysisCallback(semi, interval=100)
+
+# The SaveSolutionCallback allows to save the solution to a file in regular intervals
+save_solution = SaveSolutionCallback(interval=100)
+
+# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
+stepsize_callback = StepsizeCallback(cfl=0.5)
+
+# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
+callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, stepsize_callback)
+
+
+###############################################################################
+# run the simulation
+
+# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false),
+            dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep=false, callback=callbacks);
+
+# Print the timer summary
+summary_callback()
diff --git a/examples/tree_2d_dgsem/elixir_linearizedeuler_gauss_wall.jl b/examples/tree_2d_dgsem/elixir_linearizedeuler_gauss_wall.jl
new file mode 100644
index 00000000000..14fe201a291
--- /dev/null
+++ b/examples/tree_2d_dgsem/elixir_linearizedeuler_gauss_wall.jl
@@ -0,0 +1,68 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the linearized Euler equations
+
+equations = LinearizedEulerEquations2D(v_mean_global=(0.5, 0.0), c_mean_global=1.0,
+                                       rho_mean_global=1.0)
+
+# Create DG solver with polynomial degree = 5 and upwind flux as surface flux
+solver = DGSEM(polydeg=5, surface_flux=flux_godunov)
+
+coordinates_min = (-100.0, 0.0) # minimum coordinates (min(x), min(y))
+coordinates_max = (100.0, 200.0) # maximum coordinates (max(x), max(y))
+
+# Create a uniformly refined mesh without periodic boundaries
+mesh = TreeMesh(coordinates_min, coordinates_max,
+                initial_refinement_level=4,
+                n_cells_max=100_000,
+                periodicity=false)
+
+function initial_condition_gauss_wall(x, t, equations::LinearizedEulerEquations2D)
+  v1_prime = 0.0
+  v2_prime = 0.0
+  rho_prime = p_prime = exp(-log(2) * (x[1]^2 + (x[2] - 25)^2) / 25)
+  return SVector(rho_prime, v1_prime, v2_prime, p_prime)
+end
+initial_condition = initial_condition_gauss_wall
+
+# A semidiscretization collects data structures and functions for the spatial discretization
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    boundary_conditions=boundary_condition_wall)
+
+
+###############################################################################
+# ODE solvers, callbacks etc.
+ +# Create ODE problem with time span from 0.0 to 30.0 +tspan = (0.0, 30.0) +ode = semidiscretize(semi, tspan) + +# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup +# and resets the timers +summary_callback = SummaryCallback() + +# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results +analysis_callback = AnalysisCallback(semi, interval=100) + +# The SaveSolutionCallback allows to save the solution to a file in regular intervals +save_solution = SaveSolutionCallback(interval=100) + +# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step +stepsize_callback = StepsizeCallback(cfl=0.7) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, save_solution, stepsize_callback) + +############################################################################### +# run the simulation + +# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks) + +# Print the timer summary +summary_callback() diff --git a/src/equations/linearized_euler_2d.jl b/src/equations/linearized_euler_2d.jl index cd681365cae..e478c32bd29 100644 --- a/src/equations/linearized_euler_2d.jl +++ b/src/equations/linearized_euler_2d.jl @@ -53,7 +53,7 @@ end function LinearizedEulerEquations2D(; v_mean_global::NTuple{2, <:Real}, c_mean_global::Real, rho_mean_global::Real) - return LinearizedEulerEquations2D(SVector(v_mean_global), c_mean_global, + return LinearizedEulerEquations2D(v_mean_global, c_mean_global, rho_mean_global) end @@ -126,6 +126,24 @@ end return SVector(f1, f2, f3, f4) end +# Calculate 1D flux for a single point +@inline function flux(u, normal_direction::AbstractVector, + equations::LinearizedEulerEquations2D) + @unpack v_mean_global, c_mean_global, rho_mean_global = equations + rho_prime, v1_prime, v2_prime, p_prime = u + + v_mean_normal = v_mean_global[1] * normal_direction[1] + + v_mean_global[2] * normal_direction[2] + v_prime_normal = v1_prime * normal_direction[1] + v2_prime * normal_direction[2] + + f1 = v_mean_normal * rho_prime + rho_mean_global * v_prime_normal + f2 = v_mean_normal * v1_prime + normal_direction[1] * p_prime / rho_mean_global + f3 = v_mean_normal * v2_prime + normal_direction[2] * p_prime / rho_mean_global + f4 = v_mean_normal * p_prime + c_mean_global^2 * rho_mean_global * v_prime_normal + + return SVector(f1, f2, f3, f4) +end + @inline have_constant_speed(::LinearizedEulerEquations2D) = True() @inline function max_abs_speeds(equations::LinearizedEulerEquations2D) @@ -143,6 +161,198 @@ end end end +@inline function max_abs_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::LinearizedEulerEquations2D) + @unpack v_mean_global, c_mean_global = equations + v_mean_normal = normal_direction[1] * v_mean_global[1] + + normal_direction[2] * v_mean_global[2] + return abs(v_mean_normal) + c_mean_global * norm(normal_direction) +end + +@doc raw""" + flux_godunov(u_ll, u_rr, orientation_or_normal_direction, + equations::LinearizedEulerEquations2D) + +An upwind flux for the linearized Euler equations based on diagonalization of the physical +flux matrix. 
Given the physical flux ``Au``, ``A=T \Lambda T^{-1}`` with +``\Lambda`` being a diagonal matrix that holds the eigenvalues of ``A``, decompose +``\Lambda = \Lambda^+ + \Lambda^-`` where ``\Lambda^+`` and ``\Lambda^-`` are diagonal +matrices holding the positive and negative eigenvalues of ``A``, respectively. Then for +left and right states ``u_L, u_R``, the numerical flux calculated by this function is given +by ``A^+ u_L + A^- u_R`` where ``A^{\pm} = T \Lambda^{\pm} T^{-1}``. + +The diagonalization of the flux matrix can be found in +- R. F. Warming, Richard M. Beam and B. J. Hyett (1975) + Diagonalization and simultaneous symmetrization of the gas-dynamic matrices + [DOI: 10.1090/S0025-5718-1975-0388967-5](https://doi.org/10.1090/S0025-5718-1975-0388967-5) +""" +@inline function flux_godunov(u_ll, u_rr, orientation::Integer, + equations::LinearizedEulerEquations2D) + @unpack v_mean_global, rho_mean_global, c_mean_global = equations + v1_mean = v_mean_global[1] + v2_mean = v_mean_global[2] + + rho_prime_ll, v1_prime_ll, v2_prime_ll, p_prime_ll = u_ll + rho_prime_rr, v1_prime_rr, v2_prime_rr, p_prime_rr = u_rr + + if orientation == 1 + # Eigenvalues of the flux matrix + lambda1 = v1_mean + lambda2 = v1_mean - c_mean_global + lambda3 = v1_mean + c_mean_global + + lambda1_p = positive_part(lambda1) + lambda2_p = positive_part(lambda2) + lambda3_p = positive_part(lambda3) + lambda2p3_half_p = 0.5 * (lambda2_p + lambda3_p) + lambda3m2_half_p = 0.5 * (lambda3_p - lambda2_p) + + lambda1_m = negative_part(lambda1) + lambda2_m = negative_part(lambda2) + lambda3_m = negative_part(lambda3) + lambda2p3_half_m = 0.5 * (lambda2_m + lambda3_m) + lambda3m2_half_m = 0.5 * (lambda3_m - lambda2_m) + + f1p = (lambda1_p * rho_prime_ll + + lambda3m2_half_p / c_mean_global * rho_mean_global * v1_prime_ll + + (lambda2p3_half_p - lambda1_p) / c_mean_global^2 * p_prime_ll) + f2p = (lambda2p3_half_p * v1_prime_ll + + lambda3m2_half_p / c_mean_global * p_prime_ll / rho_mean_global) + f3p = lambda1_p * v2_prime_ll + f4p = (lambda3m2_half_p * c_mean_global * rho_mean_global * v1_prime_ll + + lambda2p3_half_p * p_prime_ll) + + f1m = (lambda1_m * rho_prime_rr + + lambda3m2_half_m / c_mean_global * rho_mean_global * v1_prime_rr + + (lambda2p3_half_m - lambda1_m) / c_mean_global^2 * p_prime_rr) + f2m = (lambda2p3_half_m * v1_prime_rr + + lambda3m2_half_m / c_mean_global * p_prime_rr / rho_mean_global) + f3m = lambda1_m * v2_prime_rr + f4m = (lambda3m2_half_m * c_mean_global * rho_mean_global * v1_prime_rr + + lambda2p3_half_m * p_prime_rr) + + f1 = f1p + f1m + f2 = f2p + f2m + f3 = f3p + f3m + f4 = f4p + f4m + else # orientation == 2 + # Eigenvalues of the flux matrix + lambda1 = v2_mean + lambda2 = v2_mean - c_mean_global + lambda3 = v2_mean + c_mean_global + + lambda1_p = positive_part(lambda1) + lambda2_p = positive_part(lambda2) + lambda3_p = positive_part(lambda3) + lambda2p3_half_p = 0.5 * (lambda2_p + lambda3_p) + lambda3m2_half_p = 0.5 * (lambda3_p - lambda2_p) + + lambda1_m = negative_part(lambda1) + lambda2_m = negative_part(lambda2) + lambda3_m = negative_part(lambda3) + lambda2p3_half_m = 0.5 * (lambda2_m + lambda3_m) + lambda3m2_half_m = 0.5 * (lambda3_m - lambda2_m) + + f1p = (lambda1_p * rho_prime_ll + + lambda3m2_half_p / c_mean_global * rho_mean_global * v2_prime_ll + + (lambda2p3_half_p - lambda1_p) / c_mean_global^2 * p_prime_ll) + f2p = lambda1_p * v1_prime_ll + f3p = (lambda2p3_half_p * v2_prime_ll + + lambda3m2_half_p / c_mean_global * p_prime_ll / rho_mean_global) + f4p = (lambda3m2_half_p * 
c_mean_global * rho_mean_global * v2_prime_ll + + lambda2p3_half_p * p_prime_ll) + + f1m = (lambda1_m * rho_prime_rr + + lambda3m2_half_m / c_mean_global * rho_mean_global * v2_prime_rr + + (lambda2p3_half_m - lambda1_m) / c_mean_global^2 * p_prime_rr) + f2m = lambda1_m * v1_prime_rr + f3m = (lambda2p3_half_m * v2_prime_rr + + lambda3m2_half_m / c_mean_global * p_prime_rr / rho_mean_global) + f4m = (lambda3m2_half_m * c_mean_global * rho_mean_global * v2_prime_rr + + lambda2p3_half_m * p_prime_rr) + + f1 = f1p + f1m + f2 = f2p + f2m + f3 = f3p + f3m + f4 = f4p + f4m + end + + return SVector(f1, f2, f3, f4) +end + +@inline function flux_godunov(u_ll, u_rr, normal_direction::AbstractVector, + equations::LinearizedEulerEquations2D) + @unpack v_mean_global, rho_mean_global, c_mean_global = equations + rho_prime_ll, v1_prime_ll, v2_prime_ll, p_prime_ll = u_ll + rho_prime_rr, v1_prime_rr, v2_prime_rr, p_prime_rr = u_rr + + # Do not use `normalize` since we use `norm_` later to scale the eigenvalues + norm_ = norm(normal_direction) + normal_vector = normal_direction / norm_ + + # Use normalized vector here, scaling is applied via eigenvalues of the flux matrix + v_mean_normal = v_mean_global[1] * normal_vector[1] + + v_mean_global[2] * normal_vector[2] + v_prime_normal_ll = v1_prime_ll * normal_vector[1] + v2_prime_ll * normal_vector[2] + v_prime_normal_rr = v1_prime_rr * normal_vector[1] + v2_prime_rr * normal_vector[2] + + # Eigenvalues of the flux matrix + lambda1 = v_mean_normal * norm_ + lambda2 = (v_mean_normal - c_mean_global) * norm_ + lambda3 = (v_mean_normal + c_mean_global) * norm_ + + lambda1_p = positive_part(lambda1) + lambda2_p = positive_part(lambda2) + lambda3_p = positive_part(lambda3) + lambda2p3_half_p = 0.5 * (lambda2_p + lambda3_p) + lambda3m2_half_p = 0.5 * (lambda3_p - lambda2_p) + + lambda1_m = negative_part(lambda1) + lambda2_m = negative_part(lambda2) + lambda3_m = negative_part(lambda3) + lambda2p3_half_m = 0.5 * (lambda2_m + lambda3_m) + lambda3m2_half_m = 0.5 * (lambda3_m - lambda2_m) + + f1p = (lambda1_p * rho_prime_ll + + lambda3m2_half_p / c_mean_global * rho_mean_global * v_prime_normal_ll + + (lambda2p3_half_p - lambda1_p) / c_mean_global^2 * p_prime_ll) + f2p = (((lambda1_p * normal_vector[2]^2 + + lambda2p3_half_p * normal_vector[1]^2) * v1_prime_ll + + (lambda2p3_half_p - lambda1_p) * prod(normal_vector) * v2_prime_ll) + + lambda3m2_half_p / c_mean_global * normal_vector[1] * p_prime_ll / + rho_mean_global) + f3p = (((lambda1_p * normal_vector[1]^2 + + lambda2p3_half_p * normal_vector[2]^2) * v2_prime_ll + + (lambda2p3_half_p - lambda1_p) * prod(normal_vector) * v1_prime_ll) + + lambda3m2_half_p / c_mean_global * normal_vector[2] * p_prime_ll / + rho_mean_global) + f4p = (lambda3m2_half_p * c_mean_global * rho_mean_global * v_prime_normal_ll + + lambda2p3_half_p * p_prime_ll) + + f1m = (lambda1_m * rho_prime_rr + + lambda3m2_half_m / c_mean_global * rho_mean_global * v_prime_normal_rr + + (lambda2p3_half_m - lambda1_m) / c_mean_global^2 * p_prime_rr) + f2m = (((lambda1_m * normal_vector[2]^2 + + lambda2p3_half_m * normal_vector[1]^2) * v1_prime_rr + + (lambda2p3_half_m - lambda1_m) * prod(normal_vector) * v2_prime_rr) + + lambda3m2_half_m / c_mean_global * normal_vector[1] * p_prime_rr / + rho_mean_global) + f3m = (((lambda1_m * normal_vector[1]^2 + + lambda2p3_half_m * normal_vector[2]^2) * v2_prime_rr + + (lambda2p3_half_m - lambda1_m) * prod(normal_vector) * v1_prime_rr) + + lambda3m2_half_m / c_mean_global * normal_vector[2] * p_prime_rr / + 
rho_mean_global) + f4m = (lambda3m2_half_m * c_mean_global * rho_mean_global * v_prime_normal_rr + + lambda2p3_half_m * p_prime_rr) + + f1 = f1p + f1m + f2 = f2p + f2m + f3 = f3p + f3m + f4 = f4p + f4m + + return SVector(f1, f2, f3, f4) +end + # Convert conservative variables to primitive @inline cons2prim(u, equations::LinearizedEulerEquations2D) = u @inline cons2entropy(u, ::LinearizedEulerEquations2D) = u diff --git a/test/test_p4est_2d.jl b/test/test_p4est_2d.jl index f66664c7a89..c4ce2619e15 100644 --- a/test/test_p4est_2d.jl +++ b/test/test_p4est_2d.jl @@ -164,6 +164,11 @@ isdir(outdir) && rm(outdir, recursive=true) tspan = (0.0, 0.02)) end + @trixi_testset "elixir_linearizedeuler_gaussian_source.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_linearizedeuler_gaussian_source.jl"), + l2 = [0.006047938590548741, 0.0040953286019907035, 0.004222698522497298, 0.006269492499336128], + linf = [0.06386175207349379, 0.0378926444850457, 0.041759728067967065, 0.06430136016259067]) + end end # Clean up afterwards: delete Trixi.jl output directory diff --git a/test/test_tree_2d_linearizedeuler.jl b/test/test_tree_2d_linearizedeuler.jl index 540b3951212..2c5f6dc2cd1 100644 --- a/test/test_tree_2d_linearizedeuler.jl +++ b/test/test_tree_2d_linearizedeuler.jl @@ -13,4 +13,10 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_2d_dgsem") linf = [0.0011006084408365924, 0.0005788678074691855, 0.0005788678074701847, 0.0011006084408365924] ) end + + @trixi_testset "elixir_linearizedeuler_gauss_wall.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_linearizedeuler_gauss_wall.jl"), + l2 = [0.048185623945503485, 0.01941899333212175, 0.019510224816991825, 0.048185623945503485], + linf = [1.0392165942153189, 0.18188777290819994, 0.1877028372108587, 1.0392165942153189]) + end end diff --git a/test/test_unit.jl b/test/test_unit.jl index 2156e9bac32..b0c3e4205e5 100644 --- a/test/test_unit.jl +++ b/test/test_unit.jl @@ -670,6 +670,26 @@ isdir(outdir) && rm(outdir, recursive=true) for normal_direction in normal_directions @test flux_godunov(u, u, normal_direction, equation) ≈ flux(u, normal_direction, equation) end + + # Linearized Euler 2D + equation = LinearizedEulerEquations2D(v_mean_global=(0.5, -0.7), c_mean_global=1.1, + rho_mean_global=1.2) + u_values = [SVector(1.0, 0.5, -0.7, 1.0), + SVector(1.5, -0.2, 0.1, 5.0),] + + orientations = [1, 2] + for orientation in orientations, u in u_values + @test flux_godunov(u, u, orientation, equation) ≈ flux(u, orientation, equation) + end + + normal_directions = [SVector(1.0, 0.0), + SVector(0.0, 1.0), + SVector(0.5, -0.5), + SVector(-1.2, 0.3)] + + for normal_direction in normal_directions, u in u_values + @test flux_godunov(u, u, normal_direction, equation) ≈ flux(u, normal_direction, equation) + end end @timed_testset "Consistency check for Engquist-Osher flux" begin From 42732dbd09b21c2e0237ba3f004469b94f3d5600 Mon Sep 17 00:00:00 2001 From: Simon Candelaresi <10759273+SimonCan@users.noreply.github.com> Date: Tue, 11 Jul 2023 22:44:39 +0100 Subject: [PATCH 077/163] Added load_timestep function. (#1528) * Added load_timestep function. Corrected time index in restart simulations. * Changed doc string for load_timestep to clarify that we read the iteration number. * Added reading function for dt. Changed restart example elixir such that they use dt from previous simulation. * Get attribute 'current_filename' when loading an existing mesh. This fixes issues with converting from hdf5 into vtk when rerunning a simulation. 
* format

* Update make.jl to include restart simulation documentation.

* Create restart.md.

* Added unformatted docs on how to restart a simulation from an old snapshot.

* Completed restart tutorial.

* Fixed a few typos in the docs for restarting a simulation.

* Minor typo.

* Added myself to the contributor list.

* Update docs/src/restart.md

Co-authored-by: Hendrik Ranocha

* Update docs/src/restart.md

Co-authored-by: Hendrik Ranocha

* Update docs/src/restart.md

Co-authored-by: Michael Schlottke-Lakemper

* Update docs/src/restart.md

Co-authored-by: Michael Schlottke-Lakemper

* Update restart.md

Added a few links to the restart documentation.

* Update docs/src/restart.md

Co-authored-by: Hendrik Ranocha

* Corrected reference file name.

* Added reference to save solution callback.

---------

Co-authored-by: Hendrik Ranocha
Co-authored-by: Michael Schlottke-Lakemper
Co-authored-by: Hendrik Ranocha
---
 AUTHORS.md                                    |  1 +
 docs/make.jl                                  |  1 +
 docs/src/restart.md                           | 89 +++++++++++++++++++
 .../elixir_advection_restart.jl               | 16 +++-
 .../elixir_advection_restart.jl               | 16 +++-
 .../elixir_advection_restart.jl               | 15 +++-
 .../elixir_advection_restart.jl               | 16 +++-
 .../tree_2d_dgsem/elixir_advection_restart.jl | 16 +++-
 .../tree_3d_dgsem/elixir_advection_restart.jl | 16 +++-
 .../elixir_euler_restart.jl                   | 16 +++-
 src/Trixi.jl                                  |  2 +-
 src/callbacks_step/save_restart.jl            | 22 +++++
 src/meshes/mesh_io.jl                         |  1 +
 13 files changed, 205 insertions(+), 22 deletions(-)
 create mode 100644 docs/src/restart.md

diff --git a/AUTHORS.md b/AUTHORS.md
index 973e311920b..abaa3e7e037 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -24,6 +24,7 @@ are listed in alphabetical order:
 * Maximilian D. Bertrand
 * Benjamin Bolm
+* Simon Candelaresi
 * Jesse Chan
 * Lars Christmann
 * Christof Czernik
diff --git a/docs/make.jl b/docs/make.jl
index 5069e4dc49a..57629577ddb 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -92,6 +92,7 @@ makedocs(
         "Getting started" => [
             "Overview" => "overview.md",
             "Visualization" => "visualization.md",
+            "Restart simulation" => "restart.md",
         ],
         "Tutorials" => tutorials,
         "Basic building blocks" => [
diff --git a/docs/src/restart.md b/docs/src/restart.md
new file mode 100644
index 00000000000..d24d93cb297
--- /dev/null
+++ b/docs/src/restart.md
@@ -0,0 +1,89 @@
+# [Restart simulation](@id restart)
+
+You can continue running an already finished simulation by first
+preparing the simulation for the restart and then performing the restart.
+Here we suppose that in the first run your simulation stops at time 1.0
+and then you want it to run further to time 2.0.
+
+## [Prepare the simulation for a restart](@id restart_preparation)
+In your original elixir you need to specify that restart files should be written out.
+Those will later be read for the restart of your simulation.
+This is done almost the same way as writing the snapshots using the
+[`SaveSolutionCallback`](@ref) callback.
+For the restart files it is called [`SaveRestartCallback`](@ref):
+```julia
+save_restart = SaveRestartCallback(interval=100,
+                                   save_final_restart=true)
+```
+Make this part of your `CallbackSet`.
+
+An example is
+[`examples/structured_2d_dgsem/elixir_advection_extended.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/structured_2d_dgsem/elixir_advection_extended.jl).
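+For illustration, the callback collection of the original run could then look like
+the following sketch. Apart from `save_restart`, the callbacks shown here
+(`summary_callback`, `analysis_callback`, `save_solution`, `stepsize_callback`) are
+assumptions made for this example and have to match the ones actually defined in
+your elixir:
+```julia
+# `save_restart` writes the restart files that are read in below; the other
+# callbacks are placeholders for whatever the original simulation already uses
+callbacks = CallbackSet(summary_callback, analysis_callback,
+                        save_solution, save_restart, stepsize_callback)
+```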
+
+
+## [Perform the simulation restart](@id restart_perform)
+Since all of the information about the simulation can be obtained from the
+last snapshot, the restart can be done with relatively few lines
+in an extra elixir file.
+However, some might prefer to keep everything in one elixir and
+use conditionals like `if restart` with a user-defined boolean variable `restart`.
+
+First we need to define from which file we want to restart, e.g.
+```julia
+restart_file = "restart_000021.h5"
+restart_filename = joinpath("out", restart_file)
+```
+
+Then we load the mesh file:
+```julia
+mesh = load_mesh(restart_filename)
+```
+
+This is then needed for the semidiscretization:
+```julia
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+```
+
+We then define a new time span for the simulation that takes as starting
+time the one from the snapshot:
+```julia
+tspan = (load_time(restart_filename), 2.0)
+```
+
+We now also take the last `dt`, so that our solver does not need to first find
+one to fulfill the CFL condition:
+```julia
+dt = load_dt(restart_filename)
+```
+
+The ODE that we will pass to the solver is now:
+```julia
+ode = semidiscretize(semi, tspan, restart_filename)
+```
+
+You should now define a [`SaveSolutionCallback`](@ref) similar to the
+[original simulation](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/structured_2d_dgsem/elixir_advection_extended.jl),
+but with `save_initial_solution=false`, otherwise our initial snapshot will be overwritten.
+If you are using one file for the original simulation and the restart
+you can reuse your [`SaveSolutionCallback`](@ref), but need to set
+```julia
+save_solution.condition.save_initial_solution = false
+```
+
+Before we compute the solution using
+[OrdinaryDiffEq.jl](https://github.com/SciML/OrdinaryDiffEq.jl)
+we need to set the integrator
+and its time step number, e.g.:
+```julia
+integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false),
+                  dt=dt, save_everystep=false, callback=callbacks);
+integrator.iter = load_timestep(restart_filename)
+integrator.stats.naccept = integrator.iter
+```
+
+Now we can compute the solution:
+```julia
+sol = solve!(integrator)
+```
+
+An example is in [`examples/structured_2d_dgsem/elixir_advection_restart.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/structured_2d_dgsem/elixir_advection_restart.jl).
diff --git a/examples/p4est_2d_dgsem/elixir_advection_restart.jl b/examples/p4est_2d_dgsem/elixir_advection_restart.jl
index 1906fb2896e..79a35199b83 100644
--- a/examples/p4est_2d_dgsem/elixir_advection_restart.jl
+++ b/examples/p4est_2d_dgsem/elixir_advection_restart.jl
@@ -24,13 +24,23 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
                                     boundary_conditions=boundary_conditions)
 
 tspan = (load_time(restart_filename), 2.0)
+dt = load_dt(restart_filename)
 ode = semidiscretize(semi, tspan, restart_filename);
 
+# Do not overwrite the initial snapshot written by elixir_advection_extended.jl.
+save_solution.condition.save_initial_solution = false
+
+integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false),
+                  dt=dt, # solve needs some value here but it will be overwritten by the stepsize_callback
+                  save_everystep=false, callback=callbacks);
+
+# Get the last time index and work with that.
+integrator.iter = load_timestep(restart_filename) +integrator.stats.naccept = integrator.iter + ############################################################################### # run the simulation -sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), - dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback - save_everystep=false, callback=callbacks); +sol = solve!(integrator) summary_callback() # print the timer summary diff --git a/examples/p4est_3d_dgsem/elixir_advection_restart.jl b/examples/p4est_3d_dgsem/elixir_advection_restart.jl index 71b37e9f39b..b27eaab62e2 100644 --- a/examples/p4est_3d_dgsem/elixir_advection_restart.jl +++ b/examples/p4est_3d_dgsem/elixir_advection_restart.jl @@ -21,13 +21,23 @@ mesh = load_mesh(restart_filename) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, solver) tspan = (load_time(restart_filename), 2.0) +dt = load_dt(restart_filename) ode = semidiscretize(semi, tspan, restart_filename); +# Do not overwrite the initial snapshot written by elixir_advection_extended.jl. +save_solution.condition.save_initial_solution = false + +integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=dt, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); + +# Get the last time index and work with that. +integrator.iter = load_timestep(restart_filename) +integrator.stats.naccept = integrator.iter + ############################################################################### # run the simulation -sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), - dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback - save_everystep=false, callback=callbacks); +sol = solve!(integrator) summary_callback() # print the timer summary diff --git a/examples/structured_2d_dgsem/elixir_advection_restart.jl b/examples/structured_2d_dgsem/elixir_advection_restart.jl index 2c2a0ef8f51..98c44fac71a 100644 --- a/examples/structured_2d_dgsem/elixir_advection_restart.jl +++ b/examples/structured_2d_dgsem/elixir_advection_restart.jl @@ -23,13 +23,22 @@ mesh = load_mesh(restart_filename) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) tspan = (load_time(restart_filename), 2.0) +dt = load_dt(restart_filename) ode = semidiscretize(semi, tspan, restart_filename); +# Do not overwrite the initial snapshot written by elixir_advection_extended.jl. +save_solution.condition.save_initial_solution = false + +integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=dt, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); + +# Get the last time index and work with that. 
+integrator.iter = load_timestep(restart_filename) +integrator.stats.naccept = integrator.iter ############################################################################### # run the simulation -sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), - dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback - save_everystep=false, callback=callbacks); +sol = solve!(integrator) summary_callback() # print the timer summary diff --git a/examples/structured_3d_dgsem/elixir_advection_restart.jl b/examples/structured_3d_dgsem/elixir_advection_restart.jl index 39e1a675167..39d28848c77 100644 --- a/examples/structured_3d_dgsem/elixir_advection_restart.jl +++ b/examples/structured_3d_dgsem/elixir_advection_restart.jl @@ -21,13 +21,23 @@ mesh = load_mesh(restart_filename) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test, solver) tspan = (load_time(restart_filename), 2.0) +dt = load_dt(restart_filename) ode = semidiscretize(semi, tspan, restart_filename); +# Do not overwrite the initial snapshot written by elixir_advection_extended.jl. +save_solution.condition.save_initial_solution = false + +integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=dt, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); + +# Get the last time index and work with that. +integrator.iter = load_timestep(restart_filename) +integrator.stats.naccept = integrator.iter + ############################################################################### # run the simulation -sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), - dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback - save_everystep=false, callback=callbacks); +sol = solve!(integrator) summary_callback() # print the timer summary diff --git a/examples/tree_2d_dgsem/elixir_advection_restart.jl b/examples/tree_2d_dgsem/elixir_advection_restart.jl index 2cb45c0b47e..4ceb5932573 100644 --- a/examples/tree_2d_dgsem/elixir_advection_restart.jl +++ b/examples/tree_2d_dgsem/elixir_advection_restart.jl @@ -20,13 +20,23 @@ mesh = load_mesh(restart_filename) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) tspan = (load_time(restart_filename), 2.0) +dt = load_dt(restart_filename) ode = semidiscretize(semi, tspan, restart_filename); +# Do not overwrite the initial snapshot written by elixir_advection_extended.jl. +save_solution.condition.save_initial_solution = false + +integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=dt, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks) + +# Get the last time index and work with that. 
+integrator.iter = load_timestep(restart_filename) +integrator.stats.naccept = integrator.iter ############################################################################### # run the simulation -sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), - dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback - save_everystep=false, callback=callbacks); +sol = solve!(integrator) + summary_callback() # print the timer summary diff --git a/examples/tree_3d_dgsem/elixir_advection_restart.jl b/examples/tree_3d_dgsem/elixir_advection_restart.jl index 83bf4418b98..3061f165874 100644 --- a/examples/tree_3d_dgsem/elixir_advection_restart.jl +++ b/examples/tree_3d_dgsem/elixir_advection_restart.jl @@ -20,13 +20,23 @@ mesh = load_mesh(restart_filename) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) tspan = (load_time(restart_filename), 2.0) +dt = load_dt(restart_filename) ode = semidiscretize(semi, tspan, restart_filename); +# Do not overwrite the initial snapshot written by elixir_advection_extended.jl. +save_solution.condition.save_initial_solution = false + +integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=dt, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); + +# Get the last time index and work with that. +integrator.iter = load_timestep(restart_filename) +integrator.stats.naccept = integrator.iter + ############################################################################### # run the simulation -sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), - dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback - save_everystep=false, callback=callbacks); +sol = solve!(integrator) summary_callback() # print the timer summary diff --git a/examples/unstructured_2d_dgsem/elixir_euler_restart.jl b/examples/unstructured_2d_dgsem/elixir_euler_restart.jl index 2ac67652023..b85cc2c6d70 100644 --- a/examples/unstructured_2d_dgsem/elixir_euler_restart.jl +++ b/examples/unstructured_2d_dgsem/elixir_euler_restart.jl @@ -22,14 +22,24 @@ semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, boundary_conditions=boundary_conditions) tspan = (load_time(restart_filename), 1.0) +dt = load_dt(restart_filename) ode = semidiscretize(semi, tspan, restart_filename); +# Do not overwrite the initial snapshot written by elixir_advection_extended.jl. +save_solution.condition.save_initial_solution = false + +integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=dt, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); + +# Get the last time index and work with that. 
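+# (Because the run is continued with `solve!(integrator)` instead of a fresh
+# `solve(ode, ...)`, the integrator state adjusted below is preserved.)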
+integrator.iter = load_timestep(restart_filename) +integrator.stats.naccept = integrator.iter + ############################################################################### # run the simulation -sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), - dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback - save_everystep=false, callback=callbacks); +sol = solve!(integrator) summary_callback() # print the timer summary diff --git a/src/Trixi.jl b/src/Trixi.jl index 66878f4b459..6fc62f50520 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -241,7 +241,7 @@ export SummaryCallback, SteadyStateCallback, AnalysisCallback, AliveCallback, GlmSpeedCallback, LBMCollisionCallback, EulerAcousticsCouplingCallback, TrivialCallback, AnalysisCallbackCoupled -export load_mesh, load_time +export load_mesh, load_time, load_timestep, load_dt export ControllerThreeLevel, ControllerThreeLevelCombined, IndicatorLöhner, IndicatorLoehner, IndicatorMax, diff --git a/src/callbacks_step/save_restart.jl b/src/callbacks_step/save_restart.jl index e23f58f26ea..f567a5c7fda 100644 --- a/src/callbacks_step/save_restart.jl +++ b/src/callbacks_step/save_restart.jl @@ -130,6 +130,28 @@ function load_time(restart_file::AbstractString) end end +""" + load_timestep(restart_file::AbstractString) + +Load the time step number (`iter` in OrdinaryDiffEq.jl) saved in a `restart_file`. +""" +function load_timestep(restart_file::AbstractString) + h5open(restart_file, "r") do file + read(attributes(file)["timestep"]) + end +end + +""" + load_dt(restart_file::AbstractString) + +Load the time step size (`dt` in OrdinaryDiffEq.jl) saved in a `restart_file`. +""" +function load_dt(restart_file::AbstractString) + h5open(restart_file, "r") do file + read(attributes(file)["dt"]) + end +end + function load_restart_file(semi::AbstractSemidiscretization, restart_file) load_restart_file(mesh_equations_solver_cache(semi)..., restart_file) end diff --git a/src/meshes/mesh_io.jl b/src/meshes/mesh_io.jl index ede85d80106..da67fe23e0e 100644 --- a/src/meshes/mesh_io.jl +++ b/src/meshes/mesh_io.jl @@ -286,6 +286,7 @@ function load_mesh_serial(mesh_file::AbstractString; n_cells_max, RealT) mesh = StructuredMesh(size, mapping; RealT = RealT, unsaved_changes = false, mapping_as_string = mapping_as_string) + mesh.current_filename = mesh_file elseif mesh_type == "UnstructuredMesh2D" mesh_filename, periodicity_ = h5open(mesh_file, "r") do file return read(attributes(file)["mesh_filename"]), From a191e39b0d6bc75616aa2113b446b68e3b65ee41 Mon Sep 17 00:00:00 2001 From: Daniel Doehring Date: Thu, 13 Jul 2023 20:28:55 +0200 Subject: [PATCH 078/163] Hll 2 wave improvements non breaking (#1561) * Add Classical and Naive HLL 2 Wave solver to classic Hyperbolic PDEs * Format Code * HLLE wave speeds for SWE * Fix typos * Update tests for HLL * Unit test 1D MHD HLL, HLLE * Add example for classical HLL 2 wave * remove plots * Use lowercase for flux * Use einfeldt for mhd * Use hlle for mhd tets * Missing comma causes failing tests * Correct bug in SWE 2D Roe eigval comp, unit tests * format * Revert "format" This reverts commit 047a5e75b4a5ee4a0f58a7979d58b26f15f24334. 
* format equations * Add unit tests for HLL naive * Revert default hll flux * Rename min_max_speed to min_max_speed_davis and reduce documentation * Update src/equations/shallow_water_1d.jl: Comments Co-authored-by: Hendrik Ranocha * Add published resource for Roe averages for SWE * Add tests for rotation * Remove breaking portionv from PR * fix copy paste error * Lowercase davis * Update src/equations/numerical_fluxes.jl Co-authored-by: Hendrik Ranocha * Update src/equations/numerical_fluxes.jl Co-authored-by: Hendrik Ranocha * Update src/equations/numerical_fluxes.jl Co-authored-by: Hendrik Ranocha * Update src/equations/numerical_fluxes.jl Co-authored-by: Hendrik Ranocha * Update src/equations/numerical_fluxes.jl Co-authored-by: Hendrik Ranocha * Update src/equations/numerical_fluxes.jl Co-authored-by: Hendrik Ranocha * Update test/test_tree_2d_mhd.jl Co-authored-by: Hendrik Ranocha * Update src/equations/ideal_glm_mhd_1d.jl Co-authored-by: Hendrik Ranocha * Update src/equations/ideal_glm_mhd_2d.jl Co-authored-by: Hendrik Ranocha * Update src/equations/ideal_glm_mhd_3d.jl Co-authored-by: Hendrik Ranocha * Update test/test_tree_3d_mhd.jl Co-authored-by: Hendrik Ranocha * Remove hll_davis test * Split consistency checks * Try to resolve conflict with 5ff677c * Add tests * More tests --------- Co-authored-by: Hendrik Ranocha --- examples/dgmulti_2d/elixir_euler_bilinear.jl | 2 +- examples/dgmulti_2d/elixir_euler_curved.jl | 2 +- .../elixir_euler_triangulate_pkg_mesh.jl | 2 +- examples/dgmulti_2d/elixir_euler_weakform.jl | 2 +- .../elixir_euler_weakform_periodic.jl | 2 +- examples/dgmulti_3d/elixir_euler_curved.jl | 2 +- examples/dgmulti_3d/elixir_euler_weakform.jl | 2 +- .../elixir_euler_weakform_periodic.jl | 2 +- src/Trixi.jl | 2 +- src/equations/compressible_euler_1d.jl | 19 +- src/equations/compressible_euler_2d.jl | 43 +- src/equations/compressible_euler_3d.jl | 50 ++- src/equations/ideal_glm_mhd_1d.jl | 24 +- src/equations/ideal_glm_mhd_2d.jl | 67 ++- src/equations/ideal_glm_mhd_3d.jl | 71 ++++ src/equations/linearized_euler_2d.jl | 38 ++ src/equations/numerical_fluxes.jl | 45 ++- src/equations/shallow_water_1d.jl | 66 ++- src/equations/shallow_water_2d.jl | 147 ++++++- test/test_structured_1d.jl | 8 + test/test_unit.jl | 381 +++++++++++++++++- 21 files changed, 946 insertions(+), 31 deletions(-) diff --git a/examples/dgmulti_2d/elixir_euler_bilinear.jl b/examples/dgmulti_2d/elixir_euler_bilinear.jl index beb5c863971..bdd582610ea 100644 --- a/examples/dgmulti_2d/elixir_euler_bilinear.jl +++ b/examples/dgmulti_2d/elixir_euler_bilinear.jl @@ -2,7 +2,7 @@ using Trixi, OrdinaryDiffEq dg = DGMulti(polydeg = 3, element_type = Quad(), approximation_type = SBP(), - surface_integral = SurfaceIntegralWeakForm(FluxHLL()), + surface_integral = SurfaceIntegralWeakForm(flux_hll), volume_integral = VolumeIntegralFluxDifferencing(flux_ranocha)) equations = CompressibleEulerEquations2D(1.4) diff --git a/examples/dgmulti_2d/elixir_euler_curved.jl b/examples/dgmulti_2d/elixir_euler_curved.jl index 4f1d613b247..a3ba62f1cfb 100644 --- a/examples/dgmulti_2d/elixir_euler_curved.jl +++ b/examples/dgmulti_2d/elixir_euler_curved.jl @@ -2,7 +2,7 @@ using Trixi, OrdinaryDiffEq dg = DGMulti(polydeg = 3, element_type = Quad(), approximation_type = SBP(), - surface_integral = SurfaceIntegralWeakForm(FluxHLL()), + surface_integral = SurfaceIntegralWeakForm(flux_hll), volume_integral = VolumeIntegralFluxDifferencing(flux_ranocha)) equations = CompressibleEulerEquations2D(1.4) diff --git 
a/examples/dgmulti_2d/elixir_euler_triangulate_pkg_mesh.jl b/examples/dgmulti_2d/elixir_euler_triangulate_pkg_mesh.jl index 1f35a11bf8e..c10b5e46a14 100644 --- a/examples/dgmulti_2d/elixir_euler_triangulate_pkg_mesh.jl +++ b/examples/dgmulti_2d/elixir_euler_triangulate_pkg_mesh.jl @@ -1,7 +1,7 @@ using Trixi, OrdinaryDiffEq dg = DGMulti(polydeg = 3, element_type = Tri(), - surface_integral = SurfaceIntegralWeakForm(FluxHLL()), + surface_integral = SurfaceIntegralWeakForm(flux_hll), volume_integral = VolumeIntegralWeakForm()) equations = CompressibleEulerEquations2D(1.4) diff --git a/examples/dgmulti_2d/elixir_euler_weakform.jl b/examples/dgmulti_2d/elixir_euler_weakform.jl index 1ecc666c8db..486a30b37f1 100644 --- a/examples/dgmulti_2d/elixir_euler_weakform.jl +++ b/examples/dgmulti_2d/elixir_euler_weakform.jl @@ -2,7 +2,7 @@ using Trixi, OrdinaryDiffEq dg = DGMulti(polydeg = 3, element_type = Tri(), approximation_type = Polynomial(), - surface_integral = SurfaceIntegralWeakForm(FluxHLL()), + surface_integral = SurfaceIntegralWeakForm(flux_hll), volume_integral = VolumeIntegralWeakForm()) equations = CompressibleEulerEquations2D(1.4) diff --git a/examples/dgmulti_2d/elixir_euler_weakform_periodic.jl b/examples/dgmulti_2d/elixir_euler_weakform_periodic.jl index 48cc8070857..c4c83fff642 100644 --- a/examples/dgmulti_2d/elixir_euler_weakform_periodic.jl +++ b/examples/dgmulti_2d/elixir_euler_weakform_periodic.jl @@ -2,7 +2,7 @@ using Trixi, OrdinaryDiffEq dg = DGMulti(polydeg = 3, element_type = Tri(), approximation_type = Polynomial(), - surface_integral = SurfaceIntegralWeakForm(FluxHLL()), + surface_integral = SurfaceIntegralWeakForm(flux_hll), volume_integral = VolumeIntegralWeakForm()) equations = CompressibleEulerEquations2D(1.4) diff --git a/examples/dgmulti_3d/elixir_euler_curved.jl b/examples/dgmulti_3d/elixir_euler_curved.jl index 339d6ce0186..d8c4df5dd64 100644 --- a/examples/dgmulti_3d/elixir_euler_curved.jl +++ b/examples/dgmulti_3d/elixir_euler_curved.jl @@ -2,7 +2,7 @@ using Trixi, OrdinaryDiffEq dg = DGMulti(polydeg = 3, element_type = Hex(), approximation_type=SBP(), - surface_integral = SurfaceIntegralWeakForm(FluxHLL()), + surface_integral = SurfaceIntegralWeakForm(flux_hll), volume_integral = VolumeIntegralFluxDifferencing(flux_ranocha)) equations = CompressibleEulerEquations3D(1.4) diff --git a/examples/dgmulti_3d/elixir_euler_weakform.jl b/examples/dgmulti_3d/elixir_euler_weakform.jl index 4ad9f045eb6..b167377af51 100644 --- a/examples/dgmulti_3d/elixir_euler_weakform.jl +++ b/examples/dgmulti_3d/elixir_euler_weakform.jl @@ -2,7 +2,7 @@ using Trixi, OrdinaryDiffEq dg = DGMulti(polydeg = 3, element_type = Tet(), - surface_integral = SurfaceIntegralWeakForm(FluxHLL()), + surface_integral = SurfaceIntegralWeakForm(flux_hll), volume_integral = VolumeIntegralWeakForm()) equations = CompressibleEulerEquations3D(1.4) diff --git a/examples/dgmulti_3d/elixir_euler_weakform_periodic.jl b/examples/dgmulti_3d/elixir_euler_weakform_periodic.jl index f554167df90..6b17d4bba65 100644 --- a/examples/dgmulti_3d/elixir_euler_weakform_periodic.jl +++ b/examples/dgmulti_3d/elixir_euler_weakform_periodic.jl @@ -2,7 +2,7 @@ using Trixi, OrdinaryDiffEq dg = DGMulti(polydeg = 3, element_type = Tet(), approximation_type = Polynomial(), - surface_integral = SurfaceIntegralWeakForm(FluxHLL()), + surface_integral = SurfaceIntegralWeakForm(flux_hll), volume_integral = VolumeIntegralWeakForm()) equations = CompressibleEulerEquations3D(1.4) diff --git a/src/Trixi.jl b/src/Trixi.jl index 
6fc62f50520..34a1977d4f5 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -164,7 +164,7 @@ export flux, flux_central, flux_lax_friedrichs, flux_hll, flux_hllc, flux_hlle, hydrostatic_reconstruction_audusse_etal, flux_nonconservative_audusse_etal, FluxPlusDissipation, DissipationGlobalLaxFriedrichs, DissipationLocalLaxFriedrichs, FluxLaxFriedrichs, max_abs_speed_naive, - FluxHLL, min_max_speed_naive, + FluxHLL, min_max_speed_naive, min_max_speed_davis, min_max_speed_einfeldt, FluxLMARS, FluxRotated, flux_shima_etal_turbo, flux_ranocha_turbo, diff --git a/src/equations/compressible_euler_1d.jl b/src/equations/compressible_euler_1d.jl index 15f7a2cb4c4..e4fd0997eae 100644 --- a/src/equations/compressible_euler_1d.jl +++ b/src/equations/compressible_euler_1d.jl @@ -628,7 +628,7 @@ end return SVector(f1m, f2m, f3m) end -# Calculate maximum wave speed for local Lax-Friedrichs-type dissipation as the +# Calculate estimates for maximum wave speed for local Lax-Friedrichs-type dissipation as the # maximum velocity magnitude plus the maximum speed of sound @inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) @@ -648,7 +648,7 @@ end λ_max = max(v_mag_ll, v_mag_rr) + max(c_ll, c_rr) end -# Calculate minimum and maximum wave speeds for HLL-type fluxes +# Calculate estimates for minimum and maximum wave speeds for HLL-type fluxes @inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations1D) rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) @@ -660,6 +660,21 @@ end return λ_min, λ_max end +# More refined estimates for minimum and maximum wave speeds for HLL-type fluxes +@inline function min_max_speed_davis(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations1D) + rho_ll, v1_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, p_rr = cons2prim(u_rr, equations) + + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + λ_min = min(v1_ll - c_ll, v1_rr - c_rr) + λ_max = max(v1_ll + c_ll, v1_rr + c_rr) + + return λ_min, λ_max +end + """ flux_hllc(u_ll, u_rr, orientation, equations::CompressibleEulerEquations1D) diff --git a/src/equations/compressible_euler_2d.jl b/src/equations/compressible_euler_2d.jl index 05987c510b8..27b92f41953 100644 --- a/src/equations/compressible_euler_2d.jl +++ b/src/equations/compressible_euler_2d.jl @@ -1032,7 +1032,7 @@ end return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) * norm(normal_direction) end -# Calculate minimum and maximum wave speeds for HLL-type fluxes +# Calculate estimate for minimum and maximum wave speeds for HLL-type fluxes @inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations2D) rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) @@ -1065,6 +1065,47 @@ end return λ_min, λ_max end +# More refined estimates for minimum and maximum wave speeds for HLL-type fluxes +@inline function min_max_speed_davis(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations2D) + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + if orientation == 1 # x-direction + λ_min = min(v1_ll - c_ll, v1_rr - c_rr) + λ_max = max(v1_ll + c_ll, v1_rr + c_rr) + else # y-direction + λ_min = min(v2_ll - c_ll, v2_rr - c_rr) + λ_max = max(v2_ll + c_ll, v2_rr + c_rr) + end + + return λ_min, λ_max 
+end + +# More refined estimates for minimum and maximum wave speeds for HLL-type fluxes +@inline function min_max_speed_davis(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations2D) + rho_ll, v1_ll, v2_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, p_rr = cons2prim(u_rr, equations) + + norm_ = norm(normal_direction) + + c_ll = sqrt(equations.gamma * p_ll / rho_ll) * norm_ + c_rr = sqrt(equations.gamma * p_rr / rho_rr) * norm_ + + v_normal_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v_normal_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + # The v_normals are already scaled by the norm + λ_min = min(v_normal_ll - c_ll, v_normal_rr - c_rr) + λ_max = max(v_normal_ll + c_ll, v_normal_rr + c_rr) + + return λ_min, λ_max +end + # Called inside `FluxRotated` in `numerical_fluxes.jl` so the direction # has been normalized prior to this rotation of the state vector @inline function rotate_to_x(u, normal_vector, equations::CompressibleEulerEquations2D) diff --git a/src/equations/compressible_euler_3d.jl b/src/equations/compressible_euler_3d.jl index 2085811f832..7f25bde31fd 100644 --- a/src/equations/compressible_euler_3d.jl +++ b/src/equations/compressible_euler_3d.jl @@ -1070,7 +1070,7 @@ end return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) * norm(normal_direction) end -# Calculate minimum and maximum wave speeds for HLL-type fluxes +# Calculate estimates for minimum and maximum wave speeds for HLL-type fluxes @inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::CompressibleEulerEquations3D) rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) @@ -1108,6 +1108,54 @@ end return λ_min, λ_max end +# More refined estimates for minimum and maximum wave speeds for HLL-type fluxes +@inline function min_max_speed_davis(u_ll, u_rr, orientation::Integer, + equations::CompressibleEulerEquations3D) + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + c_ll = sqrt(equations.gamma * p_ll / rho_ll) + c_rr = sqrt(equations.gamma * p_rr / rho_rr) + + if orientation == 1 # x-direction + λ_min = min(v1_ll - c_ll, v1_rr - c_rr) + λ_max = max(v1_ll + c_ll, v1_rr + c_rr) + elseif orientation == 2 # y-direction + λ_min = min(v2_ll - c_ll, v2_rr - c_rr) + λ_max = max(v2_ll + c_ll, v2_rr + c_rr) + else # z-direction + λ_min = min(v3_ll - c_ll, v3_rr - c_rr) + λ_max = max(v3_ll + c_ll, v3_rr + c_rr) + end + + return λ_min, λ_max +end + +# More refined estimates for minimum and maximum wave speeds for HLL-type fluxes +@inline function min_max_speed_davis(u_ll, u_rr, normal_direction::AbstractVector, + equations::CompressibleEulerEquations3D) + rho_ll, v1_ll, v2_ll, v3_ll, p_ll = cons2prim(u_ll, equations) + rho_rr, v1_rr, v2_rr, v3_rr, p_rr = cons2prim(u_rr, equations) + + norm_ = norm(normal_direction) + + c_ll = sqrt(equations.gamma * p_ll / rho_ll) * norm_ + c_rr = sqrt(equations.gamma * p_rr / rho_rr) * norm_ + + v_normal_ll = v1_ll * normal_direction[1] + + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3] + v_normal_rr = v1_rr * normal_direction[1] + + v2_rr * normal_direction[2] + + v3_rr * normal_direction[3] + + # The v_normals are already scaled by the norm + λ_min = min(v_normal_ll - c_ll, v_normal_rr - c_rr) + λ_max = max(v_normal_ll + c_ll, v_normal_rr + c_rr) + + return λ_min, λ_max +end + # Rotate normal vector to x-axis; normal, tangent1 and tangent2 need to be orthonormal # Called inside 
`FluxRotated` in `numerical_fluxes.jl` so the directions
# has been normalized prior to this rotation of the state vector
diff --git a/src/equations/ideal_glm_mhd_1d.jl b/src/equations/ideal_glm_mhd_1d.jl
index 4ef593cda53..7e5c94c7bc3 100644
--- a/src/equations/ideal_glm_mhd_1d.jl
+++ b/src/equations/ideal_glm_mhd_1d.jl
@@ -277,13 +277,33 @@ end
     λ_max = max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr)
 end
 
+# More refined estimates for minimum and maximum wave speeds for HLL-type fluxes
+@inline function min_max_speed_davis(u_ll, u_rr, orientation::Integer,
+                                     equations::IdealGlmMhdEquations1D)
+    rho_ll, rho_v1_ll, _ = u_ll
+    rho_rr, rho_v1_rr, _ = u_rr
+
+    # Calculate primitive variables
+    v1_ll = rho_v1_ll / rho_ll
+    v1_rr = rho_v1_rr / rho_rr
+
+    # Approximate the left-most and right-most eigenvalues in the Riemann fan
+    c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations)
+    c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations)
+
+    λ_min = min(v1_ll - c_f_ll, v1_rr - c_f_rr)
+    λ_max = max(v1_ll + c_f_ll, v1_rr + c_f_rr)
+
+    return λ_min, λ_max
+end
+
 """
-    min_max_speed_naive(u_ll, u_rr, orientation, equations::IdealGlmMhdEquations1D)
+    min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations1D)
 
 Calculate minimum and maximum wave speeds for HLL-type fluxes as in
 - Li (2005)
   An HLLC Riemann solver for magneto-hydrodynamics
-  [DOI: 10.1016/j.jcp.2004.08.020](https://doi.org/10.1016/j.jcp.2004.08.020)
+  [DOI: 10.1016/j.jcp.2004.08.020](https://doi.org/10.1016/j.jcp.2004.08.020).
 """
 @inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer,
                                      equations::IdealGlmMhdEquations1D)
diff --git a/src/equations/ideal_glm_mhd_2d.jl b/src/equations/ideal_glm_mhd_2d.jl
index fb3048fe883..8fef1ee22c9 100644
--- a/src/equations/ideal_glm_mhd_2d.jl
+++ b/src/equations/ideal_glm_mhd_2d.jl
@@ -585,13 +585,70 @@ end
     return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr)
 end
 
+# More refined estimates for minimum and maximum wave speeds for HLL-type fluxes
+@inline function min_max_speed_davis(u_ll, u_rr, orientation::Integer,
+                                     equations::IdealGlmMhdEquations2D)
+    rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll
+    rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr
+
+    # Calculate primitive velocity variables
+    v1_ll = rho_v1_ll / rho_ll
+    v2_ll = rho_v2_ll / rho_ll
+
+    v1_rr = rho_v1_rr / rho_rr
+    v2_rr = rho_v2_rr / rho_rr
+
+    # Approximate the left-most and right-most eigenvalues in the Riemann fan
+    if orientation == 1 # x-direction
+        c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations)
+        c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations)
+
+        λ_min = min(v1_ll - c_f_ll, v1_rr - c_f_rr)
+        λ_max = max(v1_ll + c_f_ll, v1_rr + c_f_rr)
+    else # y-direction
+        c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations)
+        c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations)
+
+        λ_min = min(v2_ll - c_f_ll, v2_rr - c_f_rr)
+        λ_max = max(v2_ll + c_f_ll, v2_rr + c_f_rr)
+    end
+
+    return λ_min, λ_max
+end
+
+# More refined estimates for minimum and maximum wave speeds for HLL-type fluxes
+@inline function min_max_speed_davis(u_ll, u_rr, normal_direction::AbstractVector,
+                                     equations::IdealGlmMhdEquations2D)
+    rho_ll, rho_v1_ll, rho_v2_ll, _ = u_ll
+    rho_rr, rho_v1_rr, rho_v2_rr, _ = u_rr
+
+    # Calculate primitive velocity variables
+    v1_ll = rho_v1_ll / rho_ll
+    v2_ll = rho_v2_ll / rho_ll
+
+    v1_rr = rho_v1_rr / rho_rr
+    v2_rr = rho_v2_rr / rho_rr
+
+    v_normal_ll = (v1_ll * normal_direction[1] + v2_ll * normal_direction[2])
+    v_normal_rr = (v1_rr * normal_direction[1] + v2_rr *
normal_direction[2]) + + c_f_ll = calc_fast_wavespeed(u_ll, normal_direction, equations) + c_f_rr = calc_fast_wavespeed(u_rr, normal_direction, equations) + + # Estimate the min/max eigenvalues in the normal direction + λ_min = min(v_normal_ll - c_f_ll, v_normal_rr - c_f_rr) + λ_max = max(v_normal_ll + c_f_ll, v_normal_rr + c_f_rr) + + return λ_min, λ_max +end + """ - min_max_speed_naive(u_ll, u_rr, orientation, equations::IdealGlmMhdEquations2D) + min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) Calculate minimum and maximum wave speeds for HLL-type fluxes as in - Li (2005) An HLLC Riemann solver for magneto-hydrodynamics - [DOI: 10.1016/j.jcp.2004.08.020](https://doi.org/10.1016/j.jcp.2004.08.020) + [DOI: 10.1016/j.jcp.2004.08.020](https://doi.org/10.1016/j.jcp.2004.08.020). """ @inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::IdealGlmMhdEquations2D) @@ -635,10 +692,8 @@ end v1_rr = rho_v1_rr / rho_rr v2_rr = rho_v2_rr / rho_rr - v_normal_ll = (v1_ll * normal_direction[1] + - v2_ll * normal_direction[2]) - v_normal_rr = (v1_rr * normal_direction[1] + - v2_rr * normal_direction[2]) + v_normal_ll = (v1_ll * normal_direction[1] + v2_ll * normal_direction[2]) + v_normal_rr = (v1_rr * normal_direction[1] + v2_rr * normal_direction[2]) c_f_ll = calc_fast_wavespeed(u_ll, normal_direction, equations) c_f_rr = calc_fast_wavespeed(u_rr, normal_direction, equations) diff --git a/src/equations/ideal_glm_mhd_3d.jl b/src/equations/ideal_glm_mhd_3d.jl index 2e149d2849f..09990837706 100644 --- a/src/equations/ideal_glm_mhd_3d.jl +++ b/src/equations/ideal_glm_mhd_3d.jl @@ -670,6 +670,77 @@ end return max(abs(v_ll), abs(v_rr)) + max(cf_ll, cf_rr) end +# More refined estimates for minimum and maximum wave speeds for HLL-type fluxes +@inline function min_max_speed_davis(u_ll, u_rr, orientation::Integer, + equations::IdealGlmMhdEquations3D) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr + + # Calculate primitive variables and speed of sound + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / rho_ll + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + + # Approximate the left-most and right-most eigenvalues in the Riemann fan + if orientation == 1 # x-direction + c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) + c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) + + λ_min = min(v1_ll - c_f_ll, v1_rr - c_f_rr) + λ_max = max(v1_ll + c_f_ll, v1_rr + c_f_rr) + elseif orientation == 2 # y-direction + c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) + c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) + + λ_min = min(v2_ll - c_f_ll, v2_rr - c_f_rr) + λ_max = max(v2_ll + c_f_ll, v2_rr + c_f_rr) + else # z-direction + c_f_ll = calc_fast_wavespeed(u_ll, orientation, equations) + c_f_rr = calc_fast_wavespeed(u_rr, orientation, equations) + + λ_min = min(v3_ll - c_f_ll, v3_rr - c_f_rr) + λ_max = max(v3_ll + c_f_ll, v3_rr + c_f_rr) + end + + return λ_min, λ_max +end + +# More refined estimates for minimum and maximum wave speeds for HLL-type fluxes +@inline function min_max_speed_davis(u_ll, u_rr, normal_direction::AbstractVector, + equations::IdealGlmMhdEquations3D) + rho_ll, rho_v1_ll, rho_v2_ll, rho_v3_ll, _ = u_ll + rho_rr, rho_v1_rr, rho_v2_rr, rho_v3_rr, _ = u_rr + + # Calculate primitive velocity variables + v1_ll = rho_v1_ll / rho_ll + v2_ll = rho_v2_ll / rho_ll + v3_ll = rho_v3_ll / 
rho_ll + + v1_rr = rho_v1_rr / rho_rr + v2_rr = rho_v2_rr / rho_rr + v3_rr = rho_v3_rr / rho_rr + + v_normal_ll = (v1_ll * normal_direction[1] + + v2_ll * normal_direction[2] + + v3_ll * normal_direction[3]) + v_normal_rr = (v1_rr * normal_direction[1] + + v2_rr * normal_direction[2] + + v3_rr * normal_direction[3]) + + c_f_ll = calc_fast_wavespeed(u_ll, normal_direction, equations) + c_f_rr = calc_fast_wavespeed(u_rr, normal_direction, equations) + + # Estimate the min/max eigenvalues in the normal direction + λ_min = min(v_normal_ll - c_f_ll, v_normal_rr - c_f_rr) + λ_max = max(v_normal_ll + c_f_ll, v_normal_rr + c_f_rr) + + return λ_min, λ_max +end + """ min_max_speed_naive(u_ll, u_rr, orientation_or_normal_direction, equations::IdealGlmMhdEquations3D) diff --git a/src/equations/linearized_euler_2d.jl b/src/equations/linearized_euler_2d.jl index e478c32bd29..d497762bf62 100644 --- a/src/equations/linearized_euler_2d.jl +++ b/src/equations/linearized_euler_2d.jl @@ -353,6 +353,44 @@ end return SVector(f1, f2, f3, f4) end +# Calculate estimate for minimum and maximum wave speeds for HLL-type fluxes +@inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, + equations::LinearizedEulerEquations2D) + min_max_speed_davis(u_ll, u_rr, orientation, equations) +end + +@inline function min_max_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, + equations::LinearizedEulerEquations2D) + min_max_speed_davis(u_ll, u_rr, normal_direction, equations) +end + +# More refined estimates for minimum and maximum wave speeds for HLL-type fluxes +@inline function min_max_speed_davis(u_ll, u_rr, orientation::Integer, + equations::LinearizedEulerEquations2D) + @unpack v_mean_global, c_mean_global = equations + + λ_min = v_mean_global[orientation] - c_mean_global + λ_max = v_mean_global[orientation] + c_mean_global + + return λ_min, λ_max +end + +@inline function min_max_speed_davis(u_ll, u_rr, normal_direction::AbstractVector, + equations::LinearizedEulerEquations2D) + @unpack v_mean_global, c_mean_global = equations + + norm_ = norm(normal_direction) + + v_normal = v_mean_global[1] * normal_direction[1] + + v_mean_global[2] * normal_direction[2] + + # The v_normals are already scaled by the norm + λ_min = v_normal - c_mean_global * norm_ + λ_max = v_normal + c_mean_global * norm_ + + return λ_min, λ_max +end + # Convert conservative variables to primitive @inline cons2prim(u, equations::LinearizedEulerEquations2D) = u @inline cons2entropy(u, ::LinearizedEulerEquations2D) = u diff --git a/src/equations/numerical_fluxes.jl b/src/equations/numerical_fluxes.jl index 16a83124d14..abd9d66c490 100644 --- a/src/equations/numerical_fluxes.jl +++ b/src/equations/numerical_fluxes.jl @@ -214,6 +214,10 @@ Create an HLL (Harten, Lax, van Leer) numerical flux where the minimum and maxim wave speeds are estimated as `λ_min, λ_max = min_max_speed(u_ll, u_rr, orientation_or_normal_direction, equations)`, defaulting to [`min_max_speed_naive`](@ref). +Original paper: +- Amiram Harten, Peter D. 
Lax, Bram van Leer (1983)
+  On Upstream Differencing and Godunov-Type Schemes for Hyperbolic Conservation Laws
+  [DOI: 10.1137/1025002](https://doi.org/10.1137/1025002)
 """
 struct FluxHLL{MinMaxSpeed}
     min_max_speed::MinMaxSpeed
@@ -222,18 +226,55 @@ end
 FluxHLL() = FluxHLL(min_max_speed_naive)
 
 """
-    min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations)
+    min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations)
     min_max_speed_naive(u_ll, u_rr, normal_direction::AbstractVector, equations)
 
-Simple and fast estimate of the minimal and maximal wave speed of the Riemann problem with
+Simple and fast estimate(!) of the minimal and maximal wave speed of the Riemann problem with
 left and right states `u_ll, u_rr`, usually based only on the local wave speeds associated to
 `u_ll` and `u_rr`.
 
 - Amiram Harten, Peter D. Lax, Bram van Leer (1983)
   On Upstream Differencing and Godunov-Type Schemes for Hyperbolic Conservation Laws
   [DOI: 10.1137/1025002](https://doi.org/10.1137/1025002)
+
+See also [`FluxHLL`](@ref), [`min_max_speed_davis`](@ref), [`min_max_speed_einfeldt`](@ref).
 """
 function min_max_speed_naive end
 
+"""
+    min_max_speed_davis(u_ll, u_rr, orientation::Integer, equations)
+    min_max_speed_davis(u_ll, u_rr, normal_direction::AbstractVector, equations)
+
+Simple and fast estimates of the minimal and maximal wave speed of the Riemann problem with
+left and right states `u_ll, u_rr`, usually based only on the local wave speeds associated to
+`u_ll` and `u_rr`.
+
+- S.F. Davis (1988)
+  Simplified Second-Order Godunov-Type Methods
+  [DOI: 10.1137/0909030](https://doi.org/10.1137/0909030)
+
+See also [`FluxHLL`](@ref), [`min_max_speed_naive`](@ref), [`min_max_speed_einfeldt`](@ref).
+"""
+function min_max_speed_davis end
+
+"""
+    min_max_speed_einfeldt(u_ll, u_rr, orientation::Integer, equations)
+    min_max_speed_einfeldt(u_ll, u_rr, normal_direction::AbstractVector, equations)
+
+More advanced minimal and maximal wave speed computation based on
+- Bernd Einfeldt (1988)
+  On Godunov-type methods for gas dynamics.
+  [DOI: 10.1137/0725021](https://doi.org/10.1137/0725021)
+- Bernd Einfeldt, Claus-Dieter Munz, Philip L. Roe and Björn Sjögreen (1991)
+  On Godunov-type methods near low densities.
+  [DOI: 10.1016/0021-9991(91)90211-3](https://doi.org/10.1016/0021-9991(91)90211-3)
+
+originally developed for the compressible Euler equations.
+A compact representation can be found in [these lecture notes, eq. (9.28)](https://metaphor.ethz.ch/x/2019/hs/401-4671-00L/literature/mishra_hyperbolic_pdes.pdf).
+
+See also [`FluxHLL`](@ref), [`min_max_speed_naive`](@ref), [`min_max_speed_davis`](@ref).
+""" +function min_max_speed_einfeldt end + @inline function (numflux::FluxHLL)(u_ll, u_rr, orientation_or_normal_direction, equations) λ_min, λ_max = numflux.min_max_speed(u_ll, u_rr, orientation_or_normal_direction, diff --git a/src/equations/shallow_water_1d.jl b/src/equations/shallow_water_1d.jl index 851cbacdd57..c33b31fca81 100644 --- a/src/equations/shallow_water_1d.jl +++ b/src/equations/shallow_water_1d.jl @@ -460,7 +460,7 @@ end end end -# Calculate minimum and maximum wave speeds for HLL-type fluxes +# Calculate estimate for minimum and maximum wave speeds for HLL-type fluxes @inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations1D) h_ll = waterheight(u_ll, equations) @@ -474,6 +474,41 @@ end return λ_min, λ_max end +# More refined estimates for minimum and maximum wave speeds for HLL-type fluxes +@inline function min_max_speed_davis(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquations1D) + h_ll = waterheight(u_ll, equations) + v_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + v_rr = velocity(u_rr, equations) + + c_ll = sqrt(equations.gravity * h_ll) + c_rr = sqrt(equations.gravity * h_rr) + + λ_min = min(v_ll - c_ll, v_rr - c_rr) + λ_max = max(v_rr + c_rr, v_rr + c_rr) + + return λ_min, λ_max +end + +@inline function min_max_speed_einfeldt(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquations1D) + h_ll = waterheight(u_ll, equations) + v_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + v_rr = velocity(u_rr, equations) + + c_ll = sqrt(equations.gravity * h_ll) + c_rr = sqrt(equations.gravity * h_rr) + + v_roe, c_roe = calc_wavespeed_roe(u_ll, u_rr, orientation, equations) + + λ_min = min(v_ll - c_ll, v_roe - c_roe) + λ_max = max(v_rr + c_rr, v_roe + c_roe) + + return λ_min, λ_max +end + @inline function max_abs_speeds(u, equations::ShallowWaterEquations1D) h = waterheight(u, equations) v = velocity(u, equations) @@ -547,6 +582,35 @@ end return waterheight(u, equations) * pressure(u, equations) end +""" + calc_wavespeed_roe(u_ll, u_rr, direction::Integer, + equations::ShallowWaterEquations1D) + +Calculate Roe-averaged velocity `v_roe` and wavespeed `c_roe = sqrt{g * h_roe}` +See for instance equation (62) in +- Paul A. Ullrich, Christiane Jablonowski, and Bram van Leer (2010) + High-order finite-volume methods for the shallow-water equations on the sphere + [DOI: 10.1016/j.jcp.2010.04.044](https://doi.org/10.1016/j.jcp.2010.04.044) +Or equation (9.17) in [this lecture notes](https://metaphor.ethz.ch/x/2019/hs/401-4671-00L/literature/mishra_hyperbolic_pdes.pdf). 
+""" +@inline function calc_wavespeed_roe(u_ll, u_rr, direction::Integer, + equations::ShallowWaterEquations1D) + h_ll = waterheight(u_ll, equations) + v_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + v_rr = velocity(u_rr, equations) + + h_roe = 0.5 * (h_ll + h_rr) + c_roe = sqrt(equations.gravity * h_roe) + + h_ll_sqrt = sqrt(h_ll) + h_rr_sqrt = sqrt(h_rr) + + v_roe = (h_ll_sqrt * v_ll + h_rr_sqrt * v_rr) / (h_ll_sqrt + h_rr_sqrt) + + return v_roe, c_roe +end + # Entropy function for the shallow water equations is the total energy @inline function entropy(cons, equations::ShallowWaterEquations1D) energy_total(cons, equations) diff --git a/src/equations/shallow_water_2d.jl b/src/equations/shallow_water_2d.jl index f9ebbd597f9..9e227cd4a77 100644 --- a/src/equations/shallow_water_2d.jl +++ b/src/equations/shallow_water_2d.jl @@ -725,7 +725,7 @@ end end end -# Calculate minimum and maximum wave speeds for HLL-type fluxes +# Calculate estimates for minimum and maximum wave speeds for HLL-type fluxes @inline function min_max_speed_naive(u_ll, u_rr, orientation::Integer, equations::ShallowWaterEquations2D) h_ll = waterheight(u_ll, equations) @@ -762,6 +762,94 @@ end return λ_min, λ_max end +# More refined estimates for minimum and maximum wave speeds for HLL-type fluxes +@inline function min_max_speed_davis(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquations2D) + h_ll = waterheight(u_ll, equations) + v1_ll, v2_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + + c_ll = sqrt(equations.gravity * h_ll) + c_rr = sqrt(equations.gravity * h_rr) + + if orientation == 1 # x-direction + λ_min = min(v1_ll - c_ll, v1_rr - c_rr) + λ_max = max(v1_ll + c_ll, v1_rr + c_rr) + else # y-direction + λ_min = min(v2_ll - c_ll, v2_rr - c_rr) + λ_max = max(v2_ll + c_ll, v2_rr + c_rr) + end + + return λ_min, λ_max +end + +@inline function min_max_speed_davis(u_ll, u_rr, normal_direction::AbstractVector, + equations::ShallowWaterEquations2D) + h_ll = waterheight(u_ll, equations) + v1_ll, v2_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + + norm_ = norm(normal_direction) + c_ll = sqrt(equations.gravity * h_ll) * norm_ + c_rr = sqrt(equations.gravity * h_rr) * norm_ + + v_normal_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2] + v_normal_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2] + + # The v_normals are already scaled by the norm + λ_min = min(v_normal_ll - c_ll, v_normal_rr - c_rr) + λ_max = max(v_normal_ll + c_ll, v_normal_rr + c_rr) + + return λ_min, λ_max +end + +@inline function min_max_speed_einfeldt(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquations2D) + h_ll = waterheight(u_ll, equations) + v1_ll, v2_ll = velocity(u_ll, equations) + h_rr = waterheight(u_rr, equations) + v1_rr, v2_rr = velocity(u_rr, equations) + + c_ll = sqrt(equations.gravity * h_ll) + c_rr = sqrt(equations.gravity * h_rr) + + if orientation == 1 # x-direction + v_roe, c_roe = calc_wavespeed_roe(u_ll, u_rr, orientation, equations) + λ_min = min(v1_ll - c_ll, v_roe - c_roe) + λ_max = max(v1_rr + c_rr, v_roe + c_roe) + else # y-direction + v_roe, c_roe = calc_wavespeed_roe(u_ll, u_rr, orientation, equations) + λ_min = min(v2_ll - c_ll, v_roe - c_roe) + λ_max = max(v2_rr + c_rr, v_roe + c_roe) + end + + return λ_min, λ_max +end + +@inline function min_max_speed_einfeldt(u_ll, u_rr, normal_direction::AbstractVector, + 
equations::ShallowWaterEquations2D)
+    h_ll = waterheight(u_ll, equations)
+    v1_ll, v2_ll = velocity(u_ll, equations)
+    h_rr = waterheight(u_rr, equations)
+    v1_rr, v2_rr = velocity(u_rr, equations)
+
+    norm_ = norm(normal_direction)
+
+    c_ll = sqrt(equations.gravity * h_ll) * norm_
+    c_rr = sqrt(equations.gravity * h_rr) * norm_
+
+    v_normal_ll = (v1_ll * normal_direction[1] + v2_ll * normal_direction[2])
+    v_normal_rr = (v1_rr * normal_direction[1] + v2_rr * normal_direction[2])
+
+    v_roe, c_roe = calc_wavespeed_roe(u_ll, u_rr, normal_direction, equations)
+    λ_min = min(v_normal_ll - c_ll, v_roe - c_roe)
+    λ_max = max(v_normal_rr + c_rr, v_roe + c_roe)
+
+    return λ_min, λ_max
+end
+
 @inline function max_abs_speeds(u, equations::ShallowWaterEquations2D)
     h = waterheight(u, equations)
     v1, v2 = velocity(u, equations)
@@ -837,6 +925,63 @@ end
     return waterheight(u, equations) * pressure(u, equations)
 end
 
+"""
+    calc_wavespeed_roe(u_ll, u_rr, direction::Integer,
+                       equations::ShallowWaterEquations2D)
+
+Calculate Roe-averaged velocity `v_roe` and wavespeed `c_roe = sqrt{g * h_roe}` depending on direction.
+See for instance equation (62) in
+- Paul A. Ullrich, Christiane Jablonowski, and Bram van Leer (2010)
+  High-order finite-volume methods for the shallow-water equations on the sphere
+  [DOI: 10.1016/j.jcp.2010.04.044](https://doi.org/10.1016/j.jcp.2010.04.044)
+Or [these slides](https://faculty.washington.edu/rjl/classes/am574w2011/slides/am574lecture20nup3.pdf),
+slides 8 and 9.
+"""
+@inline function calc_wavespeed_roe(u_ll, u_rr, orientation::Integer,
+                                    equations::ShallowWaterEquations2D)
+    h_ll = waterheight(u_ll, equations)
+    v1_ll, v2_ll = velocity(u_ll, equations)
+    h_rr = waterheight(u_rr, equations)
+    v1_rr, v2_rr = velocity(u_rr, equations)
+
+    h_roe = 0.5 * (h_ll + h_rr)
+    c_roe = sqrt(equations.gravity * h_roe)
+
+    h_ll_sqrt = sqrt(h_ll)
+    h_rr_sqrt = sqrt(h_rr)
+
+    if orientation == 1 # x-direction
+        v_roe = (h_ll_sqrt * v1_ll + h_rr_sqrt * v1_rr) / (h_ll_sqrt + h_rr_sqrt)
+    else # y-direction
+        v_roe = (h_ll_sqrt * v2_ll + h_rr_sqrt * v2_rr) / (h_ll_sqrt + h_rr_sqrt)
+    end
+
+    return v_roe, c_roe
+end
+
+@inline function calc_wavespeed_roe(u_ll, u_rr, normal_direction::AbstractVector,
+                                    equations::ShallowWaterEquations2D)
+    h_ll = waterheight(u_ll, equations)
+    v1_ll, v2_ll = velocity(u_ll, equations)
+    h_rr = waterheight(u_rr, equations)
+    v1_rr, v2_rr = velocity(u_rr, equations)
+
+    norm_ = norm(normal_direction)
+
+    h_roe = 0.5 * (h_ll + h_rr)
+    c_roe = sqrt(equations.gravity * h_roe) * norm_
+
+    h_ll_sqrt = sqrt(h_ll)
+    h_rr_sqrt = sqrt(h_rr)
+
+    v1_roe = (h_ll_sqrt * v1_ll + h_rr_sqrt * v1_rr) / (h_ll_sqrt + h_rr_sqrt)
+    v2_roe = (h_ll_sqrt * v2_ll + h_rr_sqrt * v2_rr) / (h_ll_sqrt + h_rr_sqrt)
+
+    v_roe = (v1_roe * normal_direction[1] + v2_roe * normal_direction[2])
+
+    return v_roe, c_roe
+end
+
 # Entropy function for the shallow water equations is the total energy
 @inline function entropy(cons, equations::ShallowWaterEquations2D)
     energy_total(cons, equations)
diff --git a/test/test_structured_1d.jl b/test/test_structured_1d.jl
index ec8c7a138d5..d280e2a5e01 100644
--- a/test/test_structured_1d.jl
+++ b/test/test_structured_1d.jl
@@ -39,6 +39,14 @@ isdir(outdir) && rm(outdir, recursive=true)
                 tspan = (0.0, 0.3))
   end
 
+  @trixi_testset "elixir_euler_sedov_hll_davis.jl" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov.jl"),
+      l2 = [1.278661029299215, 0.0663853410742763, 0.9585741943783386],
+      linf = [3.1661064228547255, 0.16256363944708607,
2.667676158812806], + tspan = (0.0, 12.5), + surface_flux = FluxHLL(min_max_speed_davis)) + end + @trixi_testset "elixir_euler_source_terms.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_source_terms.jl"), # Expected errors are exactly the same as with TreeMesh! diff --git a/test/test_unit.jl b/test/test_unit.jl index b0c3e4205e5..2ce111b2bf4 100644 --- a/test/test_unit.jl +++ b/test/test_unit.jl @@ -382,7 +382,7 @@ isdir(outdir) && rm(outdir, recursive=true) @timed_testset "HLL flux with vanishing wave speed estimates (#502)" begin equations = CompressibleEulerEquations1D(1.4) u = SVector(1.0, 0.0, 0.0) - @test !any(isnan, FluxHLL()(u, u, 1, equations)) + @test !any(isnan, flux_hll(u, u, 1, equations)) end @timed_testset "DG L2 mortar container debug output" begin @@ -586,7 +586,265 @@ isdir(outdir) && rm(outdir, recursive=true) @test_throws ArgumentError TimeSeriesCallback(semi, [1.0 1.0 1.0; 2.0 2.0 2.0]) end - @timed_testset "Consistency check for HLLE flux" begin + @timed_testset "Consistency check for HLL flux (naive): CEE" begin + flux_hll = FluxHLL(min_max_speed_naive) + + # Set up equations and dummy conservative variables state + equations = CompressibleEulerEquations1D(1.4) + u = SVector(1.1, 2.34, 5.5) + + orientations = [1] + for orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + + equations = CompressibleEulerEquations2D(1.4) + u = SVector(1.1, -0.5, 2.34, 5.5) + + orientations = [1, 2] + for orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + + equations = CompressibleEulerEquations3D(1.4) + u = SVector(1.1, -0.5, 2.34, 2.4, 5.5) + + orientations = [1, 2, 3] + for orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + end + + @timed_testset "Consistency check for HLL flux (naive): LEE" begin + flux_hll = FluxHLL(min_max_speed_naive) + + equations = LinearizedEulerEquations2D(SVector(1.0, 1.0), 1.0, 1.0) + u = SVector(1.1, -0.5, 2.34, 5.5) + + orientations = [1, 2] + for orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + + normal_directions = [SVector(1.0, 0.0), + SVector(0.0, 1.0), + SVector(0.5, -0.5), + SVector(-1.2, 0.3)] + + for normal_direction in normal_directions + @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) + end + end + + @timed_testset "Consistency check for HLL flux (naive): SWE" begin + flux_hll = FluxHLL(min_max_speed_naive) + + equations = ShallowWaterEquations1D(gravity_constant=9.81) + u = SVector(1, 0.5, 0.0) + @test flux_hll(u, u, 1, equations) ≈ flux(u, 1, equations) + + equations = ShallowWaterEquations2D(gravity_constant=9.81) + normal_directions = [SVector(1.0, 0.0), + SVector(0.0, 1.0), + SVector(0.5, -0.5), + SVector(-1.2, 0.3)] + u = SVector(1, 0.5, 0.5, 0.0) + for normal_direction in normal_directions + @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) + end + end + + @timed_testset "Consistency check for HLL flux (naive): MHD" begin + flux_hll = FluxHLL(min_max_speed_naive) + + equations = IdealGlmMhdEquations1D(1.4) + u_values = [SVector(1.0, 0.4, -0.5, 0.1, 1.0, 0.1, -0.2, 0.1), + SVector(1.5, -0.2, 0.1, 0.2, 5.0, -0.1, 0.1, 0.2),] + + for u in u_values + @test flux_hll(u, u, 1, equations) ≈ flux(u, 1, equations) + end + + equations = IdealGlmMhdEquations2D(1.4, 5.0 #= c_h =#) + 
normal_directions = [SVector(1.0, 0.0), + SVector(0.0, 1.0), + SVector(0.5, -0.5), + SVector(-1.2, 0.3)] + orientations = [1, 2] + + u_values = [SVector(1.0, 0.4, -0.5, 0.1, 1.0, 0.1, -0.2, 0.1, 0.0), + SVector(1.5, -0.2, 0.1, 0.2, 5.0, -0.1, 0.1, 0.2, 0.2),] + + for u in u_values, orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + + for u in u_values, normal_direction in normal_directions + @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) + end + + equations = IdealGlmMhdEquations3D(1.4, 5.0 #= c_h =#) + normal_directions = [SVector(1.0, 0.0, 0.0), + SVector(0.0, 1.0, 0.0), + SVector(0.0, 0.0, 1.0), + SVector(0.5, -0.5, 0.2), + SVector(-1.2, 0.3, 1.4)] + orientations = [1, 2, 3] + + u_values = [SVector(1.0, 0.4, -0.5, 0.1, 1.0, 0.1, -0.2, 0.1, 0.0), + SVector(1.5, -0.2, 0.1, 0.2, 5.0, -0.1, 0.1, 0.2, 0.2),] + + for u in u_values, orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + + for u in u_values, normal_direction in normal_directions + @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) + end + end + + @timed_testset "Consistency check for HLL flux with Davis wave speed estimates: CEE" begin + flux_hll = FluxHLL(min_max_speed_davis) + + # Set up equations and dummy conservative variables state + equations = CompressibleEulerEquations1D(1.4) + u = SVector(1.1, 2.34, 5.5) + + orientations = [1] + for orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + + equations = CompressibleEulerEquations2D(1.4) + u = SVector(1.1, -0.5, 2.34, 5.5) + + orientations = [1, 2] + for orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + + normal_directions = [SVector(1.0, 0.0), + SVector(0.0, 1.0), + SVector(0.5, -0.5), + SVector(-1.2, 0.3)] + + for normal_direction in normal_directions + @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) + end + + equations = CompressibleEulerEquations3D(1.4) + u = SVector(1.1, -0.5, 2.34, 2.4, 5.5) + + orientations = [1, 2, 3] + for orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + + normal_directions = [SVector(1.0, 0.0, 0.0), + SVector(0.0, 1.0, 0.0), + SVector(0.0, 0.0, 1.0), + SVector(0.5, -0.5, 0.2), + SVector(-1.2, 0.3, 1.4)] + + for normal_direction in normal_directions + @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) + end + end + + @timed_testset "Consistency check for HLL flux with Davis wave speed estimates: LEE" begin + flux_hll = FluxHLL(min_max_speed_davis) + + equations = LinearizedEulerEquations2D(SVector(1.0, 1.0), 1.0, 1.0) + u = SVector(1.1, -0.5, 2.34, 5.5) + + orientations = [1, 2] + for orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + + normal_directions = [SVector(1.0, 0.0), + SVector(0.0, 1.0), + SVector(0.5, -0.5), + SVector(-1.2, 0.3)] + + for normal_direction in normal_directions + @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) + end + end + + @timed_testset "Consistency check for HLL flux with Davis wave speed estimates: SWE" begin + flux_hll = FluxHLL(min_max_speed_davis) + + equations = ShallowWaterEquations1D(gravity_constant=9.81) + u = SVector(1, 0.5, 
0.0) + @test flux_hll(u, u, 1, equations) ≈ flux(u, 1, equations) + + equations = ShallowWaterEquations2D(gravity_constant=9.81) + normal_directions = [SVector(1.0, 0.0), + SVector(0.0, 1.0), + SVector(0.5, -0.5), + SVector(-1.2, 0.3)] + u = SVector(1, 0.5, 0.5, 0.0) + for normal_direction in normal_directions + @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) + end + + orientations = [1, 2] + for orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + end + + @timed_testset "Consistency check for HLL flux with Davis wave speed estimates: MHD" begin + flux_hll = FluxHLL(min_max_speed_davis) + + equations = IdealGlmMhdEquations1D(1.4) + u_values = [SVector(1.0, 0.4, -0.5, 0.1, 1.0, 0.1, -0.2, 0.1), + SVector(1.5, -0.2, 0.1, 0.2, 5.0, -0.1, 0.1, 0.2),] + + for u in u_values + @test flux_hll(u, u, 1, equations) ≈ flux(u, 1, equations) + end + + equations = IdealGlmMhdEquations2D(1.4, 5.0 #= c_h =#) + normal_directions = [SVector(1.0, 0.0), + SVector(0.0, 1.0), + SVector(0.5, -0.5), + SVector(-1.2, 0.3)] + orientations = [1, 2] + + u_values = [SVector(1.0, 0.4, -0.5, 0.1, 1.0, 0.1, -0.2, 0.1, 0.0), + SVector(1.5, -0.2, 0.1, 0.2, 5.0, -0.1, 0.1, 0.2, 0.2),] + + for u in u_values, orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + + for u in u_values, normal_direction in normal_directions + @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) + end + + equations = IdealGlmMhdEquations3D(1.4, 5.0 #= c_h =#) + normal_directions = [SVector(1.0, 0.0, 0.0), + SVector(0.0, 1.0, 0.0), + SVector(0.0, 0.0, 1.0), + SVector(0.5, -0.5, 0.2), + SVector(-1.2, 0.3, 1.4)] + orientations = [1, 2, 3] + + u_values = [SVector(1.0, 0.4, -0.5, 0.1, 1.0, 0.1, -0.2, 0.1, 0.0), + SVector(1.5, -0.2, 0.1, 0.2, 5.0, -0.1, 0.1, 0.2, 0.2),] + + for u in u_values, orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + + for u in u_values, normal_direction in normal_directions + @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) + end + end + + @timed_testset "Consistency check for HLLE flux: CEE" begin # Set up equations and dummy conservative variables state equations = CompressibleEulerEquations1D(1.4) u = SVector(1.1, 2.34, 5.5) @@ -604,6 +862,15 @@ isdir(outdir) && rm(outdir, recursive=true) @test flux_hlle(u, u, orientation, equations) ≈ flux(u, orientation, equations) end + normal_directions = [SVector(1.0, 0.0), + SVector(0.0, 1.0), + SVector(0.5, -0.5), + SVector(-1.2, 0.3)] + + for normal_direction in normal_directions + @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) + end + equations = CompressibleEulerEquations3D(1.4) u = SVector(1.1, -0.5, 2.34, 2.4, 5.5) @@ -611,6 +878,92 @@ isdir(outdir) && rm(outdir, recursive=true) for orientation in orientations @test flux_hlle(u, u, orientation, equations) ≈ flux(u, orientation, equations) end + + normal_directions = [SVector(1.0, 0.0, 0.0), + SVector(0.0, 1.0, 0.0), + SVector(0.0, 0.0, 1.0), + SVector(0.5, -0.5, 0.2), + SVector(-1.2, 0.3, 1.4)] + + for normal_direction in normal_directions + @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) + end + end + + @timed_testset "Consistency check for HLLE flux: SWE" begin + # Test HLL flux with min_max_speed_einfeldt + flux_hll = 
FluxHLL(min_max_speed_einfeldt) + + equations = ShallowWaterEquations1D(gravity_constant=9.81) + u = SVector(1, 0.5, 0.0) + @test flux_hll(u, u, 1, equations) ≈ flux(u, 1, equations) + + equations = ShallowWaterEquations2D(gravity_constant=9.81) + normal_directions = [SVector(1.0, 0.0), + SVector(0.0, 1.0), + SVector(0.5, -0.5), + SVector(-1.2, 0.3)] + orientations = [1, 2] + + u = SVector(1, 0.5, 0.5, 0.0) + + for orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + + for normal_direction in normal_directions + @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) + end + end + + @timed_testset "Consistency check for HLLE flux: MHD" begin + # Test HLL flux with min_max_speed_einfeldt + flux_hll = FluxHLL(min_max_speed_naive) + + equations = IdealGlmMhdEquations1D(1.4) + u_values = [SVector(1.0, 0.4, -0.5, 0.1, 1.0, 0.1, -0.2, 0.1), + SVector(1.5, -0.2, 0.1, 0.2, 5.0, -0.1, 0.1, 0.2),] + + for u in u_values + @test flux_hll(u, u, 1, equations) ≈ flux(u, 1, equations) + end + + equations = IdealGlmMhdEquations2D(1.4, 5.0 #= c_h =#) + normal_directions = [SVector(1.0, 0.0), + SVector(0.0, 1.0), + SVector(0.5, -0.5), + SVector(-1.2, 0.3)] + orientations = [1, 2] + + u_values = [SVector(1.0, 0.4, -0.5, 0.1, 1.0, 0.1, -0.2, 0.1, 0.0), + SVector(1.5, -0.2, 0.1, 0.2, 5.0, -0.1, 0.1, 0.2, 0.2),] + + for u in u_values, orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + + for u in u_values, normal_direction in normal_directions + @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) + end + + equations = IdealGlmMhdEquations3D(1.4, 5.0 #= c_h =#) + normal_directions = [SVector(1.0, 0.0, 0.0), + SVector(0.0, 1.0, 0.0), + SVector(0.0, 0.0, 1.0), + SVector(0.5, -0.5, 0.2), + SVector(-1.2, 0.3, 1.4)] + orientations = [1, 2, 3] + + u_values = [SVector(1.0, 0.4, -0.5, 0.1, 1.0, 0.1, -0.2, 0.1, 0.0), + SVector(1.5, -0.2, 0.1, 0.2, 5.0, -0.1, 0.1, 0.2, 0.2),] + + for u in u_values, orientation in orientations + @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) + end + + for u in u_values, normal_direction in normal_directions + @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) + end end @timed_testset "Consistency check for Godunov flux" begin @@ -780,7 +1133,8 @@ isdir(outdir) && rm(outdir, recursive=true) SVector(-1.2, 0.3)] u_values = [SVector(1.0, 0.5, -0.7, 1.0), SVector(1.5, -0.2, 0.1, 5.0),] - fluxes = [flux_central, flux_ranocha, flux_shima_etal, flux_kennedy_gruber] + fluxes = [flux_central, flux_ranocha, flux_shima_etal, flux_kennedy_gruber, + flux_hll, FluxHLL(min_max_speed_davis)] for f_std in fluxes f_rot = FluxRotated(f_std) @@ -799,7 +1153,8 @@ isdir(outdir) && rm(outdir, recursive=true) SVector(-1.2, 0.3, 1.4)] u_values = [SVector(1.0, 0.5, -0.7, 0.1, 1.0), SVector(1.5, -0.2, 0.1, 0.2, 5.0),] - fluxes = [flux_central, flux_ranocha, flux_shima_etal, flux_kennedy_gruber, FluxLMARS(340)] + fluxes = [flux_central, flux_ranocha, flux_shima_etal, flux_kennedy_gruber, FluxLMARS(340), + flux_hll, FluxHLL(min_max_speed_davis)] for f_std in fluxes f_rot = FluxRotated(f_std) @@ -809,6 +1164,20 @@ isdir(outdir) && rm(outdir, recursive=true) end end + @timed_testset "ShallowWaterEquations2D" begin + equations = ShallowWaterEquations2D(gravity_constant=9.81) + normal_directions = [SVector(1.0, 0.0), + SVector(0.0, 1.0), + SVector(0.5, -0.5), 
+                         SVector(-1.2, 0.3)]
+
+    u = SVector(1, 0.5, 0.5, 0.0)
+
+    fluxes = [flux_central, flux_fjordholm_etal, flux_wintermeyer_etal,
+              flux_hll, FluxHLL(min_max_speed_davis), FluxHLL(min_max_speed_einfeldt)]
+
+  end
+
   @timed_testset "IdealGlmMhdEquations2D" begin
     equations = IdealGlmMhdEquations2D(1.4, 5.0 #= c_h =#)
     normal_directions = [SVector(1.0, 0.0),
@@ -817,7 +1186,7 @@ isdir(outdir) && rm(outdir, recursive=true)
                          SVector(0.0, 1.0),
                          SVector(0.5, -0.5),
                          SVector(-1.2, 0.3)]
     u_values = [SVector(1.0, 0.4, -0.5, 0.1, 1.0, 0.1, -0.2, 0.1, 0.0),
                 SVector(1.5, -0.2, 0.1, 0.2, 5.0, -0.1, 0.1, 0.2, 0.2),]
-    fluxes = [flux_central, flux_hindenlang_gassner]
+    fluxes = [flux_central, flux_hindenlang_gassner, flux_hll, FluxHLL(min_max_speed_davis)]

     for f_std in fluxes
       f_rot = FluxRotated(f_std)
@@ -836,7 +1205,7 @@ isdir(outdir) && rm(outdir, recursive=true)
                          SVector(-1.2, 0.3, 1.4)]
     u_values = [SVector(1.0, 0.4, -0.5, 0.1, 1.0, 0.1, -0.2, 0.1, 0.0),
                 SVector(1.5, -0.2, 0.1, 0.2, 5.0, -0.1, 0.1, 0.2, 0.2),]
-    fluxes = [flux_central, flux_hindenlang_gassner]
+    fluxes = [flux_central, flux_hindenlang_gassner, flux_hll, FluxHLL(min_max_speed_davis)]

     for f_std in fluxes
       f_rot = FluxRotated(f_std)

From dd91d7ed7fe99f437d8d0261cf7f9c43eb32c95b Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Fri, 14 Jul 2023 07:33:12 +0200
Subject: [PATCH 079/163] set version to v0.5.32

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 828f4778f74..f3ede1c74b4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "]
-version = "0.5.32-pre"
+version = "0.5.32"

 [deps]
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"

From 0816ed0b62679bcd656dc38bad68034843632ba1 Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Fri, 14 Jul 2023 07:33:26 +0200
Subject: [PATCH 080/163] set development version to v0.5.33-pre

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index f3ede1c74b4..4a289380850 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "]
-version = "0.5.32"
+version = "0.5.33-pre"

 [deps]
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"

From 905c8e29ef30bdb2648fa3073166ad0887cd1278 Mon Sep 17 00:00:00 2001
From: Andrew Winters
Date: Fri, 14 Jul 2023 17:47:20 +0200
Subject: [PATCH 081/163] Merge wet/dry capability to `main` (#1501)

* add dummy commit in order to open a dev to main PR
* [WIP] Wet/dry capabilities for 2D shallow water equations (#1340)
* HR of Chen and Noelle (1D) and edit SWE struct
* Overload limiter (SWE 1D) to cut off waterheight
* New indicatorHG (SWE 1D) to apply FV on dry cells
* Threshold in rhs! before calculation (SWE 1D)
* New lake_at_rest_error for SWE 1D
* New wet/dry elixirs for testing scheme for SWE 1D
* HR of Chen and Noelle (2D) and edit SWE struct
* Overload limiter (SWE 2D) to cut off waterheight
* New indicatorHG (SWE 2D) to apply FV on dry cells
* Threshold in rhs! before calculation (SWE 2D)
* New lake_at_rest_error for SWE 2D
* New wet/dry elixirs for testing scheme for SWE 2D
* Elixir SWE 2D: 3 mounds, problem with boundaries
* Fixed MethodError; apply_thresholds! too strict
* Fixed MethodError; apply_thresholds! too strict
* Move threshold on volume integral in stage_limiter
* Indentation, spacing and comments adjustment
* Renaming numerical HLL type flux (SWE 1D)
* Move threshold on volume integral in stage_limiter
* Renaming numerical HLL type flux (SWE 2D)
* Indentation, spacing and comments adjustment
* Describing docs for Chen and Noelle HR (SWE 1D)
* Edit SWE 1D elixirs, error-based solver and docs
* Including tests on new SWE 1D elixirs
* Describing docs for Chen and Noelle HR (SWE 2D)
* Edit SWE 2D elixirs, error-based solver and docs
* Including tests on new SWE 2D elixirs
* New/reorganize positivity limiter (SWE 2D)
* New/reorganize positivity limiter (SWE 1D)
* Editing docs SWE 1D
* Editing docs SWE 2D
* Rearrange cut off at interfaces, edit tests SWE 1D
* Edit docs, add Ref
* Edit docs and indenting (SWE 2D)
* Rearrange cut off at interfaces, edit tests SWE 2D
* Remove tree/structured mesh elixir from repo SWE2D
* Create unstructured mesh elixir SWE 2D
* Add 1D lake-at-rest-error logic to pass 1D tests
* Add 2D lake-at-rest-error logic to pass 2D tests
* Fixed typo. Confusing name, but correct math
* Correction of comments and docstrings
* Correction of comments and docstrings
* Rename mesh file in elixir for UnstructuredMesh
* Update test_unstructured_2d.jl
forgot an end statement for the new test
* Fixing typos
* fix dispatching error on new lake-at-rest error calculation. See if this fixes broken tests
* Editing initial condition in parabolic bowl elixir
* Delete unnecessary variable in elixir
* adjust lake-at-rest error computation strategy. move specialized version of error into the wet-dry elixir as the new functionality was only needed in this special case. update corresponding test values as the bottom is now truly discontinuous
* update structured mesh version of the wet-dry well-balancedness test
* fix typos
* update values in parabolic bowl test on StructuredMesh
* update parabolic bowl test on TreeMesh
* revert the 1D computation of the lake-at-rest error to the standard way. This will change once the 1D wet/dry merges
* Reset lake-at-rest error computation strategy.
New version of error only in wet-dry elixir (special case)
Update test values as the bottom is now truly discontinuous
* Fix typo
* Shorten test run for parabolic bowl 1D
* Choose lower resolution for parabolic bowl and update test values
* Further reduce resolution for parabolic bowl and update test values
* adjust special initial conditions and well-balancedness error routines to avoid the need of element IDs
* Remove MPI from well-balanced test
* simplify workaround to set discontinuous initial data
* Simplify workaround to set discontinuity
* Change structure of Chen&Noelle flux
* Fix typos and indenting
* Adjust call of solve and use ode_default_options
* Edit docstring
* Replace boolean with if, remove set_node_vars
Shorten test runs on TreeMesh and UnstructuredMesh
* Change structure of Chen&Noelle flux
* Fix typos and indenting
* Adjust call of solve and use ode_default_options
* Edit docstring
* Replace boolean with if, remove set_node_vars
Shorten test runs on TreeMesh and UnstructuredMesh
* Update comment regarding H0 for lake-at-rest error
* Add the original source to the parabolic bowl test
* Update comment regarding H0 for lake-at-rest error
* Add the original source to the parabolic bowl test
* New sc indicator especially for SWE
* Remove threshold parameter from SWE limiter call
* update some docstrings
* remove type instability in positivity limiter
* typo fix
* move safety check for dry state in the new positivity limiter into the same element loop
* more docstring updates
* remove dummy comment added in the dev initial commit
* adjust default threshold values to be precision agnostic
* update comment on the default threshold value in the new TreeMesh elixirs
* update comments for the three new TreeMesh examples
* update IC comment for three mound test
* update IC comments for new StructuredMesh2D tests
* update comment on shallow water constructor
* adjust comments in the shallow_water_2d file
* adjust comment regarding threshold_limiter in the new elixirs
* fix typos found by SpellCheck
* Edit docs
* Import Printf macros for printing wb error
* Remove type instability in Chen & Noelle HR
* Change logic for setting SC indicator to one
* Change logic for default values of SWE struct
* Outsource HG shock capturing indicator for SWE
Create different function to compute indicator
Edit comments
Change wet/dry clipping to if-else logic
* Move limiter threshold into function & edit docs
Threshold was a passed variable in elixir before.
Now, it is taken right from the SWE struct in the limiter
Edit docs
* Move new limiter safety check in same element loop
* Adjust default threshold values
* Remove type instability
* Import Printf package for terminal output
* Edit docs
* Add Printf package to the test/Project.toml
Used for printing lake-at-rest error in well-balancedness test
* Add Printf package to the test/Project.toml
Used for printing lake-at-rest error in well-balancedness test
* Typo fix in elixir_shallowwater_well_balanced_wet_dry.jl
* Typo fix in elixir_shallowwater_well_balanced_wet_dry.jl
* unify new code with required formatting
* fix weird formatting and add 'format: noindent' where missing.
fix crashing structured mesh run
* add unit test for new show routine
* apply JuliaFormatter
* simplify elixir as we can set discontinuous ICs in 1D.
Also update beach test values
* dummy commit to check push access
* remove dummy comment
* typo fix

---------

Co-authored-by: Andrew Winters
Co-authored-by: Michael Schlottke-Lakemper

* adjust comments and remove duplicate code
* add TODOs for code pieces that should move to TrixiShallowWater package
* remove accidentally added file
* apply formatter to avoid errors with new comments
* move TODO comments to avoid errors in Documentation build
* Apply suggestions from code review
Co-authored-by: Hendrik Ranocha
* remove unnecessary analysis quantities from several new elixirs
* rename local threshold variable in new indicator to avoid confusion
* update NEWS.md with wetting and drying feature
* fix formatting issue from conflict resolution

---------

Co-authored-by: svengoldberg <102215246+svengoldberg@users.noreply.github.com>
Co-authored-by: Michael Schlottke-Lakemper
Co-authored-by: Hendrik Ranocha
---
 NEWS.md                                       |   1 +
 .../elixir_shallowwater_conical_island.jl     | 113 ++++++++
 .../elixir_shallowwater_parabolic_bowl.jl     | 119 ++++++++
 ...ixir_shallowwater_well_balanced_wet_dry.jl | 200 +++++++++++++
 .../elixir_shallowwater_beach.jl              | 121 ++++++++
 .../elixir_shallowwater_parabolic_bowl.jl     | 117 ++++++++
 ...ixir_shallowwater_well_balanced_wet_dry.jl | 165 +++++++++++
 .../elixir_shallowwater_conical_island.jl     | 116 ++++++++
 .../elixir_shallowwater_parabolic_bowl.jl     | 120 ++++++++
 ...ixir_shallowwater_well_balanced_wet_dry.jl | 198 +++++++++++++
 ...ixir_shallowwater_three_mound_dam_break.jl | 139 +++++++++
 src/Trixi.jl                                  |   9 +-
 src/callbacks_stage/callbacks_stage.jl        |   2 +
 .../positivity_shallow_water.jl               |  89 ++++++
 .../positivity_shallow_water_dg1d.jl          |  89 ++++++
 .../positivity_shallow_water_dg2d.jl          |  90 ++++++
 src/equations/numerical_fluxes.jl             |  23 ++
 src/equations/shallow_water_1d.jl             | 192 ++++++++++++-
 src/equations/shallow_water_2d.jl             | 270 +++++++++++++++++-
 src/equations/shallow_water_two_layer_1d.jl   |   2 +
 src/equations/shallow_water_two_layer_2d.jl   |  96 ++++---
 src/solvers/dgsem_tree/indicators.jl          |  73 ++++-
 src/solvers/dgsem_tree/indicators_1d.jl       | 109 +++++++
 src/solvers/dgsem_tree/indicators_2d.jl       | 110 +++++++
 test/Project.toml                             |   1 +
 test/test_structured_2d.jl                    |  25 +-
 test/test_tree_1d_shallowwater.jl             |  23 ++
 test/test_tree_1d_shallowwater_twolayer.jl    |   2 +
 test/test_tree_2d_shallowwater.jl             |  24 ++
 test/test_tree_2d_shallowwater_twolayer.jl    |  20 +-
 test/test_unit.jl                             |   4 +
 test/test_unstructured_2d.jl                  |  18 ++
 32 files changed, 2608 insertions(+), 72 deletions(-)
 create mode 100644 examples/structured_2d_dgsem/elixir_shallowwater_conical_island.jl
 create mode 100644 examples/structured_2d_dgsem/elixir_shallowwater_parabolic_bowl.jl
 create mode 100644 examples/structured_2d_dgsem/elixir_shallowwater_well_balanced_wet_dry.jl
 create mode 100644 examples/tree_1d_dgsem/elixir_shallowwater_beach.jl
 create mode 100644 examples/tree_1d_dgsem/elixir_shallowwater_parabolic_bowl.jl
 create mode 100644 examples/tree_1d_dgsem/elixir_shallowwater_well_balanced_wet_dry.jl
 create mode 100644 examples/tree_2d_dgsem/elixir_shallowwater_conical_island.jl
 create mode 100644 examples/tree_2d_dgsem/elixir_shallowwater_parabolic_bowl.jl
 create mode 100644 examples/tree_2d_dgsem/elixir_shallowwater_well_balanced_wet_dry.jl
 create mode 100644 examples/unstructured_2d_dgsem/elixir_shallowwater_three_mound_dam_break.jl
 create mode 100644 src/callbacks_stage/positivity_shallow_water.jl
 create mode 100644 src/callbacks_stage/positivity_shallow_water_dg1d.jl
 create mode 100644 src/callbacks_stage/positivity_shallow_water_dg2d.jl
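Before the individual diffs, a condensed sketch of how the pieces introduced by this patch fit together. It is distilled from the elixirs added below and is not itself part of the patch; concrete parameter values are illustrative only:

    using OrdinaryDiffEq
    using Trixi

    # shallow water equations with the new dry state thresholds (default ≈ 500 * eps())
    equations = ShallowWaterEquations2D(gravity_constant=9.81)

    # Chen & Noelle hydrostatic reconstruction and flux at the surfaces,
    # entropy-conservative Wintermeyer et al. fluxes in the volume
    surface_flux = (FluxHydrostaticReconstruction(flux_hll_chen_noelle,
                                                  hydrostatic_reconstruction_chen_noelle),
                    flux_nonconservative_chen_noelle)
    volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)

    # shock-capturing indicator specialized for (possibly dry) shallow water states
    basis = LobattoLegendreBasis(3)
    indicator_sc = IndicatorHennemannGassnerShallowWater(equations, basis,
                                                         alpha_max=0.5, alpha_min=0.001,
                                                         alpha_smooth=true,
                                                         variable=waterheight_pressure)
    volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
                                                     volume_flux_dg=volume_flux,
                                                     volume_flux_fv=surface_flux)
    solver = DGSEM(basis, surface_flux, volume_integral)

    # new positivity limiter, applied as a stage limiter of an SSP method:
    # stage_limiter! = PositivityPreservingLimiterShallowWater(variables=(Trixi.waterheight,))
    # sol = solve(ode, SSPRK43(stage_limiter!); ode_default_options()..., callback=callbacks)

The mesh, semidiscretization, ODE, and callbacks are set up as in any other Trixi.jl elixir; see the complete examples in the diffs that follow.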
diff --git a/NEWS.md b/NEWS.md
index 35c7039b2ef..8e374d9ce99 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -10,6 +10,7 @@ for human readability.

 - Experimental support for 3D parabolic diffusion terms has been added.
 - Capability to set truly discontinuous initial conditions in 1D.
+- Wetting and drying feature and examples for 1D and 2D shallow water equations

 #### Changed

diff --git a/examples/structured_2d_dgsem/elixir_shallowwater_conical_island.jl b/examples/structured_2d_dgsem/elixir_shallowwater_conical_island.jl
new file mode 100644
index 00000000000..44bc7a12b35
--- /dev/null
+++ b/examples/structured_2d_dgsem/elixir_shallowwater_conical_island.jl
@@ -0,0 +1,113 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# Semidiscretization of the shallow water equations
+#
+# TODO: TrixiShallowWater: wet/dry example elixir
+
+equations = ShallowWaterEquations2D(gravity_constant=9.81, H0=1.4)
+
+"""
+    initial_condition_conical_island(x, t, equations::ShallowWaterEquations2D)
+
+Initial condition for the [`ShallowWaterEquations2D`](@ref) to test the [`hydrostatic_reconstruction_chen_noelle`](@ref)
+and its handling of discontinuous water heights at the start in combination with wetting and
+drying. The bottom topography is given by a conical island in the middle of the domain. Around that
+island, there is a cylindrical water column at t=0 and the rest of the domain is dry. This
+discontinuous water height is smoothed by a logistic function. This simulation uses periodic
+boundary conditions.
+"""
+function initial_condition_conical_island(x, t, equations::ShallowWaterEquations2D)
+  # Set the background values
+
+  v1 = 0.0
+  v2 = 0.0
+
+  x1, x2 = x
+  b = max(0.1, 1.0 - 4.0 * sqrt(x1^2 + x2^2))
+
+  # use a logistic function to transfer water height value smoothly
+  L = equations.H0    # maximum of function
+  x0 = 0.3   # center point of function
+  k = -25.0  # sharpness of transfer
+
+  H = max(b, L/(1.0 + exp(-k*(sqrt(x1^2+x2^2) - x0))))
+
+  # It is mandatory to shift the water level at dry areas to make sure the water height h
+  # stays positive. The system would not be stable for h set to a hard 0 due to division by h in
+  # the computation of velocity, e.g., (h v1) / h. Therefore, a small dry state threshold
+  # with a default value of 500*eps() ≈ 1e-13 in double precision, is set in the constructor above
+  # for the ShallowWaterEquations and added to the initial condition if h = 0.
+  # This default value can be changed within the constructor call depending on the simulation setup.
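+  # (Concrete numbers, for orientation: in double precision, 500 * eps() evaluates
+  #  to ≈ 1.11e-13, so "dry" parts of the domain carry this tiny positive water
+  #  height rather than an exact zero.)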
+  H = max(H, b + equations.threshold_limiter)
+  return prim2cons(SVector(H, v1, v2, b), equations)
+end
+
+initial_condition = initial_condition_conical_island
+
+###############################################################################
+# Get the DG approximation space
+
+volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
+surface_flux = (FluxHydrostaticReconstruction(flux_hll_chen_noelle, hydrostatic_reconstruction_chen_noelle),
+                flux_nonconservative_chen_noelle)
+
+basis = LobattoLegendreBasis(4)
+
+indicator_sc = IndicatorHennemannGassnerShallowWater(equations, basis,
+                                                     alpha_max=0.5,
+                                                     alpha_min=0.001,
+                                                     alpha_smooth=true,
+                                                     variable=waterheight_pressure)
+volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
+                                                 volume_flux_dg=volume_flux,
+                                                 volume_flux_fv=surface_flux)
+
+solver = DGSEM(basis, surface_flux, volume_integral)
+
+###############################################################################
+# Get the StructuredMesh and setup a periodic mesh
+
+coordinates_min = (-1.0, -1.0)
+coordinates_max = (1.0, 1.0)
+
+cells_per_dimension = (16, 16)
+
+mesh = StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max)
+
+# Create the semi discretization object
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+
+###############################################################################
+# ODE solver
+
+tspan = (0.0, 10.0)
+ode = semidiscretize(semi, tspan)
+
+###############################################################################
+# Callbacks
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 1000
+analysis_callback = AnalysisCallback(semi, interval=analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval=analysis_interval)
+
+save_solution = SaveSolutionCallback(interval=100,
+                                     save_initial_solution=true,
+                                     save_final_solution=true)
+
+callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, save_solution)
+
+###############################################################################
+# run the simulation
+
+stage_limiter! = PositivityPreservingLimiterShallowWater(variables=(Trixi.waterheight,))
+
+sol = solve(ode, SSPRK43(stage_limiter!);
+            ode_default_options()..., callback=callbacks);
+
+summary_callback() # print the timer summary
\ No newline at end of file
diff --git a/examples/structured_2d_dgsem/elixir_shallowwater_parabolic_bowl.jl b/examples/structured_2d_dgsem/elixir_shallowwater_parabolic_bowl.jl
new file mode 100644
index 00000000000..15cfe6698fc
--- /dev/null
+++ b/examples/structured_2d_dgsem/elixir_shallowwater_parabolic_bowl.jl
@@ -0,0 +1,119 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# Semidiscretization of the shallow water equations
+#
+# TODO: TrixiShallowWater: wet/dry example elixir
+
+equations = ShallowWaterEquations2D(gravity_constant=9.81)
+
+"""
+    initial_condition_parabolic_bowl(x, t, equations:: ShallowWaterEquations2D)
+
+Well-known initial condition to test the [`hydrostatic_reconstruction_chen_noelle`](@ref) and its
+wet-dry mechanics. This test has an analytical solution. The initial condition is defined by the
+analytical solution at time t=0. The bottom topography defines a bowl and the water level is given
+by an oscillating lake.
+
+The original test and its analytical solution were first presented in
+- William C. Thacker (1981)
+  Some exact solutions to the nonlinear shallow-water wave equations
+  [DOI: 10.1017/S0022112081001882](https://doi.org/10.1017/S0022112081001882).
+
+The particular setup below is taken from Section 6.2 of
+- Niklas Wintermeyer, Andrew R. Winters, Gregor J. Gassner and Timothy Warburton (2018)
+  An entropy stable discontinuous Galerkin method for the shallow water equations on
+  curvilinear meshes with wet/dry fronts accelerated by GPUs
+  [DOI: 10.1016/j.jcp.2018.08.038](https://doi.org/10.1016/j.jcp.2018.08.038).
+"""
+function initial_condition_parabolic_bowl(x, t, equations:: ShallowWaterEquations2D)
+  a = 1.0
+  h_0 = 0.1
+  sigma = 0.5
+  ω = sqrt(2 * equations.gravity * h_0) / a
+
+  v1 = -sigma * ω * sin(ω * t)
+  v2 = sigma * ω * cos(ω * t)
+
+  b = h_0 * ((x[1])^2 + (x[2])^2) / a^2
+
+  H = sigma * h_0 / a^2 * (2 * x[1] * cos(ω * t) + 2 * x[2] * sin(ω * t) - sigma) + h_0
+
+  # It is mandatory to shift the water level at dry areas to make sure the water height h
+  # stays positive. The system would not be stable for h set to a hard 0 due to division by h in
+  # the computation of velocity, e.g., (h v1) / h. Therefore, a small dry state threshold
+  # with a default value of 500*eps() ≈ 1e-13 in double precision, is set in the constructor above
+  # for the ShallowWaterEquations and added to the initial condition if h = 0.
+  # This default value can be changed within the constructor call depending on the simulation setup.
+  H = max(H, b + equations.threshold_limiter)
+  return prim2cons(SVector(H, v1, v2, b), equations)
+end
+
+initial_condition = initial_condition_parabolic_bowl
+
+
+###############################################################################
+# Get the DG approximation space
+
+volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
+surface_flux = (FluxHydrostaticReconstruction(flux_hll_chen_noelle, hydrostatic_reconstruction_chen_noelle),
+                flux_nonconservative_chen_noelle)
+
+basis = LobattoLegendreBasis(4)
+
+indicator_sc = IndicatorHennemannGassnerShallowWater(equations, basis,
+                                                     alpha_max=0.6,
+                                                     alpha_min=0.001,
+                                                     alpha_smooth=true,
+                                                     variable=waterheight_pressure)
+volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
+                                                 volume_flux_dg=volume_flux,
+                                                 volume_flux_fv=surface_flux)
+
+solver = DGSEM(basis, surface_flux, volume_integral)
+
+
+###############################################################################
+
+coordinates_min = (-2.0, -2.0)
+coordinates_max = (2.0, 2.0)
+
+cells_per_dimension = (150, 150)
+
+mesh = StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max)
+
+# create the semi discretization object
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 1.0)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 1000
+analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=false,
+                                     extra_analysis_integrals=(energy_kinetic,
+                                                               energy_internal))
+
+alive_callback = AliveCallback(analysis_interval=analysis_interval)
+
+save_solution = SaveSolutionCallback(interval=100,
+                                     save_initial_solution=true,
+                                     save_final_solution=true)
+
+callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, save_solution)
+
+stage_limiter! = PositivityPreservingLimiterShallowWater(variables=(Trixi.waterheight,))
+
+###############################################################################
+# run the simulation
+
+sol = solve(ode, SSPRK43(stage_limiter!);
+            ode_default_options()..., callback=callbacks);
+
+summary_callback() # print the timer summary
diff --git a/examples/structured_2d_dgsem/elixir_shallowwater_well_balanced_wet_dry.jl b/examples/structured_2d_dgsem/elixir_shallowwater_well_balanced_wet_dry.jl
new file mode 100644
index 00000000000..b18b02e0b4c
--- /dev/null
+++ b/examples/structured_2d_dgsem/elixir_shallowwater_well_balanced_wet_dry.jl
@@ -0,0 +1,200 @@
+
+using OrdinaryDiffEq
+using Trixi
+using Printf: @printf, @sprintf
+
+###############################################################################
+# Semidiscretization of the shallow water equations
+#
+# TODO: TrixiShallowWater: wet/dry example elixir
+
+
+equations = ShallowWaterEquations2D(gravity_constant=9.812)
+
+"""
+    initial_condition_complex_bottom_well_balanced(x, t, equations:: ShallowWaterEquations2D)
+
+Initial condition with a complex (discontinuous) bottom topography to test the well-balanced
+property for the [`hydrostatic_reconstruction_chen_noelle`](@ref) including dry areas within the
+domain. The errors from the analysis callback are not important but the error for this
+lake-at-rest test case `∑|H0-(h+b)|` should be around machine roundoff.
+
+The initial condition is taken from Section 5.2 of the paper:
+- Guoxian Chen and Sebastian Noelle (2017)
+  A new hydrostatic reconstruction scheme based on subcell reconstructions
+  [DOI:10.1137/15M1053074](https://dx.doi.org/10.1137/15M1053074)
+"""
+function initial_condition_complex_bottom_well_balanced(x, t, equations:: ShallowWaterEquations2D)
+  v1 = 0
+  v2 = 0
+  b = sin(4 * pi * x[1]) + 3
+
+  if x[1] >= 0.5
+    b = sin(4 * pi * x[1]) + 1
+  end
+
+  H = max(b, 2.5)
+
+  if x[1] >= 0.5
+    H = max(b, 1.5)
+  end
+
+  # It is mandatory to shift the water level at dry areas to make sure the water height h
+  # stays positive. The system would not be stable for h set to a hard 0 due to division by h in
+  # the computation of velocity, e.g., (h v1) / h. Therefore, a small dry state threshold
+  # with a default value of 500*eps() ≈ 1e-13 in double precision, is set in the constructor above
+  # for the ShallowWaterEquations and added to the initial condition if h = 0.
+  # This default value can be changed within the constructor call depending on the simulation setup.
+  H = max(H, b + equations.threshold_limiter)
+  return prim2cons(SVector(H, v1, v2, b), equations)
+end
+
+initial_condition = initial_condition_complex_bottom_well_balanced
+
+###############################################################################
+# Get the DG approximation space
+
+volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
+
+surface_flux = (FluxHydrostaticReconstruction(flux_hll_chen_noelle, hydrostatic_reconstruction_chen_noelle),
+                flux_nonconservative_chen_noelle)
+
+basis = LobattoLegendreBasis(3)
+
+indicator_sc = IndicatorHennemannGassnerShallowWater(equations, basis,
+                                                     alpha_max=0.5,
+                                                     alpha_min=0.001,
+                                                     alpha_smooth=true,
+                                                     variable=waterheight_pressure)
+volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
+                                                 volume_flux_dg=volume_flux,
+                                                 volume_flux_fv=surface_flux)
+
+solver = DGSEM(basis, surface_flux, volume_integral)
+
+
+###############################################################################
+# Create the StructuredMesh for the domain [0, 1]^2
+
+coordinates_min = (0.0, 0.0)
+coordinates_max = (1.0, 1.0)
+
+cells_per_dimension = (16, 16)
+
+mesh = StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max)
+
+
+# create the semi discretization object
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 10.0)
+ode = semidiscretize(semi, tspan)
+
+###############################################################################
+# Workaround to set a discontinuous water and bottom topography for
+# debugging and testing. Essentially, this is a slight augmentation of the
+# `compute_coefficients` where the `x` node value passed here is slightly
+# perturbed to the left / right in order to set a true discontinuity that avoids
+# the doubled value of the LGL nodes at a particular element interface.
+#
+# Note! The errors from the analysis callback are not important but the error
+# for this lake at rest test case `∑|H0-(h+b)|` should be near machine roundoff.
+
+# point to the data we want to augment
+u = Trixi.wrap_array(ode.u0, semi)
+# reset the initial condition
+for element in eachelement(semi.solver, semi.cache)
+  for j in eachnode(semi.solver), i in eachnode(semi.solver)
+    x_node = Trixi.get_node_coords(semi.cache.elements.node_coordinates, equations, semi.solver, i, j, element)
+    # We know that the discontinuity is a vertical line. Slightly augment the x value by a factor
+    # of unit roundoff to avoid the repeated value from the LGL nodes at the interface.
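+    # (Illustration: the shift is a single ulp, e.g. nextfloat(0.5) - 0.5 == eps(0.5) ≈ 1.1e-16,
+    #  far below the dry state threshold of ≈ 1e-13 used above, so only the side of the
+    #  discontinuity that is sampled changes, not the physics.)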
+    if i == 1
+      x_node = SVector(nextfloat(x_node[1]) , x_node[2])
+    elseif i == nnodes(semi.solver)
+      x_node = SVector(prevfloat(x_node[1]) , x_node[2])
+    end
+    u_node = initial_condition_complex_bottom_well_balanced(x_node, first(tspan), equations)
+    Trixi.set_node_vars!(u, u_node, equations, semi.solver, i, j, element)
+  end
+end
+
+###############################################################################
+# Callbacks
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 1000
+analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=false)
+
+alive_callback = AliveCallback(analysis_interval=analysis_interval)
+
+save_solution = SaveSolutionCallback(interval=1000,
+                                     save_initial_solution=true,
+                                     save_final_solution=true)
+
+stepsize_callback = StepsizeCallback(cfl=1.0)
+
+callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, save_solution, stepsize_callback)
+
+stage_limiter! = PositivityPreservingLimiterShallowWater(variables=(Trixi.waterheight,))
+
+###############################################################################
+# run the simulation
+
+sol = solve(ode, SSPRK43(stage_limiter!); dt=1.0,
+            ode_default_options()..., callback=callbacks, adaptive=false);
+
+summary_callback() # print the timer summary
+
+###############################################################################
+# Workaround to compute the well-balancedness error for this particular problem
+# that has two reference water heights. One for a lake to the left of the
+# discontinuous bottom topography `H0_upper = 2.5` and another for a lake to the
+# right of the discontinuous bottom topography `H0_lower = 1.5`.
+
+# Declare a special version of the function to compute the lake-at-rest error
+# OBS! The reference water height values are hardcoded for convenience.
+function lake_at_rest_error_two_level(u, x, equations::ShallowWaterEquations2D)
+  h, _, _, b = u
+
+  # For well-balancedness testing with possible wet/dry regions the reference
+  # water height `H0` accounts for the possibility that the bottom topography
+  # can emerge out of the water as well as for the threshold offset to avoid
+  # division by a "hard" zero water height as well.
+  if x[1] < 0.5
+    H0_wet_dry = max( 2.5 , b + equations.threshold_limiter )
+  else
+    H0_wet_dry = max( 1.5 , b + equations.threshold_limiter )
+  end
+
+  return abs(H0_wet_dry - (h + b))
+end
+
+# point to the data we want to analyze
+u = Trixi.wrap_array(sol[end], semi)
+# Perform the actual integration of the well-balancedness error over the domain
+l1_well_balance_error = Trixi.integrate_via_indices(u, mesh, equations, semi.solver, semi.cache; normalize=true) do u, i, j, element, equations, solver
+  x_node = Trixi.get_node_coords(semi.cache.elements.node_coordinates, equations, solver, i, j, element)
+  # We know that the discontinuity is a vertical line. Slightly augment the x value by a factor
+  # of unit roundoff to avoid the repeated value from the LGL nodes at the interface.
+  if i == 1
+    x_node = SVector(nextfloat(x_node[1]) , x_node[2])
+  elseif i == nnodes(semi.solver)
+    x_node = SVector(prevfloat(x_node[1]) , x_node[2])
+  end
+  u_local = Trixi.get_node_vars(u, equations, solver, i, j, element)
+  return lake_at_rest_error_two_level(u_local, x_node, equations)
+end
+
+# report the well-balancedness lake-at-rest error to the screen
+println("─"^100)
+println(" Lake-at-rest error for '", Trixi.get_name(equations), "' with ", summary(solver),
+        " at final time " * @sprintf("%10.8e", tspan[end]))
+
+@printf(" %-12s:", Trixi.pretty_form_utf(lake_at_rest_error))
+@printf("  % 10.8e", l1_well_balance_error)
+println()
+println("─"^100)
diff --git a/examples/tree_1d_dgsem/elixir_shallowwater_beach.jl b/examples/tree_1d_dgsem/elixir_shallowwater_beach.jl
new file mode 100644
index 00000000000..1288bc5e66a
--- /dev/null
+++ b/examples/tree_1d_dgsem/elixir_shallowwater_beach.jl
@@ -0,0 +1,121 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# Semidiscretization of the shallow water equations
+#
+# TODO: TrixiShallowWater: wet/dry example elixir
+
+equations = ShallowWaterEquations1D(gravity_constant=9.812)
+
+"""
+    initial_condition_beach(x, t, equations:: ShallowWaterEquations1D)
+Initial condition to simulate a wave running towards a beach and crashing. Difficult test
+including both wetting and drying in the domain using slip wall boundary conditions.
+The bottom topography is altered to be differentiable on the domain [0,8] and
+differs from the reference below.
+
+The water height and speed functions used here are adapted from the initial condition
+found in Section 5.2 of the paper:
+  - Andreas Bollermann, Sebastian Noelle, Maria Lukáčová-Medvid’ová (2011)
+    Finite volume evolution Galerkin methods for the shallow water equations with dry beds\n
+    [DOI: 10.4208/cicp.220210.020710a](https://dx.doi.org/10.4208/cicp.220210.020710a)
+"""
+function initial_condition_beach(x, t, equations:: ShallowWaterEquations1D)
+  D = 1
+  delta = 0.02
+  gamma = sqrt((3 * delta) / (4 * D))
+  x_a = sqrt((4 * D) / (3 * delta)) * acosh(sqrt(20))
+
+  f = D + 40 * delta * sech(gamma * (8 * x[1] - x_a))^2
+
+  # steep curved beach
+  b = 0.01 + 99 / 409600 * 4^x[1]
+
+  if x[1] >= 6
+    H = b
+    v = 0.0
+  else
+    H = f
+    v = sqrt(equations.gravity / D) * H
+  end
+
+  # It is mandatory to shift the water level at dry areas to make sure the water height h
+  # stays positive. The system would not be stable for h set to a hard 0 due to division by h in
+  # the computation of velocity, e.g., (h v) / h. Therefore, a small dry state threshold
+  # with a default value of 500*eps() ≈ 1e-13 in double precision, is set in the constructor above
+  # for the ShallowWaterEquations and added to the initial condition if h = 0.
+  # This default value can be changed within the constructor call depending on the simulation setup.
+  H = max(H, b + equations.threshold_limiter)
+  return prim2cons(SVector(H, v, b), equations)
+end
+
+initial_condition = initial_condition_beach
+boundary_condition = boundary_condition_slip_wall
+
+###############################################################################
+# Get the DG approximation space
+
+volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
+surface_flux = (FluxHydrostaticReconstruction(flux_hll_chen_noelle, hydrostatic_reconstruction_chen_noelle),
+                flux_nonconservative_chen_noelle)
+
+basis = LobattoLegendreBasis(3)
+
+indicator_sc = IndicatorHennemannGassnerShallowWater(equations, basis,
+                                                     alpha_max=0.5,
+                                                     alpha_min=0.001,
+                                                     alpha_smooth=true,
+                                                     variable=waterheight_pressure)
+volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
+                                                 volume_flux_dg=volume_flux,
+                                                 volume_flux_fv=surface_flux)
+
+solver = DGSEM(basis, surface_flux, volume_integral)
+
+###############################################################################
+# Create the TreeMesh for the domain [0, 8]
+
+coordinates_min = 0.0
+coordinates_max = 8.0
+
+mesh = TreeMesh(coordinates_min, coordinates_max,
+                initial_refinement_level=7,
+                n_cells_max=10_000,
+                periodicity=false)
+
+# create the semi discretization object
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    boundary_conditions=boundary_condition)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 10.0)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 1000
+analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=false,
+                                     extra_analysis_integrals=(energy_kinetic,
+                                                               energy_internal))
+
+alive_callback = AliveCallback(analysis_interval=analysis_interval)
+
+save_solution = SaveSolutionCallback(dt=0.5,
+                                     save_initial_solution=true,
+                                     save_final_solution=true)
+
+callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, save_solution)
+
+stage_limiter! = PositivityPreservingLimiterShallowWater(variables=(Trixi.waterheight,))
+
+###############################################################################
+# run the simulation
+
+sol = solve(ode, SSPRK43(stage_limiter!);
+            ode_default_options()..., callback=callbacks);
+
+summary_callback() # print the timer summary
\ No newline at end of file
diff --git a/examples/tree_1d_dgsem/elixir_shallowwater_parabolic_bowl.jl b/examples/tree_1d_dgsem/elixir_shallowwater_parabolic_bowl.jl
new file mode 100644
index 00000000000..916bba76ece
--- /dev/null
+++ b/examples/tree_1d_dgsem/elixir_shallowwater_parabolic_bowl.jl
@@ -0,0 +1,117 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# Semidiscretization of the shallow water equations
+#
+# TODO: TrixiShallowWater: wet/dry example elixir
+
+equations = ShallowWaterEquations1D(gravity_constant=9.81)
+
+"""
+    initial_condition_parabolic_bowl(x, t, equations:: ShallowWaterEquations1D)
+
+Well-known initial condition to test the [`hydrostatic_reconstruction_chen_noelle`](@ref) and its
+wet-dry mechanics. This test has analytical solutions. The initial condition is defined by the
+analytical solution at time t=0. The bottom topography defines a bowl and the water level is given
+by an oscillating lake.
+
+The original test and its analytical solution in two dimensions were first presented in
+- William C. Thacker (1981)
+  Some exact solutions to the nonlinear shallow-water wave equations
+  [DOI: 10.1017/S0022112081001882](https://doi.org/10.1017/S0022112081001882).
+
+The particular setup below is taken from Section 6.2 of
+- Niklas Wintermeyer, Andrew R. Winters, Gregor J. Gassner and Timothy Warburton (2018)
+  An entropy stable discontinuous Galerkin method for the shallow water equations on
+  curvilinear meshes with wet/dry fronts accelerated by GPUs
+  [DOI: 10.1016/j.jcp.2018.08.038](https://doi.org/10.1016/j.jcp.2018.08.038).
+"""
+function initial_condition_parabolic_bowl(x, t, equations:: ShallowWaterEquations1D)
+  a = 1
+  h_0 = 0.1
+  sigma = 0.5
+  ω = sqrt(2 * equations.gravity * h_0) / a
+
+  v = -sigma * ω * sin(ω * t)
+
+  b = h_0 * x[1]^2 / a^2
+
+  H = sigma * h_0 / a^2 * (2 * x[1] * cos(ω * t) - sigma) + h_0
+
+  # It is mandatory to shift the water level at dry areas to make sure the water height h
+  # stays positive. The system would not be stable for h set to a hard 0 due to division by h in
+  # the computation of velocity, e.g., (h v) / h. Therefore, a small dry state threshold
+  # with a default value of 500*eps() ≈ 1e-13 in double precision, is set in the constructor above
+  # for the ShallowWaterEquations and added to the initial condition if h = 0.
+  # This default value can be changed within the constructor call depending on the simulation setup.
+  H = max(H, b + equations.threshold_limiter)
+  return prim2cons(SVector(H, v, b), equations)
+end
+
+initial_condition = initial_condition_parabolic_bowl
+
+###############################################################################
+# Get the DG approximation space
+
+volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
+surface_flux = (FluxHydrostaticReconstruction(flux_hll_chen_noelle, hydrostatic_reconstruction_chen_noelle),
+                flux_nonconservative_chen_noelle)
+
+basis = LobattoLegendreBasis(5)
+
+indicator_sc = IndicatorHennemannGassnerShallowWater(equations, basis,
+                                                     alpha_max=0.5,
+                                                     alpha_min=0.001,
+                                                     alpha_smooth=true,
+                                                     variable=waterheight_pressure)
+volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
+                                                 volume_flux_dg=volume_flux,
+                                                 volume_flux_fv=surface_flux)
+
+solver = DGSEM(basis, surface_flux, volume_integral)
+
+###############################################################################
+# Create the TreeMesh for the domain [-2, 2]
+
+coordinates_min = -2.0
+coordinates_max = 2.0
+
+mesh = TreeMesh(coordinates_min, coordinates_max,
+                initial_refinement_level=6,
+                n_cells_max=10_000)
+
+# create the semi discretization object
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 10.0)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 1000
+analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=false,
+                                     extra_analysis_integrals=(energy_kinetic,
+                                                               energy_internal))
+
+alive_callback = AliveCallback(analysis_interval=analysis_interval)
+
+save_solution = SaveSolutionCallback(interval=1000,
+                                     save_initial_solution=true,
+                                     save_final_solution=true)
+
+callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, save_solution)
+
+stage_limiter! = PositivityPreservingLimiterShallowWater(variables=(Trixi.waterheight,))
+
+###############################################################################
+# run the simulation
+
+sol = solve(ode, SSPRK43(stage_limiter!);
+            ode_default_options()..., callback=callbacks);
+
+summary_callback() # print the timer summary
\ No newline at end of file
diff --git a/examples/tree_1d_dgsem/elixir_shallowwater_well_balanced_wet_dry.jl b/examples/tree_1d_dgsem/elixir_shallowwater_well_balanced_wet_dry.jl
new file mode 100644
index 00000000000..8de46c61794
--- /dev/null
+++ b/examples/tree_1d_dgsem/elixir_shallowwater_well_balanced_wet_dry.jl
@@ -0,0 +1,165 @@
+
+using OrdinaryDiffEq
+using Trixi
+using Printf: @printf, @sprintf
+
+###############################################################################
+# Semidiscretization of the shallow water equations
+#
+# TODO: TrixiShallowWater: wet/dry example elixir
+
+equations = ShallowWaterEquations1D(gravity_constant=9.812)
+
+"""
+    initial_condition_complex_bottom_well_balanced(x, t, equations:: ShallowWaterEquations1D)
+
+Initial condition with a complex (discontinuous) bottom topography to test the well-balanced
+property for the [`hydrostatic_reconstruction_chen_noelle`](@ref) including dry areas within the
+domain. The errors from the analysis callback are not important but the error for this
+lake-at-rest test case `∑|H0-(h+b)|` should be around machine roundoff.
+
+The initial condition is taken from Section 5.2 of the paper:
+- Guoxian Chen and Sebastian Noelle (2017)
+  A new hydrostatic reconstruction scheme based on subcell reconstructions
+  [DOI:10.1137/15M1053074](https://dx.doi.org/10.1137/15M1053074)
+"""
+function initial_condition_complex_bottom_well_balanced(x, t, equations:: ShallowWaterEquations1D)
+  v = 0.0
+  b = sin(4 * pi * x[1]) + 3
+
+  if x[1] >= 0.5
+    b = sin(4 * pi * x[1]) + 1
+  end
+
+  H = max(b, 2.5)
+
+  if x[1] >= 0.5
+    H = max(b, 1.5)
+  end
+
+  # It is mandatory to shift the water level at dry areas to make sure the water height h
+  # stays positive. The system would not be stable for h set to a hard 0 due to division by h in
+  # the computation of velocity, e.g., (h v) / h. Therefore, a small dry state threshold
+  # with a default value of 500*eps() ≈ 1e-13 in double precision, is set in the constructor above
+  # for the ShallowWaterEquations and added to the initial condition if h = 0.
+  # This default value can be changed within the constructor call depending on the simulation setup.
+  H = max(H, b + equations.threshold_limiter)
+  return prim2cons(SVector(H, v, b), equations)
+end
+
+initial_condition = initial_condition_complex_bottom_well_balanced
+
+###############################################################################
+# Get the DG approximation space
+
+volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
+surface_flux = (FluxHydrostaticReconstruction(flux_hll_chen_noelle, hydrostatic_reconstruction_chen_noelle),
+                flux_nonconservative_chen_noelle)
+
+basis = LobattoLegendreBasis(3)
+
+indicator_sc = IndicatorHennemannGassnerShallowWater(equations, basis,
+                                                     alpha_max=0.5,
+                                                     alpha_min=0.001,
+                                                     alpha_smooth=true,
+                                                     variable=waterheight_pressure)
+volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
+                                                 volume_flux_dg=volume_flux,
+                                                 volume_flux_fv=surface_flux)
+
+solver = DGSEM(basis, surface_flux, volume_integral)
+
+###############################################################################
+# Create the TreeMesh for the domain [0, 1]
+
+coordinates_min = 0.0
+coordinates_max = 1.0
+
+mesh = TreeMesh(coordinates_min, coordinates_max,
+                initial_refinement_level=6,
+                n_cells_max=10_000)
+
+# create the semi discretization object
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 25.0)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 5000
+analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=false)
+
+alive_callback = AliveCallback(analysis_interval=analysis_interval)
+
+save_solution = SaveSolutionCallback(interval=5000,
+                                     save_initial_solution=true,
+                                     save_final_solution=true)
+
+stepsize_callback = StepsizeCallback(cfl=1.5)
+
+callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, save_solution,
+                        stepsize_callback)
+
+stage_limiter! = PositivityPreservingLimiterShallowWater(variables=(Trixi.waterheight,))
+
+###############################################################################
+# run the simulation
+
+sol = solve(ode, SSPRK43(stage_limiter!); dt=1.0,
+            ode_default_options()..., callback=callbacks, adaptive=false);
+
+summary_callback() # print the timer summary
+
+###############################################################################
+# Workaround to compute the well-balancedness error for this particular problem
+# that has two reference water heights. One for a lake to the left of the
+# discontinuous bottom topography `H0_upper = 2.5` and another for a lake to the
+# right of the discontinuous bottom topography `H0_lower = 1.5`.
+
+# Declare a special version of the function to compute the lake-at-rest error
+# OBS! The reference water height values are hardcoded for convenience.
+function lake_at_rest_error_two_level(u, x, equations::ShallowWaterEquations1D)
+  h, _, b = u
+
+  # For well-balancedness testing with possible wet/dry regions the reference
+  # water height `H0` accounts for the possibility that the bottom topography
+  # can emerge out of the water as well as for the threshold offset to avoid
+  # division by a "hard" zero water height as well.
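+  # (Worked example: at x[1] = 0.625 the bottom is b = sin(2.5 * pi) + 1 = 2, which
+  #  emerges above the lower reference level 1.5, so the error there is measured
+  #  against b + equations.threshold_limiter instead of the constant 1.5.)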
+  if x[1] < 0.5
+    H0_wet_dry = max( 2.5 , b + equations.threshold_limiter )
+  else
+    H0_wet_dry = max( 1.5 , b + equations.threshold_limiter )
+  end
+
+  return abs(H0_wet_dry - (h + b))
+end
+
+# point to the data we want to analyze
+u = Trixi.wrap_array(sol[end], semi)
+# Perform the actual integration of the well-balancedness error over the domain
+l1_well_balance_error = Trixi.integrate_via_indices(u, mesh, equations, semi.solver, semi.cache; normalize=true) do u, i, element, equations, solver
+  x_node = Trixi.get_node_coords(semi.cache.elements.node_coordinates, equations, solver, i, element)
+  # We know that the discontinuity is a vertical line. Slightly augment the x value by a factor
+  # of unit roundoff to avoid the repeated value from the LGL nodes at the interface.
+  if i == 1
+    x_node = SVector(nextfloat(x_node[1]))
+  elseif i == nnodes(semi.solver)
+    x_node = SVector(prevfloat(x_node[1]))
+  end
+  u_local = Trixi.get_node_vars(u, equations, solver, i, element)
+  return lake_at_rest_error_two_level(u_local, x_node, equations)
+end
+
+# report the well-balancedness lake-at-rest error to the screen
+println("─"^100)
+println(" Lake-at-rest error for '", Trixi.get_name(equations), "' with ", summary(solver),
+        " at final time " * @sprintf("%10.8e", tspan[end]))
+
+@printf(" %-12s:", Trixi.pretty_form_utf(lake_at_rest_error))
+@printf("  % 10.8e", l1_well_balance_error)
+println()
+println("─"^100)
\ No newline at end of file
diff --git a/examples/tree_2d_dgsem/elixir_shallowwater_conical_island.jl b/examples/tree_2d_dgsem/elixir_shallowwater_conical_island.jl
new file mode 100644
index 00000000000..7c60e35b03e
--- /dev/null
+++ b/examples/tree_2d_dgsem/elixir_shallowwater_conical_island.jl
@@ -0,0 +1,116 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the shallow water equations
+#
+# TODO: TrixiShallowWater: wet/dry example elixir
+
+equations = ShallowWaterEquations2D(gravity_constant=9.81, H0=1.4)
+
+"""
+    initial_condition_conical_island(x, t, equations::ShallowWaterEquations2D)
+
+Initial condition for the [`ShallowWaterEquations2D`](@ref) to test the [`hydrostatic_reconstruction_chen_noelle`](@ref)
+and its handling of discontinuous water heights at the start in combination with wetting and
+drying. The bottom topography is given by a conical island in the middle of the domain. Around that
+island, there is a cylindrical water column at t=0 and the rest of the domain is dry. This
+discontinuous water height is smoothed by a logistic function. This simulation uses a Dirichlet
+boundary condition with the initial values. Due to the dry cells at the boundary, this has the
+effect of an outflow which can be seen in the simulation.
+"""
+function initial_condition_conical_island(x, t, equations::ShallowWaterEquations2D)
+  # Set the background values
+
+  v1 = 0.0
+  v2 = 0.0
+
+  x1, x2 = x
+  b = max(0.1, 1.0 - 4.0 * sqrt(x1^2 + x2^2))
+
+  # use a logistic function to transfer water height value smoothly
+  L = equations.H0    # maximum of function
+  x0 = 0.3   # center point of function
+  k = -25.0  # sharpness of transfer
+
+  H = max(b, L/(1.0 + exp(-k*(sqrt(x1^2+x2^2) - x0))))
+
+  # It is mandatory to shift the water level at dry areas to make sure the water height h
+  # stays positive. The system would not be stable for h set to a hard 0 due to division by h in
+  # the computation of velocity, e.g., (h v1) / h. Therefore, a small dry state threshold
+  # with a default value of 500*eps() ≈ 1e-13 in double precision, is set in the constructor above
+  # for the ShallowWaterEquations and added to the initial condition if h = 0.
+  # This default value can be changed within the constructor call depending on the simulation setup.
+  H = max(H, b + equations.threshold_limiter)
+  return prim2cons(SVector(H, v1, v2, b), equations)
+end
+
+initial_condition = initial_condition_conical_island
+boundary_conditions = BoundaryConditionDirichlet(initial_condition)
+
+###############################################################################
+# Get the DG approximation space
+
+volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
+surface_flux = (FluxHydrostaticReconstruction(flux_hll_chen_noelle, hydrostatic_reconstruction_chen_noelle),
+                flux_nonconservative_chen_noelle)
+
+basis = LobattoLegendreBasis(4)
+
+indicator_sc = IndicatorHennemannGassnerShallowWater(equations, basis,
+                                                     alpha_max=0.5,
+                                                     alpha_min=0.001,
+                                                     alpha_smooth=true,
+                                                     variable=waterheight_pressure)
+volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
+                                                 volume_flux_dg=volume_flux,
+                                                 volume_flux_fv=surface_flux)
+
+solver = DGSEM(basis, surface_flux, volume_integral)
+
+###############################################################################
+# Get the TreeMesh and setup a mesh
+
+coordinates_min = (-1.0, -1.0)
+coordinates_max = (1.0, 1.0)
+mesh = TreeMesh(coordinates_min, coordinates_max,
+                initial_refinement_level=4,
+                n_cells_max=10_000,
+                periodicity=false)
+
+# Create the semi discretization object
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    boundary_conditions=boundary_conditions)
+
+###############################################################################
+# ODE solver
+
+tspan = (0.0, 10.0)
+ode = semidiscretize(semi, tspan)
+
+###############################################################################
+# Callbacks
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 1000
+analysis_callback = AnalysisCallback(semi, interval=analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval=analysis_interval)
+
+save_solution = SaveSolutionCallback(interval=100,
+                                     save_initial_solution=true,
+                                     save_final_solution=true)
+
+callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, save_solution)
+
+###############################################################################
+# run the simulation
+
+stage_limiter! = PositivityPreservingLimiterShallowWater(variables=(Trixi.waterheight,))
+
+sol = solve(ode, SSPRK43(stage_limiter!);
+            ode_default_options()..., callback=callbacks);
+
+summary_callback() # print the timer summary
diff --git a/examples/tree_2d_dgsem/elixir_shallowwater_parabolic_bowl.jl b/examples/tree_2d_dgsem/elixir_shallowwater_parabolic_bowl.jl
new file mode 100644
index 00000000000..03dcf017266
--- /dev/null
+++ b/examples/tree_2d_dgsem/elixir_shallowwater_parabolic_bowl.jl
@@ -0,0 +1,120 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# Semidiscretization of the shallow water equations
+#
+# TODO: TrixiShallowWater: wet/dry example elixir
+
+equations = ShallowWaterEquations2D(gravity_constant=9.81)
+
+"""
+    initial_condition_parabolic_bowl(x, t, equations:: ShallowWaterEquations2D)
+
+Well-known initial condition to test the [`hydrostatic_reconstruction_chen_noelle`](@ref) and its
+wet-dry mechanics. This test has an analytical solution. The initial condition is defined by the
+analytical solution at time t=0. The bottom topography defines a bowl and the water level is given
+by an oscillating lake.
+
+The original test and its analytical solution were first presented in
+- William C. Thacker (1981)
+  Some exact solutions to the nonlinear shallow-water wave equations
+  [DOI: 10.1017/S0022112081001882](https://doi.org/10.1017/S0022112081001882).
+
+The particular setup below is taken from Section 6.2 of
+- Niklas Wintermeyer, Andrew R. Winters, Gregor J. Gassner and Timothy Warburton (2018)
+  An entropy stable discontinuous Galerkin method for the shallow water equations on
+  curvilinear meshes with wet/dry fronts accelerated by GPUs
+  [DOI: 10.1016/j.jcp.2018.08.038](https://doi.org/10.1016/j.jcp.2018.08.038).
+"""
+function initial_condition_parabolic_bowl(x, t, equations:: ShallowWaterEquations2D)
+  a = 1.0
+  h_0 = 0.1
+  sigma = 0.5
+  ω = sqrt(2 * equations.gravity * h_0) / a
+
+  v1 = -sigma * ω * sin(ω * t)
+  v2 = sigma * ω * cos(ω * t)
+
+  b = h_0 * ((x[1])^2 + (x[2])^2) / a^2
+
+  H = sigma * h_0 / a^2 * (2 * x[1] * cos(ω * t) + 2 * x[2] * sin(ω * t) - sigma) + h_0
+
+  # It is mandatory to shift the water level at dry areas to make sure the water height h
+  # stays positive. The system would not be stable for h set to a hard 0 due to division by h in
+  # the computation of velocity, e.g., (h v1) / h. Therefore, a small dry state threshold
+  # with a default value of 500*eps() ≈ 1e-13 in double precision, is set in the constructor above
+  # for the ShallowWaterEquations and added to the initial condition if h = 0.
+  # This default value can be changed within the constructor call depending on the simulation setup.
+  H = max(H, b + equations.threshold_limiter)
+  return prim2cons(SVector(H, v1, v2, b), equations)
+end
+
+initial_condition = initial_condition_parabolic_bowl
+###############################################################################
+# Get the DG approximation space
+
+volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
+surface_flux = (FluxHydrostaticReconstruction(flux_hll_chen_noelle, hydrostatic_reconstruction_chen_noelle),
+                flux_nonconservative_chen_noelle)
+
+basis = LobattoLegendreBasis(7)
+
+indicator_sc = IndicatorHennemannGassnerShallowWater(equations, basis,
+                                                     alpha_max=0.6,
+                                                     alpha_min=0.001,
+                                                     alpha_smooth=true,
+                                                     variable=waterheight_pressure)
+volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
+                                                 volume_flux_dg=volume_flux,
+                                                 volume_flux_fv=surface_flux)
+
+solver = DGSEM(basis, surface_flux, volume_integral)
+
+
+###############################################################################
+# Create the TreeMesh for the domain [-2, 2]^2
+
+coordinates_min = (-2.0, -2.0)
+coordinates_max = (2.0, 2.0)
+
+mesh = TreeMesh(coordinates_min, coordinates_max,
+                initial_refinement_level=5,
+                n_cells_max=10_000)
+
+# create the semi discretization object
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
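+# (For scale: with a = 1.0 and h_0 = 0.1 as above, ω = sqrt(2 * 9.81 * 0.1) ≈ 1.4,
+#  i.e., one oscillation period of the lake is 2π/ω ≈ 4.5, so the final time 1.0
+#  below covers roughly a fifth of a period.)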
+
+tspan = (0.0, 1.0)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 1000
+analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=false,
+                                     extra_analysis_integrals=(energy_kinetic,
+                                                               energy_internal))
+
+alive_callback = AliveCallback(analysis_interval=analysis_interval)
+
+save_solution = SaveSolutionCallback(interval=100,
+                                     save_initial_solution=true,
+                                     save_final_solution=true)
+
+callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, save_solution)
+
+stage_limiter! = PositivityPreservingLimiterShallowWater(variables=(Trixi.waterheight,))
+
+###############################################################################
+# run the simulation
+
+callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, save_solution)
+
+sol = solve(ode, SSPRK43(stage_limiter!);
+            ode_default_options()..., callback=callbacks);
+
+summary_callback() # print the timer summary
diff --git a/examples/tree_2d_dgsem/elixir_shallowwater_well_balanced_wet_dry.jl b/examples/tree_2d_dgsem/elixir_shallowwater_well_balanced_wet_dry.jl
new file mode 100644
index 00000000000..6fede2fa4ea
--- /dev/null
+++ b/examples/tree_2d_dgsem/elixir_shallowwater_well_balanced_wet_dry.jl
@@ -0,0 +1,198 @@
+
+using OrdinaryDiffEq
+using Trixi
+using Printf: @printf, @sprintf
+
+###############################################################################
+# Semidiscretization of the shallow water equations
+#
+# TODO: TrixiShallowWater: wet/dry example elixir
+
+equations = ShallowWaterEquations2D(gravity_constant=9.812)
+
+"""
+    initial_condition_complex_bottom_well_balanced(x, t, equations:: ShallowWaterEquations2D)
+
+Initial condition with a complex (discontinuous) bottom topography to test the well-balanced
+property for the [`hydrostatic_reconstruction_chen_noelle`](@ref) including dry areas within the
+domain. The errors from the analysis callback are not important but the error for this
+lake-at-rest test case `∑|H0-(h+b)|` should be around machine roundoff.
+
+The initial condition is taken from Section 5.2 of the paper:
+- Guoxian Chen and Sebastian Noelle (2017)
+  A new hydrostatic reconstruction scheme based on subcell reconstructions
+  [DOI:10.1137/15M1053074](https://dx.doi.org/10.1137/15M1053074)
+"""
+function initial_condition_complex_bottom_well_balanced(x, t, equations::ShallowWaterEquations2D)
+  v1 = 0
+  v2 = 0
+  b = sin(4 * pi * x[1]) + 3
+
+  if x[1] >= 0.5
+    b = sin(4 * pi * x[1]) + 1
+  end
+
+  H = max(b, 2.5)
+  if x[1] >= 0.5
+    H = max(b, 1.5)
+  end
+
+  # It is mandatory to shift the water level at dry areas to make sure the water height h
+  # stays positive. The system would not be stable for h set to a hard 0 due to division by h in
+  # the computation of velocity, e.g., (h v1) / h. Therefore, a small dry state threshold
+  # with a default value of 500*eps() ≈ 1e-13 in double precision, is set in the constructor above
+  # for the ShallowWaterEquations and added to the initial condition if h = 0.
+  # This default value can be changed within the constructor call depending on the simulation setup.
+    H = max(H, b + equations.threshold_limiter)
+    return prim2cons(SVector(H, v1, v2, b), equations)
+end
+
+initial_condition = initial_condition_complex_bottom_well_balanced
+
+###############################################################################
+# Get the DG approximation space
+
+volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
+surface_flux = (FluxHydrostaticReconstruction(flux_hll_chen_noelle, hydrostatic_reconstruction_chen_noelle),
+                flux_nonconservative_chen_noelle)
+
+basis = LobattoLegendreBasis(3)
+
+indicator_sc = IndicatorHennemannGassnerShallowWater(equations, basis,
+                                                     alpha_max=0.5,
+                                                     alpha_min=0.001,
+                                                     alpha_smooth=true,
+                                                     variable=waterheight_pressure)
+volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
+                                                 volume_flux_dg=volume_flux,
+                                                 volume_flux_fv=surface_flux)
+
+solver = DGSEM(basis, surface_flux, volume_integral)
+
+###############################################################################
+# Create the TreeMesh for the domain [0, 1]^2
+
+coordinates_min = (0.0, 0.0)
+coordinates_max = (1.0, 1.0)
+
+mesh = TreeMesh(coordinates_min, coordinates_max,
+                initial_refinement_level=3,
+                n_cells_max=10_000)
+
+# create the semidiscretization object
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 50.0)
+ode = semidiscretize(semi, tspan)
+
+###############################################################################
+# Workaround to set a discontinuous water level and bottom topography for
+# debugging and testing. Essentially, this is a slight augmentation of the
+# `compute_coefficients` where the `x` node value passed here is slightly
+# perturbed to the left / right in order to set a true discontinuity that avoids
+# the doubled value of the LGL nodes at a particular element interface.
+#
+# Note! The errors from the analysis callback are not important but the error
+# for this lake-at-rest test case `∑|H0-(h+b)|` should be near machine roundoff.
+
+# point to the data we want to augment
+u = Trixi.wrap_array(ode.u0, semi)
+# reset the initial condition
+for element in eachelement(semi.solver, semi.cache)
+    for j in eachnode(semi.solver), i in eachnode(semi.solver)
+        x_node = Trixi.get_node_coords(semi.cache.elements.node_coordinates, equations, semi.solver, i, j, element)
+        # We know that the discontinuity is a vertical line. Slightly augment the x value by a factor
+        # of unit roundoff to avoid the repeated value from the LGL nodes at an interface.
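+        # Here, nextfloat/prevfloat perturb x by a single unit in the last place
+        # (a relative shift of about 1e-16 in double precision), enough to select
+        # the correct side of the jump without visibly moving the node.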
+        if i == 1
+            x_node = SVector(nextfloat(x_node[1]), x_node[2])
+        elseif i == nnodes(semi.solver)
+            x_node = SVector(prevfloat(x_node[1]), x_node[2])
+        end
+        u_node = initial_condition_complex_bottom_well_balanced(x_node, first(tspan), equations)
+        Trixi.set_node_vars!(u, u_node, equations, semi.solver, i, j, element)
+    end
+end
+
+###############################################################################
+# Callbacks
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 1000
+analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=false)
+
+alive_callback = AliveCallback(analysis_interval=analysis_interval)
+
+save_solution = SaveSolutionCallback(interval=1000,
+                                     save_initial_solution=true,
+                                     save_final_solution=true)
+
+stepsize_callback = StepsizeCallback(cfl=2.0)
+
+
+callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, save_solution,
+                        stepsize_callback)
+
+stage_limiter! = PositivityPreservingLimiterShallowWater(variables=(Trixi.waterheight,))
+
+###############################################################################
+# run the simulation
+
+sol = solve(ode, SSPRK43(stage_limiter!); dt=1.0,
+            ode_default_options()..., callback=callbacks, adaptive=false);
+
+summary_callback() # print the timer summary
+
+###############################################################################
+# Workaround to compute the well-balancedness error for this particular problem
+# that has two reference water heights. One for a lake to the left of the
+# discontinuous bottom topography `H0_upper = 2.5` and another for a lake to the
+# right of the discontinuous bottom topography `H0_lower = 1.5`.
+
+# Declare a special version of the function to compute the lake-at-rest error
+# OBS! The reference water height values are hardcoded for convenience.
+function lake_at_rest_error_two_level(u, x, equations::ShallowWaterEquations2D)
+    h, _, _, b = u
+
+    # For well-balancedness testing with possible wet/dry regions the reference
+    # water height `H0` accounts for the possibility that the bottom topography
+    # can emerge out of the water as well as for the threshold offset to avoid
+    # division by a "hard" zero water height.
+
+    if x[1] < 0.5
+        H0_wet_dry = max(2.5, b + equations.threshold_limiter)
+    else
+        H0_wet_dry = max(1.5, b + equations.threshold_limiter)
+    end
+
+    return abs(H0_wet_dry - (h + b))
+end
+
+# point to the data we want to analyze
+u = Trixi.wrap_array(sol[end], semi)
+# Perform the actual integration of the well-balancedness error over the domain
+l1_well_balance_error = Trixi.integrate_via_indices(u, mesh, equations, semi.solver, semi.cache; normalize=true) do u, i, j, element, equations, solver
+    x_node = Trixi.get_node_coords(semi.cache.elements.node_coordinates, equations, solver, i, j, element)
+    # We know that the discontinuity is a vertical line. Slightly augment the x value by a factor
+    # of unit roundoff to avoid the repeated value from the LGL nodes at an interface.
+    if i == 1
+        x_node = SVector(nextfloat(x_node[1]), x_node[2])
+    elseif i == nnodes(semi.solver)
+        x_node = SVector(prevfloat(x_node[1]), x_node[2])
+    end
+    u_local = Trixi.get_node_vars(u, equations, solver, i, j, element)
+    return lake_at_rest_error_two_level(u_local, x_node, equations)
+end
+
+# report the well-balancedness lake-at-rest error to the screen
+println("─"^100)
+println(" Lake-at-rest error for '", Trixi.get_name(equations), "' with ", summary(solver),
+        " at final time " * @sprintf("%10.8e", tspan[end]))
+
+@printf(" %-12s:", Trixi.pretty_form_utf(lake_at_rest_error))
+@printf(" % 10.8e", l1_well_balance_error)
+println()
+println("─"^100)
diff --git a/examples/unstructured_2d_dgsem/elixir_shallowwater_three_mound_dam_break.jl b/examples/unstructured_2d_dgsem/elixir_shallowwater_three_mound_dam_break.jl
new file mode 100644
index 00000000000..65b0fcae462
--- /dev/null
+++ b/examples/unstructured_2d_dgsem/elixir_shallowwater_three_mound_dam_break.jl
@@ -0,0 +1,139 @@
+
+using Downloads: download
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the shallow water equations
+#
+# TODO: TrixiShallowWater: wet/dry example elixir
+
+
+equations = ShallowWaterEquations2D(gravity_constant=9.81, H0=1.875,
+                                    threshold_limiter=1e-12, threshold_wet=1e-14)
+
+
+"""
+    initial_condition_three_mounds(x, t, equations::ShallowWaterEquations2D)
+
+Initial condition simulating a dam break. The bottom topography is given by one large and two smaller
+mounds. The mounds are flooded by the water for t > 0. To smooth the discontinuity, a logistic function
+is applied.
+
+The initial condition is taken from Section 6.3 of the paper:
+- Niklas Wintermeyer, Andrew R. Winters, Gregor J. Gassner and Timothy Warburton (2018)
+  An entropy stable discontinuous Galerkin method for the shallow water equations on
+  curvilinear meshes with wet/dry fronts accelerated by GPUs
+  [DOI: 10.1016/j.jcp.2018.08.038](https://doi.org/10.1016/j.jcp.2018.08.038)
+"""
+function initial_condition_three_mounds(x, t, equations::ShallowWaterEquations2D)
+
+    # Set the background values
+    v1 = 0.0
+    v2 = 0.0
+
+    x1, x2 = x
+    M_1 = 1 - 0.1 * sqrt((x1 - 30.0)^2 + (x2 - 22.5)^2)
+    M_2 = 1 - 0.1 * sqrt((x1 - 30.0)^2 + (x2 - 7.5)^2)
+    M_3 = 2.8 - 0.28 * sqrt((x1 - 47.5)^2 + (x2 - 15.0)^2)
+
+    b = max(0.0, M_1, M_2, M_3)
+
+    # use a logistic function to transfer water height value smoothly
+    L = equations.H0 # maximum of function
+    x0 = 8 # center point of function
+    k = -75.0 # sharpness of transfer
+
+    H = max(b, L / (1.0 + exp(-k * (x1 - x0))))
+
+    # Avoid division by zero by adjusting the initial condition with a small dry state threshold
+    # that defaults to 500*eps() ≈ 1e-13 in double precision and is set in the constructor above
+    # for the ShallowWaterEquations struct.
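+    # Sketch of the resulting water level: the logistic factor equals 1/2 at x1 = x0 = 8,
+    # tends to 1 for x1 well below x0 (so H ≈ L = H0) and to 0 for x1 well above x0,
+    # i.e., the dam sits near x1 = 8 with the reservoir on its left.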
+ H = max(H, b + equations.threshold_limiter) + return prim2cons(SVector(H, v1, v2, b), equations) +end + +initial_condition = initial_condition_three_mounds + +function boundary_condition_outflow(u_inner, normal_direction::AbstractVector, x, t, + surface_flux_function, equations::ShallowWaterEquations2D) + # Impulse and bottom from inside, height from external state + u_outer = SVector(equations.threshold_wet, u_inner[2], u_inner[3], u_inner[4]) + + # calculate the boundary flux + flux = surface_flux_function(u_inner, u_outer, normal_direction, equations) + + return flux +end + +boundary_conditions = Dict( :Bottom => boundary_condition_slip_wall, + :Top => boundary_condition_slip_wall, + :Right => boundary_condition_outflow, + :Left => boundary_condition_slip_wall ) + +############################################################################### +# Get the DG approximation space + +volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal) +surface_flux = (FluxHydrostaticReconstruction(flux_hll_chen_noelle, hydrostatic_reconstruction_chen_noelle), + flux_nonconservative_chen_noelle) + +basis = LobattoLegendreBasis(4) + +indicator_sc = IndicatorHennemannGassnerShallowWater(equations, basis, + alpha_max=0.5, + alpha_min=0.001, + alpha_smooth=true, + variable=waterheight_pressure) +volume_integral = VolumeIntegralShockCapturingHG(indicator_sc; + volume_flux_dg=volume_flux, + volume_flux_fv=surface_flux) + +solver = DGSEM(basis, surface_flux, volume_integral) + +############################################################################### +# Get the unstructured quad mesh from a file (downloads the file if not available locally) + +default_meshfile = joinpath(@__DIR__, "mesh_three_mound.mesh") + +isfile(default_meshfile) || download("https://gist.githubusercontent.com/svengoldberg/c3c87fecb3fc6e46be7f0d1c7cb35f83/raw/e817ecd9e6c4686581d63c46128f9b6468d396d3/mesh_three_mound.mesh", + default_meshfile) + +meshfile = default_meshfile + +mesh = UnstructuredMesh2D(meshfile) + +# Create the semi discretization object +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver; + boundary_conditions=boundary_conditions) + +############################################################################### +# ODE solver + +tspan = (0.0, 20.0) +ode = semidiscretize(semi, tspan) + +############################################################################### +# Callbacks + +summary_callback = SummaryCallback() + +analysis_interval = 1000 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +save_solution = SaveSolutionCallback(interval=100, + save_initial_solution=true, + save_final_solution=true) + +callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, save_solution) + +############################################################################### +# run the simulation + +stage_limiter! 
= PositivityPreservingLimiterShallowWater(variables=(Trixi.waterheight,)) + +sol = solve(ode, SSPRK43(stage_limiter!); + ode_default_options()..., callback=callbacks); +summary_callback() # print the timer summary diff --git a/src/Trixi.jl b/src/Trixi.jl index 34a1977d4f5..cf6158e29eb 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -162,9 +162,13 @@ export flux, flux_central, flux_lax_friedrichs, flux_hll, flux_hllc, flux_hlle, flux_fjordholm_etal, flux_nonconservative_fjordholm_etal, flux_es_fjordholm_etal, flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal, hydrostatic_reconstruction_audusse_etal, flux_nonconservative_audusse_etal, +# TODO: TrixiShallowWater: move anything with "chen_noelle" to new file + hydrostatic_reconstruction_chen_noelle, flux_nonconservative_chen_noelle, + flux_hll_chen_noelle, FluxPlusDissipation, DissipationGlobalLaxFriedrichs, DissipationLocalLaxFriedrichs, FluxLaxFriedrichs, max_abs_speed_naive, FluxHLL, min_max_speed_naive, min_max_speed_davis, min_max_speed_einfeldt, + min_max_speed_chen_noelle, FluxLMARS, FluxRotated, flux_shima_etal_turbo, flux_ranocha_turbo, @@ -215,6 +219,8 @@ export DG, VolumeIntegralFluxDifferencing, VolumeIntegralPureLGLFiniteVolume, VolumeIntegralShockCapturingHG, IndicatorHennemannGassner, +# TODO: TrixiShallowWater: move new indicator + IndicatorHennemannGassnerShallowWater, VolumeIntegralUpwind, SurfaceIntegralWeakForm, SurfaceIntegralStrongForm, SurfaceIntegralUpwind, @@ -248,7 +254,8 @@ export ControllerThreeLevel, ControllerThreeLevelCombined, IndicatorNeuralNetwork, NeuralNetworkPerssonPeraire, NeuralNetworkRayHesthaven, NeuralNetworkCNN -export PositivityPreservingLimiterZhangShu +# TODO: TrixiShallowWater: move new limiter +export PositivityPreservingLimiterZhangShu, PositivityPreservingLimiterShallowWater export trixi_include, examples_dir, get_examples, default_example, default_example_unstructured, ode_default_options diff --git a/src/callbacks_stage/callbacks_stage.jl b/src/callbacks_stage/callbacks_stage.jl index 7609f9b341d..ab0f34efb78 100644 --- a/src/callbacks_stage/callbacks_stage.jl +++ b/src/callbacks_stage/callbacks_stage.jl @@ -6,4 +6,6 @@ #! format: noindent include("positivity_zhang_shu.jl") +# TODO: TrixiShallowWater: move specific limiter file +include("positivity_shallow_water.jl") end # @muladd diff --git a/src/callbacks_stage/positivity_shallow_water.jl b/src/callbacks_stage/positivity_shallow_water.jl new file mode 100644 index 00000000000..36276026fe9 --- /dev/null +++ b/src/callbacks_stage/positivity_shallow_water.jl @@ -0,0 +1,89 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +# TODO: TrixiShallowWater: generic wet/dry limiter + +""" + PositivityPreservingLimiterShallowWater(; variables) + +The limiter is specifically designed for the shallow water equations. +It is applied to all scalar `variables` in their given order +using the defined `threshold_limiter` from the [`ShallowWaterEquations1D`](@ref) struct +or the [`ShallowWaterEquations2D`](@ref) struct to determine the minimal acceptable values. +The order of the `variables` is important and might have a strong influence +on the robustness. 
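+
+For example, the wet/dry elixirs in this patch series construct the limiter as a stage
+limiter acting only on the water height:
+```julia
+stage_limiter! = PositivityPreservingLimiterShallowWater(variables=(Trixi.waterheight,))
+sol = solve(ode, SSPRK43(stage_limiter!); ode_default_options()..., callback=callbacks)
+```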
+
+As opposed to the standard version of the [`PositivityPreservingLimiterZhangShu`](@ref),
+nodes with a water height below the `threshold_limiter` are treated in a special way.
+To avoid numerical problems caused by velocities close to zero,
+the velocity is cut off such that the node can be identified as "dry". The special feature of the
+`ShallowWaterEquations` used here is that the bottom topography is stored as an additional
+quantity in the solution vector `u`. However, the value of the bottom topography
+should not be changed. That is why it is not limited.
+
+After the limiting process is applied to all degrees of freedom, for safety reasons,
+the `threshold_limiter` is applied again on all the DG nodes in order to avoid water
+heights below the threshold. In the case where the cell mean value is below the threshold
+before applying the limiter, there could still be dry nodes afterwards due to the logic
+of the limiter.
+
+This fully-discrete positivity-preserving limiter is based on the work of
+- Zhang, Shu (2011)
+  Maximum-principle-satisfying and positivity-preserving high-order schemes
+  for conservation laws: survey and new developments
+  [doi: 10.1098/rspa.2011.0153](https://doi.org/10.1098/rspa.2011.0153)
+"""
+struct PositivityPreservingLimiterShallowWater{N, Variables <: NTuple{N, Any}}
+    variables::Variables
+end
+
+function PositivityPreservingLimiterShallowWater(; variables)
+    PositivityPreservingLimiterShallowWater(variables)
+end
+
+function (limiter!::PositivityPreservingLimiterShallowWater)(u_ode, integrator,
+                                                             semi::AbstractSemidiscretization,
+                                                             t)
+    u = wrap_array(u_ode, semi)
+    @trixi_timeit timer() "positivity-preserving limiter" limiter_shallow_water!(u,
+                                                                                 limiter!.variables,
+                                                                                 mesh_equations_solver_cache(semi)...)
+end
+
+# Iterate over tuples in a type-stable way using "lispy tuple programming",
+# similar to https://stackoverflow.com/a/55849398:
+# Iterating over tuples of different functions isn't type-stable in general
+# but accessing the first element of a tuple is type-stable. Hence, it's good
+# to process one element at a time and replace iteration by recursion here.
+# Note that you shouldn't use this with too many elements per tuple since the
+# compile times can increase otherwise - but a handful of elements per tuple
+# is definitely fine.
+function limiter_shallow_water!(u, variables::NTuple{N, Any},
+                                mesh,
+                                equations::Union{ShallowWaterEquations1D,
+                                                 ShallowWaterEquations2D},
+                                solver, cache) where {N}
+    variable = first(variables)
+    remaining_variables = Base.tail(variables)
+
+    limiter_shallow_water!(u, equations.threshold_limiter, variable, mesh, equations,
+                           solver, cache)
+    limiter_shallow_water!(u, remaining_variables, mesh, equations, solver, cache)
+    return nothing
+end
+
+# terminate the type-stable iteration over tuples
+function limiter_shallow_water!(u, variables::Tuple{},
+                                mesh,
+                                equations::Union{ShallowWaterEquations1D,
+                                                 ShallowWaterEquations2D},
+                                solver, cache)
+    nothing
+end
+
+include("positivity_shallow_water_dg1d.jl")
+include("positivity_shallow_water_dg2d.jl")
+end # @muladd
diff --git a/src/callbacks_stage/positivity_shallow_water_dg1d.jl b/src/callbacks_stage/positivity_shallow_water_dg1d.jl
new file mode 100644
index 00000000000..13c6866e895
--- /dev/null
+++ b/src/callbacks_stage/positivity_shallow_water_dg1d.jl
@@ -0,0 +1,89 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +# TODO: TrixiShallowWater: 1D wet/dry limiter should move + +function limiter_shallow_water!(u, threshold::Real, variable, + mesh::AbstractMesh{1}, + equations::ShallowWaterEquations1D, + dg::DGSEM, cache) + @unpack weights = dg.basis + + @threaded for element in eachelement(dg, cache) + # determine minimum value + value_min = typemax(eltype(u)) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + value_min = min(value_min, variable(u_node, equations)) + end + + # detect if limiting is necessary + value_min < threshold || continue + + # compute mean value + u_mean = zero(get_node_vars(u, equations, dg, 1, element)) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + u_mean += u_node * weights[i] + end + # note that the reference element is [-1,1]^ndims(dg), thus the weights sum to 2 + u_mean = u_mean / 2^ndims(mesh) + + # We compute the value directly with the mean values, as we assume that + # Jensen's inequality holds (e.g. pressure for compressible Euler equations). + value_mean = variable(u_mean, equations) + theta = (value_mean - threshold) / (value_mean - value_min) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + + # Cut off velocity in case that the waterheight is smaller than the threshold + + h_node, h_v_node, b_node = u_node + h_mean, h_v_mean, _ = u_mean # b_mean is not used as b_node must not be overwritten + + # Set them both to zero to apply linear combination correctly + if h_node <= threshold + h_v_node = zero(eltype(u)) + h_v_mean = zero(eltype(u)) + end + + u_node = SVector(h_node, h_v_node, b_node) + u_mean = SVector(h_mean, h_v_mean, b_node) + + # When velocity is cut off, the only averaged value is the waterheight, + # because the velocity is set to zero and this value is passed. + # Otherwise, the velocity is averaged, as well. + # Note that the auxiliary bottom topography variable `b` is never limited. + set_node_vars!(u, theta * u_node + (1 - theta) * u_mean, + equations, dg, i, element) + end + end + + # "Safety" application of the wet/dry thresholds over all the DG nodes + # on the current `element` after the limiting above in order to avoid dry nodes. + # If the value_mean < threshold before applying limiter, there + # could still be dry nodes afterwards due to logic of the limiting + @threaded for element in eachelement(dg, cache) + for i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, element) + + h, hv, b = u_node + + if h <= threshold + h = threshold + hv = zero(eltype(u)) + end + + u_node = SVector(h, hv, b) + + set_node_vars!(u, u_node, equations, dg, i, element) + end + end + + return nothing +end +end # @muladd diff --git a/src/callbacks_stage/positivity_shallow_water_dg2d.jl b/src/callbacks_stage/positivity_shallow_water_dg2d.jl new file mode 100644 index 00000000000..da3a25fdcf4 --- /dev/null +++ b/src/callbacks_stage/positivity_shallow_water_dg2d.jl @@ -0,0 +1,90 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! 
format: noindent + +# TODO: TrixiShallowWater: 2D wet/dry limiter should move + +function limiter_shallow_water!(u, threshold::Real, variable, + mesh::AbstractMesh{2}, + equations::ShallowWaterEquations2D, dg::DGSEM, cache) + @unpack weights = dg.basis + + @threaded for element in eachelement(dg, cache) + # determine minimum value + value_min = typemax(eltype(u)) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + value_min = min(value_min, variable(u_node, equations)) + end + + # detect if limiting is necessary + value_min < threshold || continue + + # compute mean value + u_mean = zero(get_node_vars(u, equations, dg, 1, 1, element)) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + u_mean += u_node * weights[i] * weights[j] + end + # note that the reference element is [-1,1]^ndims(dg), thus the weights sum to 2 + u_mean = u_mean / 2^ndims(mesh) + + # We compute the value directly with the mean values, as we assume that + # Jensen's inequality holds (e.g. pressure for compressible Euler equations). + value_mean = variable(u_mean, equations) + theta = (value_mean - threshold) / (value_mean - value_min) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + + # Cut off velocity in case that the water height is smaller than the threshold + + h_node, h_v1_node, h_v2_node, b_node = u_node + h_mean, h_v1_mean, h_v2_mean, _ = u_mean # b_mean is not used as it must not be overwritten + + if h_node <= threshold + h_v1_node = zero(eltype(u)) + h_v2_node = zero(eltype(u)) + h_v1_mean = zero(eltype(u)) + h_v2_mean = zero(eltype(u)) + end + + u_node = SVector(h_node, h_v1_node, h_v2_node, b_node) + u_mean = SVector(h_mean, h_v1_mean, h_v2_mean, b_node) + + # When velocities are cut off, the only averaged value is the water height, + # because the velocities are set to zero and this value is passed. + # Otherwise, the velocities are averaged, as well. + # Note that the auxiliary bottom topography variable `b` is never limited. + set_node_vars!(u, theta * u_node + (1 - theta) * u_mean, + equations, dg, i, j, element) + end + end + + # "Safety" application of the wet/dry thresholds over all the DG nodes + # on the current `element` after the limiting above in order to avoid dry nodes. + # If the value_mean < threshold before applying limiter, there + # could still be dry nodes afterwards due to logic of the limiting + @threaded for element in eachelement(dg, cache) + for j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u, equations, dg, i, j, element) + + h, h_v1, h_v2, b = u_node + + if h <= threshold + h = threshold + h_v1 = zero(eltype(u)) + h_v2 = zero(eltype(u)) + end + + u_node = SVector(h, h_v1, h_v2, b) + + set_node_vars!(u, u_node, equations, dg, i, j, element) + end + end + + return nothing +end +end # @muladd diff --git a/src/equations/numerical_fluxes.jl b/src/equations/numerical_fluxes.jl index abd9d66c490..87010275f2c 100644 --- a/src/equations/numerical_fluxes.jl +++ b/src/equations/numerical_fluxes.jl @@ -304,6 +304,29 @@ See [`FluxHLL`](@ref). """ const flux_hll = FluxHLL() +# TODO: TrixiShallowWater: move the chen_noelle flux structure to the new package + +# An empty version of the `min_max_speed_chen_noelle` function is declared here +# in order to create a dimension agnostic version of `flux_hll_chen_noelle`. 
+# The full description of this wave speed estimate can be found in the docstrings
+# for `min_max_speed_chen_noelle` in `shallow_water_1d.jl` or `shallow_water_2d.jl`.
+function min_max_speed_chen_noelle end
+
+"""
+    flux_hll_chen_noelle = FluxHLL(min_max_speed_chen_noelle)
+
+An instance of [`FluxHLL`](@ref) specific to the shallow water equations that
+uses the wave speed estimates from [`min_max_speed_chen_noelle`](@ref).
+This HLL flux is guaranteed to have zero numerical mass flux out of a "dry" element,
+maintain positivity of the water height, and satisfy an entropy inequality.
+
+For complete details see Section 2.4 of the following reference
+- Guoxian Chen and Sebastian Noelle (2017)
+  A new hydrostatic reconstruction scheme based on subcell reconstructions
+  [DOI: 10.1137/15M1053074](https://doi.org/10.1137/15M1053074)
+"""
+const flux_hll_chen_noelle = FluxHLL(min_max_speed_chen_noelle)
+
 """
     flux_shima_etal_turbo(u_ll, u_rr, orientation_or_normal_direction, equations)
diff --git a/src/equations/shallow_water_1d.jl b/src/equations/shallow_water_1d.jl
index c33b31fca81..57bcb1212e1 100644
--- a/src/equations/shallow_water_1d.jl
+++ b/src/equations/shallow_water_1d.jl
@@ -6,7 +6,7 @@
 #! format: noindent
 
 @doc raw"""
-    ShallowWaterEquations1D(gravity, H0)
+    ShallowWaterEquations1D(; gravity, H0 = 0, threshold_limiter = nothing, threshold_wet = nothing)
 
 Shallow water equations (SWE) in one space dimension. The equations are given by
 ```math
@@ -24,6 +24,12 @@ also defines the total water height as ``H = h + b``.
 The additional quantity ``H_0`` is also available to store a reference value for the total
 water height that is useful to set initial conditions or test the "lake-at-rest" well-balancedness.
 
+Also, there are two thresholds which prevent numerical problems as well as instabilities. Neither of
+them needs to be passed, as default values are defined within the struct. The first one, `threshold_limiter`, is
+used in [`PositivityPreservingLimiterShallowWater`](@ref) on the water height, as a (small) shift on the initial
+condition and cutoff before the next time step. The second one, `threshold_wet`, is applied on the water height to
+define when the flow is "wet" before calculating the numerical flux.
+
 The bottom topography function ``b(x)`` is set inside the initial condition routine
 for a particular problem setup. To test the conservative form of the SWE one can set the bottom
 topography variable `b` to zero.
@@ -45,16 +51,35 @@ References for the SWE are many but a good introduction is available in Chapter
 [DOI: 10.1017/CBO9780511791253](https://doi.org/10.1017/CBO9780511791253)
 """
 struct ShallowWaterEquations1D{RealT <: Real} <: AbstractShallowWaterEquations{1, 3}
+    # TODO: TrixiShallowWater: where should the `threshold_limiter` and `threshold_wet` live?
+    # how to "properly" export these constants across the two packages?
     gravity::RealT # gravitational constant
     H0::RealT # constant "lake-at-rest" total water height
+    # `threshold_limiter` used in `PositivityPreservingLimiterShallowWater` on water height,
+    # as a (small) shift on the initial condition and cutoff before the next time step.
+    # Default is 500*eps() which in double precision is ≈1e-13.
+    threshold_limiter::RealT
+    # `threshold_wet` applied on water height to define when the flow is "wet"
+    # before calculating the numerical flux.
+    # Default is 5*eps() which in double precision is ≈1e-15.
+ threshold_wet::RealT end # Allow for flexibility to set the gravitational constant within an elixir depending on the # application where `gravity_constant=1.0` or `gravity_constant=9.81` are common values. # The reference total water height H0 defaults to 0.0 but is used for the "lake-at-rest" -# well-balancedness test cases -function ShallowWaterEquations1D(; gravity_constant, H0 = 0.0) - ShallowWaterEquations1D(gravity_constant, H0) +# well-balancedness test cases. +# Strict default values for thresholds that performed well in many numerical experiments +function ShallowWaterEquations1D(; gravity_constant, H0 = zero(gravity_constant), + threshold_limiter = nothing, threshold_wet = nothing) + T = promote_type(typeof(gravity_constant), typeof(H0)) + if threshold_limiter === nothing + threshold_limiter = 500 * eps(T) + end + if threshold_wet === nothing + threshold_wet = 5 * eps(T) + end + ShallowWaterEquations1D(gravity_constant, H0, threshold_limiter, threshold_wet) end have_nonconservative_terms(::ShallowWaterEquations1D) = True() @@ -307,6 +332,54 @@ Further details on the hydrostatic reconstruction and its motivation can be foun z) end +# TODO: TrixiShallowWater: move wet/dry specific routine +""" + flux_nonconservative_chen_noelle(u_ll, u_rr, + orientation::Integer, + equations::ShallowWaterEquations1D) + +Non-symmetric two-point surface flux that discretizes the nonconservative (source) term. +The discretization uses the `hydrostatic_reconstruction_chen_noelle` on the conservative +variables. + +Should be used together with [`FluxHydrostaticReconstruction`](@ref) and +[`hydrostatic_reconstruction_chen_noelle`](@ref) in the surface flux to ensure consistency. + +Further details on the hydrostatic reconstruction and its motivation can be found in +- Guoxian Chen and Sebastian Noelle (2017) + A new hydrostatic reconstruction scheme based on subcell reconstructions + [DOI:10.1137/15M1053074](https://dx.doi.org/10.1137/15M1053074) +""" +@inline function flux_nonconservative_chen_noelle(u_ll, u_rr, + orientation::Integer, + equations::ShallowWaterEquations1D) + + # Pull the water height and bottom topography on the left + h_ll, _, b_ll = u_ll + h_rr, _, b_rr = u_rr + + H_ll = h_ll + b_ll + H_rr = h_rr + b_rr + + b_star = min(max(b_ll, b_rr), min(H_ll, H_rr)) + + # Create the hydrostatic reconstruction for the left solution state + u_ll_star, _ = hydrostatic_reconstruction_chen_noelle(u_ll, u_rr, equations) + + # Copy the reconstructed water height for easier to read code + h_ll_star = u_ll_star[1] + + z = zero(eltype(u_ll)) + # Includes two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid + # cross-averaging across a discontinuous bottom topography + # (ii) True surface part that uses `h_ll` and `h_ll_star` to handle discontinuous bathymetry + return SVector(z, + equations.gravity * h_ll * b_ll - + equations.gravity * (h_ll_star + h_ll) * (b_ll - b_star), + z) +end + """ flux_fjordholm_etal(u_ll, u_rr, orientation, equations::ShallowWaterEquations1D) @@ -381,7 +454,7 @@ end A particular type of hydrostatic reconstruction on the water height to guarantee well-balancedness for a general bottom topography [`ShallowWaterEquations1D`](@ref). The reconstructed solution states -`u_ll_star` and `u_rr_star` variables are used to evaluate the surface numerical flux at the interface. +`u_ll_star` and `u_rr_star` variables are then used to evaluate the surface numerical flux at the interface. 
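+
+A sketch of one possible pairing (assuming the Lax-Friedrichs flux as the basic solver;
+the concrete flux choice is up to the user):
+
+```julia
+surface_flux = (FluxHydrostaticReconstruction(flux_lax_friedrichs,
+                                              hydrostatic_reconstruction_audusse_etal),
+                flux_nonconservative_audusse_etal)
+```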
 Use in combination with the generic numerical flux routine [`FluxHydrostaticReconstruction`](@ref).
 
 Further details on this hydrostatic reconstruction and its motivation can be found in
@@ -410,6 +483,67 @@ Further details on this hydrostatic reconstruction and its motivation can be fou
     return u_ll_star, u_rr_star
 end
 
+# TODO: TrixiShallowWater: move wet/dry specific routine
+"""
+    hydrostatic_reconstruction_chen_noelle(u_ll, u_rr,
+                                           equations::ShallowWaterEquations1D)
+
+A particular type of hydrostatic reconstruction of the water height to guarantee well-balancedness
+for a general bottom topography of the [`ShallowWaterEquations1D`](@ref). The reconstructed solution states
+`u_ll_star` and `u_rr_star` are then used to evaluate the surface numerical flux at the interface.
+The key idea is a linear reconstruction of the bottom and water height at the interfaces using subcells.
+Use in combination with the generic numerical flux routine [`FluxHydrostaticReconstruction`](@ref).
+
+Further details on this hydrostatic reconstruction and its motivation can be found in
+- Guoxian Chen and Sebastian Noelle (2017)
+  A new hydrostatic reconstruction scheme based on subcell reconstructions
+  [DOI:10.1137/15M1053074](https://dx.doi.org/10.1137/15M1053074)
+"""
+@inline function hydrostatic_reconstruction_chen_noelle(u_ll, u_rr,
+                                                        equations::ShallowWaterEquations1D)
+    # Unpack left and right water heights and bottom topographies
+    h_ll, _, b_ll = u_ll
+    h_rr, _, b_rr = u_rr
+
+    # Get the velocities on either side
+    v_ll = velocity(u_ll, equations)
+    v_rr = velocity(u_rr, equations)
+
+    H_ll = b_ll + h_ll
+    H_rr = b_rr + h_rr
+
+    b_star = min(max(b_ll, b_rr), min(H_ll, H_rr))
+
+    # Compute the reconstructed water heights
+    h_ll_star = min(H_ll - b_star, h_ll)
+    h_rr_star = min(H_rr - b_star, h_rr)
+
+    # Set the water height to be at least the value stored in the variable threshold after
+    # the hydrostatic reconstruction is applied and before the numerical flux is calculated
+    # to avoid numerical problems with arbitrarily small values. Interfaces with a water height
+    # lower or equal to the threshold can be declared as dry.
+    # The default value for `threshold_wet`, ≈ 5*eps() or 1e-15 in double precision, is set
+    # in the `ShallowWaterEquations1D` struct. This threshold value can be changed in the constructor
+    # call of this equation struct in an elixir.
+    threshold = equations.threshold_wet
+
+    if (h_ll_star <= threshold)
+        h_ll_star = threshold
+        v_ll = zero(v_ll)
+    end
+
+    if (h_rr_star <= threshold)
+        h_rr_star = threshold
+        v_rr = zero(v_rr)
+    end
+
+    # Create the conservative variables using the reconstructed water heights
+    u_ll_star = SVector(h_ll_star, h_ll_star * v_ll, b_ll)
+    u_rr_star = SVector(h_rr_star, h_rr_star * v_rr, b_rr)
+
+    return u_ll_star, u_rr_star
+end
+
 # Calculate maximum wave speed for local Lax-Friedrichs-type dissipation as the
 # maximum velocity magnitude plus the maximum speed of sound
 @inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer,
@@ -474,6 +608,39 @@ end
     return λ_min, λ_max
 end
 
+# TODO: TrixiShallowWater: move wet/dry specific routine
+"""
+    min_max_speed_chen_noelle(u_ll, u_rr, orientation::Integer,
+                              equations::ShallowWaterEquations1D)
+
+The approximated speeds for the HLL type numerical flux used by Chen and Noelle for their
+hydrostatic reconstruction. As they state in the paper, these speeds are chosen for the numerical
+flux to ensure positivity and to satisfy an entropy inequality.
+
+Further details on this hydrostatic reconstruction and its motivation can be found in
+- Guoxian Chen and Sebastian Noelle (2017)
+  A new hydrostatic reconstruction scheme based on subcell reconstructions
+  [DOI:10.1137/15M1053074](https://dx.doi.org/10.1137/15M1053074)
+"""
+@inline function min_max_speed_chen_noelle(u_ll, u_rr, orientation::Integer,
+                                           equations::ShallowWaterEquations1D)
+    # Get the velocity quantities
+    v_ll = velocity(u_ll, equations)
+    v_rr = velocity(u_rr, equations)
+
+    # Calculate the wave celerity on the left and right
+    h_ll = waterheight(u_ll, equations)
+    h_rr = waterheight(u_rr, equations)
+
+    a_ll = sqrt(equations.gravity * h_ll)
+    a_rr = sqrt(equations.gravity * h_rr)
+
+    λ_min = min(v_ll - a_ll, v_rr - a_rr, zero(eltype(u_ll)))
+    λ_max = max(v_ll + a_ll, v_rr + a_rr, zero(eltype(u_ll)))
+
+    return λ_min, λ_max
+end
+
 # More refined estimates for minimum and maximum wave speeds for HLL-type fluxes
 @inline function min_max_speed_davis(u_ll, u_rr, orientation::Integer,
                                      equations::ShallowWaterEquations1D)
@@ -636,9 +803,20 @@ end
 end
 
 # Calculate the error for the "lake-at-rest" test case where H = h+b should
-# be a constant value over time
+# be a constant value over time. Note, this assumes there is a single reference
+# water height `H0` with which to compare.
+#
+# TODO: TrixiShallowWater: where should `threshold_limiter` live? May need
+# to modify or have different versions of the `lake_at_rest_error` function
 @inline function lake_at_rest_error(u, equations::ShallowWaterEquations1D)
     h, _, b = u
-    return abs(equations.H0 - (h + b))
+
+    # For well-balancedness testing with possible wet/dry regions the reference
+    # water height `H0` accounts for the possibility that the bottom topography
+    # can emerge out of the water as well as for the threshold offset to avoid
+    # division by a "hard" zero water height.
+    H0_wet_dry = max(equations.H0, b + equations.threshold_limiter)
+
+    return abs(H0_wet_dry - (h + b))
 end
 end # @muladd
diff --git a/src/equations/shallow_water_2d.jl b/src/equations/shallow_water_2d.jl
index 9e227cd4a77..a81fddeed49 100644
--- a/src/equations/shallow_water_2d.jl
+++ b/src/equations/shallow_water_2d.jl
@@ -6,7 +6,7 @@
 #! format: noindent
 
 @doc raw"""
-    ShallowWaterEquations2D(gravity, H0)
+    ShallowWaterEquations2D(; gravity, H0 = 0, threshold_limiter = nothing, threshold_wet = nothing)
 
 Shallow water equations (SWE) in two space dimensions. The equations are given by
 ```math
@@ -27,6 +27,12 @@ also defines the total water height as ``H = h + b``.
 The additional quantity ``H_0`` is also available to store a reference value for the total
 water height that is useful to set initial conditions or test the "lake-at-rest" well-balancedness.
 
+Also, there are two thresholds which prevent numerical problems as well as instabilities. Neither of
+them needs to be passed, as default values are defined within the struct. The first one, `threshold_limiter`, is
+used in [`PositivityPreservingLimiterShallowWater`](@ref) on the water height, as a (small) shift on the initial
+condition and cutoff before the next time step. The second one, `threshold_wet`, is applied on the water height to
+define when the flow is "wet" before calculating the numerical flux.
+
 The bottom topography function ``b(x,y)`` is set inside the initial condition routine
 for a particular problem setup. To test the conservative form of the SWE one can set the bottom
 topography variable `b` to zero.
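+
+For example, the wet/dry elixirs added in this patch series construct the equations
+with custom thresholds as
+```julia
+equations = ShallowWaterEquations2D(gravity_constant=9.81, H0=1.875,
+                                    threshold_limiter=1e-12, threshold_wet=1e-14)
+```
+where both keyword arguments override the default values.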
@@ -48,16 +54,35 @@ References for the SWE are many but a good introduction is available in Chapter
 [DOI: 10.1017/CBO9780511791253](https://doi.org/10.1017/CBO9780511791253)
 """
 struct ShallowWaterEquations2D{RealT <: Real} <: AbstractShallowWaterEquations{2, 4}
+    # TODO: TrixiShallowWater: where should the `threshold_limiter` and `threshold_wet` live?
+    # how to "properly" export these constants across the two packages?
     gravity::RealT # gravitational constant
     H0::RealT # constant "lake-at-rest" total water height
+    # `threshold_limiter` used in `PositivityPreservingLimiterShallowWater` on water height,
+    # as a (small) shift on the initial condition and cutoff before the next time step.
+    # Default is 500*eps() which in double precision is ≈1e-13.
+    threshold_limiter::RealT
+    # `threshold_wet` applied on water height to define when the flow is "wet"
+    # before calculating the numerical flux.
+    # Default is 5*eps() which in double precision is ≈1e-15.
+    threshold_wet::RealT
 end
 
 # Allow for flexibility to set the gravitational constant within an elixir depending on the
 # application where `gravity_constant=1.0` or `gravity_constant=9.81` are common values.
 # The reference total water height H0 defaults to 0.0 but is used for the "lake-at-rest"
-# well-balancedness test cases
-function ShallowWaterEquations2D(; gravity_constant, H0 = 0.0)
-    ShallowWaterEquations2D(gravity_constant, H0)
+# well-balancedness test cases.
+# Strict default values for thresholds that performed well in many numerical experiments
+function ShallowWaterEquations2D(; gravity_constant, H0 = zero(gravity_constant),
+                                 threshold_limiter = nothing, threshold_wet = nothing)
+    T = promote_type(typeof(gravity_constant), typeof(H0))
+    if threshold_limiter === nothing
+        threshold_limiter = 500 * eps(T)
+    end
+    if threshold_wet === nothing
+        threshold_wet = 5 * eps(T)
+    end
+    ShallowWaterEquations2D(gravity_constant, H0, threshold_limiter, threshold_wet)
 end
 
 have_nonconservative_terms(::ShallowWaterEquations2D) = True()
@@ -431,6 +456,69 @@ Further details for the hydrostatic reconstruction and its motivation can be fou
     return u_ll_star, u_rr_star
 end
 
+# TODO: TrixiShallowWater: move wet/dry specific routine
+"""
+    hydrostatic_reconstruction_chen_noelle(u_ll, u_rr,
+                                           equations::ShallowWaterEquations2D)
+
+A particular type of hydrostatic reconstruction of the water height to guarantee well-balancedness
+for a general bottom topography of the [`ShallowWaterEquations2D`](@ref). The reconstructed solution states
+`u_ll_star` and `u_rr_star` are then used to evaluate the surface numerical flux at the interface.
+The key idea is a linear reconstruction of the bottom and water height at the interfaces using subcells.
+Use in combination with the generic numerical flux routine [`FluxHydrostaticReconstruction`](@ref).
+
+Further details on this hydrostatic reconstruction and its motivation can be found in
+- Guoxian Chen and Sebastian Noelle (2017)
+  A new hydrostatic reconstruction scheme based on subcell reconstructions
+  [DOI:10.1137/15M1053074](https://dx.doi.org/10.1137/15M1053074)
+"""
+@inline function hydrostatic_reconstruction_chen_noelle(u_ll, u_rr,
+                                                        equations::ShallowWaterEquations2D)
+    # Unpack left and right water heights and bottom topographies
+    h_ll, _, _, b_ll = u_ll
+    h_rr, _, _, b_rr = u_rr
+
+    # Get the velocities on either side
+    v1_ll, v2_ll = velocity(u_ll, equations)
+    v1_rr, v2_rr = velocity(u_rr, equations)
+
+    H_ll = b_ll + h_ll
+    H_rr = b_rr + h_rr
+
+    b_star = min(max(b_ll, b_rr), min(H_ll, H_rr))
+
+    # Compute the reconstructed water heights
+    h_ll_star = min(H_ll - b_star, h_ll)
+    h_rr_star = min(H_rr - b_star, h_rr)
+
+    # Set the water height to be at least the value stored in the variable threshold after
+    # the hydrostatic reconstruction is applied and before the numerical flux is calculated
+    # to avoid numerical problems with arbitrarily small values. Interfaces with a water height
+    # lower or equal to the threshold can be declared as dry.
+    # The default value for `threshold_wet`, ≈ 5*eps() or 1e-15 in double precision, is set
+    # in the `ShallowWaterEquations2D` struct. This threshold value can be changed in the constructor
+    # call of this equation struct in an elixir.
+    threshold = equations.threshold_wet
+
+    if (h_ll_star <= threshold)
+        h_ll_star = threshold
+        v1_ll = zero(v1_ll)
+        v2_ll = zero(v2_ll)
+    end
+
+    if (h_rr_star <= threshold)
+        h_rr_star = threshold
+        v1_rr = zero(v1_rr)
+        v2_rr = zero(v2_rr)
+    end
+
+    # Create the conservative variables using the reconstructed water heights
+    u_ll_star = SVector(h_ll_star, h_ll_star * v1_ll, h_ll_star * v2_ll, b_ll)
+    u_rr_star = SVector(h_rr_star, h_rr_star * v1_rr, h_rr_star * v2_rr, b_rr)
+
+    return u_ll_star, u_rr_star
+end
+
 """
     flux_nonconservative_audusse_etal(u_ll, u_rr, orientation::Integer,
                                       equations::ShallowWaterEquations2D)
@@ -516,6 +604,104 @@ end
     return SVector(f1, f2, f3, f4)
 end
 
+# TODO: TrixiShallowWater: move wet/dry specific routine
+"""
+    flux_nonconservative_chen_noelle(u_ll, u_rr,
+                                     orientation::Integer,
+                                     equations::ShallowWaterEquations2D)
+    flux_nonconservative_chen_noelle(u_ll, u_rr,
+                                     normal_direction_ll::AbstractVector,
+                                     normal_direction_average::AbstractVector,
+                                     equations::ShallowWaterEquations2D)
+
+Non-symmetric two-point surface flux that discretizes the nonconservative (source) term.
+The discretization uses the [`hydrostatic_reconstruction_chen_noelle`](@ref) on the conservative
+variables.
+
+Should be used together with [`FluxHydrostaticReconstruction`](@ref) and
+[`hydrostatic_reconstruction_chen_noelle`](@ref) in the surface flux to ensure consistency.
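+
+The central quantity in both routines is the subcell interface bottom value
+``b^* = \min(\max(b_ll, b_rr), \min(H_ll, H_rr))``,
+from which the reconstructed water height ``h^* = \min(H - b^*, h)`` is computed on
+each side (see the implementation below).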
+ +Further details on the hydrostatic reconstruction and its motivation can be found in +- Guoxian Chen and Sebastian Noelle (2017) + A new hydrostatic reconstruction scheme based on subcell reconstructions + [DOI:10.1137/15M1053074](https://dx.doi.org/10.1137/15M1053074) +""" +@inline function flux_nonconservative_chen_noelle(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquations2D) + # Pull the water height and bottom topography on the left + h_ll, _, _, b_ll = u_ll + h_rr, _, _, b_rr = u_rr + + H_ll = h_ll + b_ll + H_rr = h_rr + b_rr + + b_star = min(max(b_ll, b_rr), min(H_ll, H_rr)) + + # Create the hydrostatic reconstruction for the left solution state + u_ll_star, _ = hydrostatic_reconstruction_chen_noelle(u_ll, u_rr, equations) + + # Copy the reconstructed water height for easier to read code + h_ll_star = u_ll_star[1] + + z = zero(eltype(u_ll)) + # Includes two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `b_ll` to avoid + # cross-averaging across a discontinuous bottom topography + # (ii) True surface part that uses `h_ll` and `h_ll_star` to handle discontinuous bathymetry + g = equations.gravity + if orientation == 1 + f = SVector(z, + g * h_ll * b_ll - g * (h_ll_star + h_ll) * (b_ll - b_star), + z, z) + else # orientation == 2 + f = SVector(z, z, + g * h_ll * b_ll - g * (h_ll_star + h_ll) * (b_ll - b_star), + z) + end + + return f +end + +@inline function flux_nonconservative_chen_noelle(u_ll, u_rr, + normal_direction_ll::AbstractVector, + normal_direction_average::AbstractVector, + equations::ShallowWaterEquations2D) + # Pull the water height and bottom topography on the left + h_ll, _, _, b_ll = u_ll + h_rr, _, _, b_rr = u_rr + + H_ll = h_ll + b_ll + H_rr = h_rr + b_rr + + b_star = min(max(b_ll, b_rr), min(H_ll, H_rr)) + + # Create the hydrostatic reconstruction for the left solution state + u_ll_star, _ = hydrostatic_reconstruction_chen_noelle(u_ll, u_rr, equations) + + # Copy the reconstructed water height for easier to read code + h_ll_star = u_ll_star[1] + + # Comes in two parts: + # (i) Diagonal (consistent) term from the volume flux that uses `normal_direction_average` + # but we use `b_ll` to avoid cross-averaging across a discontinuous bottom topography + + f2 = normal_direction_average[1] * equations.gravity * h_ll * b_ll + f3 = normal_direction_average[2] * equations.gravity * h_ll * b_ll + + # (ii) True surface part that uses `normal_direction_ll`, `h_ll` and `h_ll_star` + # to handle discontinuous bathymetry + + f2 -= normal_direction_ll[1] * equations.gravity * (h_ll_star + h_ll) * + (b_ll - b_star) + f3 -= normal_direction_ll[2] * equations.gravity * (h_ll_star + h_ll) * + (b_ll - b_star) + + # First and last equations do not have a nonconservative flux + f1 = f4 = zero(eltype(u_ll)) + + return SVector(f1, f2, f3, f4) +end + """ flux_fjordholm_etal(u_ll, u_rr, orientation_or_normal_direction, equations::ShallowWaterEquations2D) @@ -762,6 +948,67 @@ end return λ_min, λ_max end +# TODO: TrixiShallowWater: move wet/dry specific routine +""" + min_max_speed_chen_noelle(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquations2D) + min_max_speed_chen_noelle(u_ll, u_rr, normal_direction::AbstractVector, + equations::ShallowWaterEquations2D) + +Special estimate of the minimal and maximal wave speed of the shallow water equations for +the left and right states `u_ll, u_rr`. These approximate speeds are used for the HLL-type +numerical flux [`flux_hll_chen_noelle`](@ref). 
These wave speed estimates
+together with a particular hydrostatic reconstruction technique guarantee
+that the numerical flux is positive and satisfies an entropy inequality.
+
+Further details on this hydrostatic reconstruction and its motivation can be found in
+the reference below. The definition of the wave speeds is given in Equation (2.20).
+- Guoxian Chen and Sebastian Noelle (2017)
+  A new hydrostatic reconstruction scheme based on subcell reconstructions
+  [DOI:10.1137/15M1053074](https://dx.doi.org/10.1137/15M1053074)
+"""
+@inline function min_max_speed_chen_noelle(u_ll, u_rr, orientation::Integer,
+                                           equations::ShallowWaterEquations2D)
+    h_ll = waterheight(u_ll, equations)
+    v1_ll, v2_ll = velocity(u_ll, equations)
+    h_rr = waterheight(u_rr, equations)
+    v1_rr, v2_rr = velocity(u_rr, equations)
+
+    a_ll = sqrt(equations.gravity * h_ll)
+    a_rr = sqrt(equations.gravity * h_rr)
+
+    if orientation == 1 # x-direction
+        λ_min = min(v1_ll - a_ll, v1_rr - a_rr, zero(eltype(u_ll)))
+        λ_max = max(v1_ll + a_ll, v1_rr + a_rr, zero(eltype(u_ll)))
+    else # y-direction
+        λ_min = min(v2_ll - a_ll, v2_rr - a_rr, zero(eltype(u_ll)))
+        λ_max = max(v2_ll + a_ll, v2_rr + a_rr, zero(eltype(u_ll)))
+    end
+
+    return λ_min, λ_max
+end
+
+@inline function min_max_speed_chen_noelle(u_ll, u_rr, normal_direction::AbstractVector,
+                                           equations::ShallowWaterEquations2D)
+    h_ll = waterheight(u_ll, equations)
+    v1_ll, v2_ll = velocity(u_ll, equations)
+    h_rr = waterheight(u_rr, equations)
+    v1_rr, v2_rr = velocity(u_rr, equations)
+
+    v_normal_ll = v1_ll * normal_direction[1] + v2_ll * normal_direction[2]
+    v_normal_rr = v1_rr * normal_direction[1] + v2_rr * normal_direction[2]
+
+    norm_ = norm(normal_direction)
+
+    a_ll = sqrt(equations.gravity * h_ll) * norm_
+    a_rr = sqrt(equations.gravity * h_rr) * norm_
+
+    λ_min = min(v_normal_ll - a_ll, v_normal_rr - a_rr, zero(eltype(u_ll)))
+    λ_max = max(v_normal_ll + a_ll, v_normal_rr + a_rr, zero(eltype(u_ll)))
+
+    return λ_min, λ_max
+end
+
 # More refined estimates for minimum and maximum wave speeds for HLL-type fluxes
 @inline function min_max_speed_davis(u_ll, u_rr, orientation::Integer,
                                      equations::ShallowWaterEquations2D)
@@ -1008,9 +1255,20 @@ end
 end
 
 # Calculate the error for the "lake-at-rest" test case where H = h+b should
-# be a constant value over time
+# be a constant value over time. Note, this assumes there is a single reference
+# water height `H0` with which to compare.
+#
+# TODO: TrixiShallowWater: where should `threshold_limiter` live? May need
+# to modify or have different versions of the `lake_at_rest_error` function
 @inline function lake_at_rest_error(u, equations::ShallowWaterEquations2D)
     h, _, _, b = u
-    return abs(equations.H0 - (h + b))
+
+    # For well-balancedness testing with possible wet/dry regions the reference
+    # water height `H0` accounts for the possibility that the bottom topography
+    # can emerge out of the water as well as for the threshold offset to avoid
+    # division by a "hard" zero water height.
+    H0_wet_dry = max(equations.H0, b + equations.threshold_limiter)
+
+    return abs(H0_wet_dry - (h + b))
 end
 end # @muladd
diff --git a/src/equations/shallow_water_two_layer_1d.jl b/src/equations/shallow_water_two_layer_1d.jl
index e126eec7c25..4b64481cca3 100644
--- a/src/equations/shallow_water_two_layer_1d.jl
+++ b/src/equations/shallow_water_two_layer_1d.jl
@@ -5,6 +5,8 @@
 @muladd begin
 #! format: noindent
 
+# TODO: TrixiShallowWater: 1D two layer equations should move to new package
+
 @doc raw"""
     ShallowWaterTwoLayerEquations1D(gravity, H0, rho_upper, rho_lower)
 
diff --git a/src/equations/shallow_water_two_layer_2d.jl b/src/equations/shallow_water_two_layer_2d.jl
index a54831c711f..87249e91948 100644
--- a/src/equations/shallow_water_two_layer_2d.jl
+++ b/src/equations/shallow_water_two_layer_2d.jl
@@ -5,48 +5,50 @@
 @muladd begin
 #!
format: noindent +# TODO: TrixiShallowWater: 1D two layer equations should move to new package + @doc raw""" ShallowWaterTwoLayerEquations1D(gravity, H0, rho_upper, rho_lower) diff --git a/src/equations/shallow_water_two_layer_2d.jl b/src/equations/shallow_water_two_layer_2d.jl index a54831c711f..87249e91948 100644 --- a/src/equations/shallow_water_two_layer_2d.jl +++ b/src/equations/shallow_water_two_layer_2d.jl @@ -5,48 +5,50 @@ @muladd begin #! format: noindent +# TODO: TrixiShallowWater: 2D two layer equations should move to new package + @doc raw""" ShallowWaterTwoLayerEquations2D(gravity, H0, rho_upper, rho_lower) Two-Layer Shallow water equations (2LSWE) in two space dimension. The equations are given by ```math \begin{alignat*}{8} -&\frac{\partial}{\partial t}h_{upper} +&\frac{\partial}{\partial t}h_{upper} &&+ \frac{\partial}{\partial x}\left(h_{upper} v_{1,upper}\right) -&&+ \frac{\partial}{\partial y}\left(h_{upper} v_{2,upper}\right) \quad +&&+ \frac{\partial}{\partial y}\left(h_{upper} v_{2,upper}\right) \quad &&= \quad 0 \\ -&\frac{\partial}{\partial t}\left(h_{upper} v_{1,upper}\right) -&&+ \frac{\partial}{\partial x}\left(h_{upper} v_{1,upper}^2 + \frac{gh_{upper}^2}{2}\right) -&&+ \frac{\partial}{\partial y}\left(h_{upper} v_{1,upper} v_{2,upper}\right) \quad +&\frac{\partial}{\partial t}\left(h_{upper} v_{1,upper}\right) +&&+ \frac{\partial}{\partial x}\left(h_{upper} v_{1,upper}^2 + \frac{gh_{upper}^2}{2}\right) +&&+ \frac{\partial}{\partial y}\left(h_{upper} v_{1,upper} v_{2,upper}\right) \quad &&= -gh_{upper}\frac{\partial}{\partial x}\left(b+h_{lower}\right) \\ -&\frac{\partial}{\partial t}\left(h_{upper} v_{2,upper}\right) -&&+ \frac{\partial}{\partial x}\left(h_{upper} v_{1,upper} v_{2,upper}\right) -&&+ \frac{\partial}{\partial y}\left(h_{upper} v_{2,upper}^2 + \frac{gh_{upper}^2}{2}\right) +&\frac{\partial}{\partial t}\left(h_{upper} v_{2,upper}\right) +&&+ \frac{\partial}{\partial x}\left(h_{upper} v_{1,upper} v_{2,upper}\right) +&&+ \frac{\partial}{\partial y}\left(h_{upper} v_{2,upper}^2 + \frac{gh_{upper}^2}{2}\right) &&= -gh_{upper}\frac{\partial}{\partial y}\left(b+h_{lower}\right)\\ -&\frac{\partial}{\partial t}h_{lower} -&&+ \frac{\partial}{\partial x}\left(h_{lower} v_{1,lower}\right) -&&+ \frac{\partial}{\partial y}\left(h_{lower} v_{2,lower}\right) +&\frac{\partial}{\partial t}h_{lower} +&&+ \frac{\partial}{\partial x}\left(h_{lower} v_{1,lower}\right) +&&+ \frac{\partial}{\partial y}\left(h_{lower} v_{2,lower}\right) &&= \quad 0 \\ -&\frac{\partial}{\partial t}\left(h_{lower} v_{1,lower}\right) -&&+ \frac{\partial}{\partial x}\left(h_{lower} v_{1,lower}^2 + \frac{gh_{lower}^2}{2}\right) -&&+ \frac{\partial}{\partial y}\left(h_{lower} v_{1,lower} v_{2,lower}\right) +&\frac{\partial}{\partial t}\left(h_{lower} v_{1,lower}\right) +&&+ \frac{\partial}{\partial x}\left(h_{lower} v_{1,lower}^2 + \frac{gh_{lower}^2}{2}\right) +&&+ \frac{\partial}{\partial y}\left(h_{lower} v_{1,lower} v_{2,lower}\right) &&= -gh_{lower}\frac{\partial}{\partial x}\left(b+\frac{\rho_{upper}}{\rho_{lower}} h_{upper}\right)\\ -&\frac{\partial}{\partial t}\left(h_{lower} v_{2,lower}\right) -&&+ \frac{\partial}{\partial x}\left(h_{lower} v_{1,lower} v_{2,lower}\right) -&&+ \frac{\partial}{\partial y}\left(h_{lower} v_{2,lower}^2 + \frac{gh_{lower}^2}{2}\right) +&\frac{\partial}{\partial t}\left(h_{lower} v_{2,lower}\right) +&&+ \frac{\partial}{\partial x}\left(h_{lower} v_{1,lower} v_{2,lower}\right) +&&+ \frac{\partial}{\partial y}\left(h_{lower} v_{2,lower}^2 + 
\frac{gh_{lower}^2}{2}\right) &&= -gh_{lower}\frac{\partial}{\partial y}\left(b+\frac{\rho_{upper}}{\rho_{lower}} h_{upper}\right) \end{alignat*} ``` -The unknown quantities of the 2LSWE are the water heights of the lower layer ``h_{lower}`` and the -upper +The unknown quantities of the 2LSWE are the water heights of the lower layer ``h_{lower}`` and the +upper layer ``h_{upper}`` and the respective velocities in x-direction ``v_{1,lower}`` and ``v_{1,upper}`` and in y-direction -``v_{2,lower}`` and ``v_{2,upper}``. The gravitational constant is denoted by `g`, the layer densitites by -``\rho_{upper}``and ``\rho_{lower}`` and the (possibly) variable bottom topography function by ``b(x)``. -Conservative variable water height ``h_{lower}`` is measured from the bottom topography ``b`` and ``h_{upper}`` -relative to ``h_{lower}``, therefore one also defines the total water heights as ``H_{lower} = h_{lower} + b`` and +``v_{2,lower}`` and ``v_{2,upper}``. The gravitational constant is denoted by `g`, the layer densitites by +``\rho_{upper}``and ``\rho_{lower}`` and the (possibly) variable bottom topography function by ``b(x)``. +Conservative variable water height ``h_{lower}`` is measured from the bottom topography ``b`` and ``h_{upper}`` +relative to ``h_{lower}``, therefore one also defines the total water heights as ``H_{lower} = h_{lower} + b`` and ``H_{upper} = h_{upper} + h_{lower} + b``. -The densities must be chosen such that ``\rho_{upper} < \rho_{lower}``, to make sure that the heavier fluid +The densities must be chosen such that ``\rho_{upper} < \rho_{lower}``, to make sure that the heavier fluid ``\rho_{lower}`` is in the bottom layer and the lighter fluid ``\rho_{upper}`` in the upper layer. The additional quantity ``H_0`` is also available to store a reference value for the total water @@ -55,13 +57,13 @@ height that is useful to set initial conditions or test the "lake-at-rest" well- The bottom topography function ``b(x)`` is set inside the initial condition routine for a particular problem setup. -In addition to the unknowns, Trixi currently stores the bottom topography values at the -approximation points despite being fixed in time. This is done for convenience of computing the -bottom topography gradients on the fly during the approximation as well as computing auxiliary +In addition to the unknowns, Trixi currently stores the bottom topography values at the +approximation points despite being fixed in time. This is done for convenience of computing the +bottom topography gradients on the fly during the approximation as well as computing auxiliary quantities like the total water height ``H`` or the entropy variables. This affects the implementation and use of these equations in various ways: * The flux values corresponding to the bottom topography must be zero. -* The bottom topography values must be included when defining initial conditions, boundary +* The bottom topography values must be included when defining initial conditions, boundary conditions or source terms. * [`AnalysisCallback`](@ref) analyzes this variable. * Trixi's visualization tools will visualize the bottom topography by default. @@ -113,7 +115,7 @@ end initial_condition_convergence_test(x, t, equations::ShallowWaterTwoLayerEquations2D) A smooth initial condition used for convergence tests in combination with -[`source_terms_convergence_test`](@ref). Constants must be set to ``rho_{upper} = 0.9``, +[`source_terms_convergence_test`](@ref). 
Constants must be set to ``rho_{upper} = 0.9``, ``rho_{lower} = 1.0``, ``g = 10.0``. """ function initial_condition_convergence_test(x, t, @@ -141,7 +143,7 @@ Source terms used for convergence tests in combination with """ @inline function source_terms_convergence_test(u, x, t, equations::ShallowWaterTwoLayerEquations2D) - # Same settings as in `initial_condition_convergence_test`. + # Same settings as in `initial_condition_convergence_test`. # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)]^2] ω = 2.0 * pi * sqrt(2.0) @@ -325,7 +327,7 @@ end Non-symmetric two-point volume flux discretizing the nonconservative (source) term that contains the gradient of the bottom topography [`ShallowWaterTwoLayerEquations2D`](@ref) and an -additional term that couples the momentum of both layers. This is a slightly modified version +additional term that couples the momentum of both layers. This is a slightly modified version to account for the additional source term compared to the standard SWE described in the paper. Further details are available in the paper: @@ -345,7 +347,7 @@ Further details are available in the paper: z = zero(eltype(u_ll)) # Bottom gradient nonconservative term: (0, g*h_upper*(b + h_lower)_x, g*h_upper*(b + h_lower)_y , - # 0, g*h_lower*(b + r*h_upper)_x, + # 0, g*h_lower*(b + r*h_upper)_x, # g*h_lower*(b + r*h_upper)_y, 0) if orientation == 1 f = SVector(z, @@ -397,8 +399,8 @@ end !!! warning "Experimental code" This numerical flux is experimental and may change in any future release. -Non-symmetric two-point surface flux discretizing the nonconservative (source) term that contains -the gradients of the bottom topography and an additional term that couples the momentum of both +Non-symmetric two-point surface flux discretizing the nonconservative (source) term that contains +the gradients of the bottom topography and an additional term that couples the momentum of both layers [`ShallowWaterTwoLayerEquations2D`](@ref). Further details are available in the paper: @@ -506,13 +508,13 @@ end flux_fjordholm_etal(u_ll, u_rr, orientation, equations::ShallowWaterTwoLayerEquations2D) -Total energy conservative (mathematical entropy for two-layer shallow water equations). When the -bottom topography is nonzero this should only be used as a surface flux otherwise the scheme will +Total energy conservative (mathematical entropy for two-layer shallow water equations). When the +bottom topography is nonzero this should only be used as a surface flux otherwise the scheme will not be well-balanced. For well-balancedness in the volume flux use [`flux_wintermeyer_etal`](@ref). Details are available in Eq. (4.1) in the paper: - Ulrik S. Fjordholm, Siddhartha Mishra and Eitan Tadmor (2011) - Well-balanced and energy stable schemes for the shallow water equations with discontinuous + Well-balanced and energy stable schemes for the shallow water equations with discontinuous topography [DOI: 10.1016/j.jcp.2011.03.042](https://doi.org/10.1016/j.jcp.2011.03.042) and the application to two layers is shown in the paper: - Ulrik Skre Fjordholm (2012) @@ -606,11 +608,11 @@ end """ flux_wintermeyer_etal(u_ll, u_rr, orientation, equations::ShallowWaterTwoLayerEquations2D) - + Total energy conservative (mathematical entropy for two-layer shallow water equations) split form. When the bottom topography is nonzero this scheme will be well-balanced when used as a `volume_flux`. The `surface_flux` should still use, e.g., [`flux_fjordholm_etal`](@ref). 
To obtain the flux for the
-two-layer shallow water equations the flux that is described in the paper for the normal shallow 
+two-layer shallow water equations the flux that is described in the paper for the normal shallow
 water equations is used within each layer.
 
 Further details are available in Theorem 1 of the paper:
@@ -696,9 +698,9 @@ end
     flux_es_fjordholm_etal(u_ll, u_rr, orientation_or_normal_direction,
                            equations::ShallowWaterTwoLayerEquations2D)
 
-Entropy stable surface flux for the two-layer shallow water equations. Uses the entropy conservative 
+Entropy stable surface flux for the two-layer shallow water equations. Uses the entropy conservative
 [`flux_fjordholm_etal`](@ref) and adds a Lax-Friedrichs type dissipation dependent on the jump of entropy
-variables. 
+variables.
 
 Further details are available in the paper:
 - Ulrik Skre Fjordholm (2012)
@@ -723,7 +725,7 @@ formulation.
     q_rr = cons2entropy(u_rr, equations)
     q_ll = cons2entropy(u_ll, equations)
 
-    # Average values from left and right 
+    # Average values from left and right
     u_avg = (u_ll + u_rr) / 2
 
     # Introduce variables for better readability
@@ -791,10 +793,10 @@ formulation.
 end
 
 # Calculate approximation for maximum wave speed for local Lax-Friedrichs-type dissipation as the
-# maximum velocity magnitude plus the maximum speed of sound. This function uses approximate 
-# eigenvalues using the speed of the barotropic mode as there is no simple way to calculate them 
-# analytically. 
-# 
+# maximum velocity magnitude plus the maximum speed of sound. This function uses approximate
+# eigenvalues using the speed of the barotropic mode as there is no simple way to calculate them
+# analytically.
+#
 # A good overview of the derivation is given in:
 # - Jonas Nycander, Andrew McC. Hogg, Leela M. Frankcombe (2008)
 #   Open boundary conditions for nonlinear channel Flows
@@ -914,7 +916,7 @@ end
 
 # Convert conservative variables to entropy variables
 # Note, only the first four are the entropy variables, the fifth entry still just carries the bottom
-# topography values for convenience. 
+# topography values for convenience.
 # In contrast to general usage the entropy variables are denoted with q instead of w, because w is
 # already used for velocity in y-direction
 @inline function cons2entropy(u, equations::ShallowWaterTwoLayerEquations2D)
diff --git a/src/solvers/dgsem_tree/indicators.jl b/src/solvers/dgsem_tree/indicators.jl
index b8f8a796f2b..4b83e9c1a9e 100644
--- a/src/solvers/dgsem_tree/indicators.jl
+++ b/src/solvers/dgsem_tree/indicators.jl
@@ -92,6 +92,77 @@ end
 function Base.show(io::IO, ::MIME"text/plain", indicator::IndicatorHennemannGassner)
     @nospecialize indicator # reduce precompilation time
 
+    setup = [
+        "indicator variable" => indicator.variable,
+        "max. α" => indicator.alpha_max,
+        "min. α" => indicator.alpha_min,
+        "smooth α" => (indicator.alpha_smooth ? "yes" : "no"),
+    ]
+    summary_box(io, "IndicatorHennemannGassner", setup)
+end
+
+# TODO: TrixiShallowWater: move the new indicator and all associated routines to the new package
+"""
+    IndicatorHennemannGassnerShallowWater(equations::AbstractEquations, basis;
+                                          alpha_max=0.5,
+                                          alpha_min=0.001,
+                                          alpha_smooth=true,
+                                          variable)
+
+Modified version of the [`IndicatorHennemannGassner`](@ref)
+indicator used for shock-capturing for shallow water equations. After
+the element-wise values for the blending factors are computed, an additional check
+is made to see if the element is partially wet. 
In this case, partially wet elements +are set to use the pure finite volume scheme that is guaranteed to be well-balanced +for this wet/dry transition state of the flow regime. + +See also [`VolumeIntegralShockCapturingHG`](@ref). + +## References + +- Hennemann, Gassner (2020) + "A provably entropy stable subcell shock capturing approach for high order split form DG" + [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) +""" +struct IndicatorHennemannGassnerShallowWater{RealT <: Real, Variable, Cache} <: + AbstractIndicator + alpha_max::RealT + alpha_min::RealT + alpha_smooth::Bool + variable::Variable + cache::Cache +end + +# this method is used when the indicator is constructed as for shock-capturing volume integrals +# of the shallow water equations +# It modifies the shock-capturing indicator to use full FV method in dry cells +function IndicatorHennemannGassnerShallowWater(equations::AbstractShallowWaterEquations, + basis; + alpha_max = 0.5, + alpha_min = 0.001, + alpha_smooth = true, + variable) + alpha_max, alpha_min = promote(alpha_max, alpha_min) + cache = create_cache(IndicatorHennemannGassner, equations, basis) + IndicatorHennemannGassnerShallowWater{typeof(alpha_max), typeof(variable), + typeof(cache)}(alpha_max, alpha_min, + alpha_smooth, variable, cache) +end + +function Base.show(io::IO, indicator::IndicatorHennemannGassnerShallowWater) + @nospecialize indicator # reduce precompilation time + + print(io, "IndicatorHennemannGassnerShallowWater(") + print(io, indicator.variable) + print(io, ", alpha_max=", indicator.alpha_max) + print(io, ", alpha_min=", indicator.alpha_min) + print(io, ", alpha_smooth=", indicator.alpha_smooth) + print(io, ")") +end + +function Base.show(io::IO, ::MIME"text/plain", + indicator::IndicatorHennemannGassnerShallowWater) + @nospecialize indicator # reduce precompilation time if get(io, :compact, false) show(io, indicator) @@ -102,7 +173,7 @@ function Base.show(io::IO, ::MIME"text/plain", indicator::IndicatorHennemannGass "min. α" => indicator.alpha_min, "smooth α" => (indicator.alpha_smooth ? "yes" : "no"), ] - summary_box(io, "IndicatorHennemannGassner", setup) + summary_box(io, "IndicatorHennemannGassnerShallowWater", setup) end end diff --git a/src/solvers/dgsem_tree/indicators_1d.jl b/src/solvers/dgsem_tree/indicators_1d.jl index e722584bb2e..8b57348861c 100644 --- a/src/solvers/dgsem_tree/indicators_1d.jl +++ b/src/solvers/dgsem_tree/indicators_1d.jl @@ -24,6 +24,115 @@ function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, create_cache(typ, equations, dg.basis) end +# Modified indicator for ShallowWaterEquations1D to apply full FV method on cells +# containing some "dry" LGL nodes. That is, if an element is partially "wet" then it becomes a +# full FV element. +# +# TODO: TrixiShallowWater: move new indicator type +function (indicator_hg::IndicatorHennemannGassnerShallowWater)(u::AbstractArray{<:Any, 3 + }, + mesh, + equations::ShallowWaterEquations1D, + dg::DGSEM, cache; + kwargs...) + @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg + @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded = indicator_hg.cache + # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? + # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` + # or just `resize!` whenever we call the relevant methods as we do now? 
+ resize!(alpha, nelements(dg, cache)) + if alpha_smooth + resize!(alpha_tmp, nelements(dg, cache)) + end + + # magic parameters + threshold = 0.5 * 10^(-1.8 * (nnodes(dg))^0.25) + parameter_s = log((1 - 0.0001) / 0.0001) + + # If the water height `h` at one LGL node is lower than `threshold_partially_wet` + # the indicator sets the element-wise blending factor alpha[element] = 1 + # via the local variable `indicator_wet`. In turn, this ensures that a pure + # FV method is used in partially wet cells and guarantees the well-balanced property. + # + # Hard-coded cut-off value of `threshold_partially_wet = 1e-4` was determined through many numerical experiments. + # Overall idea is to increase robustness when computing the velocity on (nearly) dry cells which + # could be "dangerous" due to division of conservative variables, e.g., v = hv / h. + # Here, the impact of the threshold on the number of cells being updated with FV is not that + # significant. However, its impact on the robustness is very significant. + # The value can be seen as a trade-off between accuracy and stability. + # Well-balancedness of the scheme on partially wet cells with hydrostatic reconstruction + # can only be proven for the FV method (see Chen and Noelle). + # Therefore we set alpha to one regardless of its given maximum value. + threshold_partially_wet = 1e-4 + + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] + + # (Re-)set dummy variable for alpha_dry + indicator_wet = 1 + + # Calculate indicator variables at Gauss-Lobatto nodes + for i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, element) + h, _, _ = u_local + + if h <= threshold_partially_wet + indicator_wet = 0 + end + + indicator[i] = indicator_hg.variable(u_local, equations) + end + + # Convert to modal representation + multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, + indicator) + + # Calculate total energies for all modes, without highest, without two highest + total_energy = zero(eltype(modal)) + for i in 1:nnodes(dg) + total_energy += modal[i]^2 + end + total_energy_clip1 = zero(eltype(modal)) + for i in 1:(nnodes(dg) - 1) + total_energy_clip1 += modal[i]^2 + end + total_energy_clip2 = zero(eltype(modal)) + for i in 1:(nnodes(dg) - 2) + total_energy_clip2 += modal[i]^2 + end + + # Calculate energy in higher modes + energy = max((total_energy - total_energy_clip1) / total_energy, + (total_energy_clip1 - total_energy_clip2) / total_energy_clip1) + + alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) + + # Take care of the case close to pure DG + if alpha_element < alpha_min + alpha_element = zero(alpha_element) + end + + # Take care of the case close to pure FV + if alpha_element > 1 - alpha_min + alpha_element = one(alpha_element) + end + + # Clip the maximum amount of FV allowed or set to one depending on indicator_wet + if indicator_wet == 0 + alpha[element] = 1 + else # Element is not defined as dry but wet + alpha[element] = min(alpha_max, alpha_element) + end + end + + if alpha_smooth + apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) + end + + return alpha +end + # Use this function barrier and unpack inside to avoid passing closures to Polyester.jl # with @batch (@threaded). # Otherwise, @threaded does not work here with Julia ARM on macOS. 
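The element-wise logic added above is compact enough to read on its own. The following is an illustration only, not part of the patch: `blending_factor_sketch` is a hypothetical helper name, `modal` is assumed to hold the Legendre modal coefficients of the indicator variable on one element (with at least three nodes), and `h_min` the smallest water height among its LGL nodes. The 2D indicator in the next hunk follows the same pattern with doubly indexed modes.

```julia
# Hypothetical distillation of the wet/dry blending logic above (illustration only).
function blending_factor_sketch(modal::AbstractVector, h_min::Real;
                                alpha_max = 0.5, alpha_min = 0.001,
                                threshold_partially_wet = 1e-4)
    # A partially wet element always falls back to the pure, well-balanced FV scheme.
    h_min <= threshold_partially_wet && return one(h_min)

    # "Magic parameters" as defined in the indicator above.
    threshold = 0.5 * 10^(-1.8 * length(modal)^0.25)
    parameter_s = log((1 - 0.0001) / 0.0001)

    # Relative energy content of the highest and second-highest modes.
    total = sum(abs2, modal)
    clip1 = sum(abs2, modal[1:(end - 1)])
    clip2 = sum(abs2, modal[1:(end - 2)])
    energy = max((total - clip1) / total, (clip1 - clip2) / clip1)

    alpha = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold)))

    # Snap to pure DG or pure FV close to the limits, otherwise clip the FV amount.
    alpha < alpha_min && return zero(alpha)
    alpha > 1 - alpha_min && return one(alpha)
    return min(alpha_max, alpha)
end
```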
diff --git a/src/solvers/dgsem_tree/indicators_2d.jl b/src/solvers/dgsem_tree/indicators_2d.jl index 085cb71ad0c..f7c78547174 100644 --- a/src/solvers/dgsem_tree/indicators_2d.jl +++ b/src/solvers/dgsem_tree/indicators_2d.jl @@ -28,6 +28,116 @@ function create_cache(typ::Type{IndicatorHennemannGassner}, mesh, create_cache(typ, equations, dg.basis) end +# Modified indicator for ShallowWaterEquations2D to apply full FV method on cells +# containing some "dry" LGL nodes. That is, if an element is partially "wet" then it becomes a +# full FV element. +# +# TODO: TrixiShallowWater: move new indicator type +function (indicator_hg::IndicatorHennemannGassnerShallowWater)(u::AbstractArray{<:Any, 4 + }, + mesh, + equations::ShallowWaterEquations2D, + dg::DGSEM, cache; + kwargs...) + @unpack alpha_max, alpha_min, alpha_smooth, variable = indicator_hg + @unpack alpha, alpha_tmp, indicator_threaded, modal_threaded, modal_tmp1_threaded = indicator_hg.cache + # TODO: Taal refactor, when to `resize!` stuff changed possibly by AMR? + # Shall we implement `resize!(semi::AbstractSemidiscretization, new_size)` + # or just `resize!` whenever we call the relevant methods as we do now? + resize!(alpha, nelements(dg, cache)) + if alpha_smooth + resize!(alpha_tmp, nelements(dg, cache)) + end + + # magic parameters + threshold = 0.5 * 10^(-1.8 * (nnodes(dg))^0.25) + parameter_s = log((1 - 0.0001) / 0.0001) + + # If the water height `h` at one LGL node is lower than `threshold_partially_wet` + # the indicator sets the element-wise blending factor alpha[element] = 1 + # via the local variable `indicator_wet`. In turn, this ensures that a pure + # FV method is used in partially wet cells and guarantees the well-balanced property. + # + # Hard-coded cut-off value of `threshold_partially_wet = 1e-4` was determined through many numerical experiments. + # Overall idea is to increase robustness when computing the velocity on (nearly) dry cells which + # could be "dangerous" due to division of conservative variables, e.g., v1 = hv1 / h. + # Here, the impact of the threshold on the number of cells being updated with FV is not that + # significant. However, its impact on the robustness is very significant. + # The value can be seen as a trade-off between accuracy and stability. + # Well-balancedness of the scheme on partially wet cells with hydrostatic reconstruction + # can only be proven for the FV method (see Chen and Noelle). + # Therefore we set alpha to be one regardless of its given value from the modal indicator. 
+ threshold_partially_wet = 1e-4 + + @threaded for element in eachelement(dg, cache) + indicator = indicator_threaded[Threads.threadid()] + modal = modal_threaded[Threads.threadid()] + modal_tmp1 = modal_tmp1_threaded[Threads.threadid()] + + # (Re-)set dummy variable for alpha_dry + indicator_wet = 1 + + # Calculate indicator variables at Gauss-Lobatto nodes + for j in eachnode(dg), i in eachnode(dg) + u_local = get_node_vars(u, equations, dg, i, j, element) + h, _, _, _ = u_local + + if h <= threshold_partially_wet + indicator_wet = 0 + end + + indicator[i, j] = indicator_hg.variable(u_local, equations) + end + + # Convert to modal representation + multiply_scalar_dimensionwise!(modal, dg.basis.inverse_vandermonde_legendre, + indicator, modal_tmp1) + + # Calculate total energies for all modes, without highest, without two highest + total_energy = zero(eltype(modal)) + for j in 1:nnodes(dg), i in 1:nnodes(dg) + total_energy += modal[i, j]^2 + end + total_energy_clip1 = zero(eltype(modal)) + for j in 1:(nnodes(dg) - 1), i in 1:(nnodes(dg) - 1) + total_energy_clip1 += modal[i, j]^2 + end + total_energy_clip2 = zero(eltype(modal)) + for j in 1:(nnodes(dg) - 2), i in 1:(nnodes(dg) - 2) + total_energy_clip2 += modal[i, j]^2 + end + + # Calculate energy in higher modes + energy = max((total_energy - total_energy_clip1) / total_energy, + (total_energy_clip1 - total_energy_clip2) / total_energy_clip1) + + alpha_element = 1 / (1 + exp(-parameter_s / threshold * (energy - threshold))) + + # Take care of the case close to pure DG + if alpha_element < alpha_min + alpha_element = zero(alpha_element) + end + + # Take care of the case close to pure FV + if alpha_element > 1 - alpha_min + alpha_element = one(alpha_element) + end + + # Clip the maximum amount of FV allowed or set to 1 depending on indicator_wet + if indicator_wet == 0 + alpha[element] = 1 + else # Element is not defined as dry but wet + alpha[element] = min(alpha_max, alpha_element) + end + end + + if alpha_smooth + apply_smoothing!(mesh, alpha, alpha_tmp, dg, cache) + end + + return alpha +end + # Use this function barrier and unpack inside to avoid passing closures to Polyester.jl # with @batch (@threaded). # Otherwise, @threaded does not work here with Julia ARM on macOS. 
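For orientation, a DGSEM solver would opt into this indicator roughly as in the wet/dry elixirs exercised by the tests below. This is a sketch only: the equation parameters and flux combinations are illustrative stand-ins rather than the elixirs' exact choices, and `waterheight_pressure` is one typical choice of indicator variable.

```julia
using Trixi

# Illustrative setup; real wet/dry elixirs may use different fluxes and parameters.
equations = ShallowWaterEquations2D(gravity_constant = 9.81, H0 = 1.0)
volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
surface_flux = (flux_lax_friedrichs, flux_nonconservative_fjordholm_etal)

basis = LobattoLegendreBasis(4)
indicator_sc = IndicatorHennemannGassnerShallowWater(equations, basis,
                                                     alpha_max = 0.5,
                                                     alpha_min = 0.001,
                                                     alpha_smooth = true,
                                                     variable = waterheight_pressure)
volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
                                                 volume_flux_dg = volume_flux,
                                                 volume_flux_fv = surface_flux)
solver = DGSEM(basis, surface_flux, volume_integral)
```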
diff --git a/test/Project.toml b/test/Project.toml index 7d386415227..cae1d4ff396 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -17,6 +17,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/test_structured_2d.jl b/test/test_structured_2d.jl index 16fc72f0a46..75937ba82ad 100644 --- a/test/test_structured_2d.jl +++ b/test/test_structured_2d.jl @@ -1,5 +1,7 @@ module TestExamplesStructuredMesh2D +# TODO: TrixiShallowWater: move any wet/dry tests to new package + using Test using Trixi @@ -20,7 +22,7 @@ isdir(outdir) && rm(outdir, recursive=true) end @trixi_testset "elixir_advection_coupled.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_coupled.jl"), + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_coupled.jl"), l2 = [7.816742843181738e-6, 7.816742843196112e-6], linf = [6.314906965543265e-5, 6.314906965410039e-5], coverage_override = (maxiters=10^5,)) @@ -270,6 +272,27 @@ isdir(outdir) && rm(outdir, recursive=true) tspan = (0.0, 0.25)) end + @trixi_testset "elixir_shallowwater_well_balanced_wet_dry.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_well_balanced_wet_dry.jl"), + l2 = [0.019731646454942086, 1.0694532773278277e-14, 1.1969913383405568e-14, 0.0771517260037954], + linf = [0.4999999999998892, 6.067153702623552e-14, 4.4849667259339357e-14, 1.9999999999999993], + tspan = (0.0, 0.25)) + end + + @trixi_testset "elixir_shallowwater_conical_island.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_conical_island.jl"), + l2 = [0.04593154164306353, 0.1644534881916908, 0.16445348819169076, 0.0011537702354532122], + linf = [0.21100717610846442, 0.9501592344310412, 0.950159234431041, 0.021790250683516296], + tspan = (0.0, 0.025)) + end + + @trixi_testset "elixir_shallowwater_parabolic_bowl.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_parabolic_bowl.jl"), + l2 = [0.00015285369980313484, 1.9536806395943226e-5, 9.936906607758672e-5, 5.0686313334616055e-15], + linf = [0.003316119030459211, 0.0005075409427972817, 0.001986721761060583, 4.701794509287538e-14], + tspan = (0.0, 0.025), cells_per_dimension = (40, 40)) + end + @trixi_testset "elixir_mhd_ec_shockcapturing.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_ec_shockcapturing.jl"), l2 = [0.0364192725149364, 0.0426667193422069, 0.04261673001449095, 0.025884071405646924, diff --git a/test/test_tree_1d_shallowwater.jl b/test/test_tree_1d_shallowwater.jl index 1c3bac1fab6..cafa17edd4c 100644 --- a/test/test_tree_1d_shallowwater.jl +++ b/test/test_tree_1d_shallowwater.jl @@ -1,5 +1,7 @@ module TestExamples1DShallowWater +# TODO: TrixiShallowWater: move any wet/dry tests to new package + using Test using Trixi @@ -38,6 +40,13 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") tspan = (0.0, 0.25)) end + @trixi_testset "elixir_shallowwater_well_balanced_wet_dry.jl with FluxHydrostaticReconstruction" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_well_balanced_wet_dry.jl"), + l2 = [0.00965787167169024, 5.345454081916856e-14, 0.03857583749209928], + linf = [0.4999999999998892, 2.2447689894899726e-13, 1.9999999999999714], + tspan = (0.0, 0.25)) + end + 
@trixi_testset "elixir_shallowwater_source_terms.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_source_terms.jl"), l2 = [0.0022363707373868713, 0.01576799981934617, 4.436491725585346e-5], @@ -88,6 +97,20 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") linf = [1.1209754279344226, 1.3230788645853582, 0.8646939843534251], tspan = (0.0, 0.05)) end + + @trixi_testset "elixir_shallowwater_beach.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_beach.jl"), + l2 = [0.17979210479598923, 1.2377495706611434, 6.289818963361573e-8], + linf = [0.845938394800688, 3.3740800777086575, 4.4541473087633676e-7], + tspan = (0.0, 0.05)) + end + + @trixi_testset "elixir_shallowwater_parabolic_bowl.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_parabolic_bowl.jl"), + l2 = [8.965981683033589e-5, 1.8565707397810857e-5, 4.1043039226164336e-17], + linf = [0.00041080213807871235, 0.00014823261488938177, 2.220446049250313e-16], + tspan = (0.0, 0.05)) + end end end # module diff --git a/test/test_tree_1d_shallowwater_twolayer.jl b/test/test_tree_1d_shallowwater_twolayer.jl index 0d8a83806f9..8372d0d4676 100644 --- a/test/test_tree_1d_shallowwater_twolayer.jl +++ b/test/test_tree_1d_shallowwater_twolayer.jl @@ -1,5 +1,7 @@ module TestExamples1DShallowWaterTwoLayer +# TODO: TrixiShallowWater: move two layer tests to new package + using Test using Trixi diff --git a/test/test_tree_2d_shallowwater.jl b/test/test_tree_2d_shallowwater.jl index f465a177a67..7670d28f43a 100644 --- a/test/test_tree_2d_shallowwater.jl +++ b/test/test_tree_2d_shallowwater.jl @@ -1,5 +1,7 @@ module TestExamples2DShallowWater +# TODO: TrixiShallowWater: move any wet/dry tests to new package + using Test using Trixi @@ -37,6 +39,13 @@ EXAMPLES_DIR = joinpath(examples_dir(), "tree_2d_dgsem") tspan = (0.0, 0.25)) end + @trixi_testset "elixir_shallowwater_well_balanced_wet_dry.jl with FluxHydrostaticReconstruction" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_well_balanced_wet_dry.jl"), + l2 = [0.030186039395610056, 2.513287752536758e-14, 1.3631397744897607e-16, 0.10911781485920438], + linf = [0.49999999999993505, 5.5278950497971455e-14, 7.462550826772548e-16, 2.0], + tspan = (0.0, 0.25)) + end + @trixi_testset "elixir_shallowwater_source_terms.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_source_terms.jl"), l2 = [0.001868474306068482, 0.01731687445878443, 0.017649083171490863, 6.274146767717023e-5], @@ -57,6 +66,21 @@ EXAMPLES_DIR = joinpath(examples_dir(), "tree_2d_dgsem") linf = [0.015156105797771602, 0.07964811135780492, 0.0839787097210376, 0.0001819675955490041], tspan = (0.0, 0.025), surface_flux=(flux_hll, flux_nonconservative_fjordholm_etal)) end + + @trixi_testset "elixir_shallowwater_conical_island.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_conical_island.jl"), + l2 = [0.0459315416430658, 0.1644534881916991, 0.16445348819169914, 0.0011537702354532694], + linf = [0.21100717610846464, 0.9501592344310412, 0.9501592344310417, 0.021790250683516282], + tspan = (0.0, 0.025)) + end + + @trixi_testset "elixir_shallowwater_parabolic_bowl.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_parabolic_bowl.jl"), + l2 = [0.00025345501281482687, 4.4525120338817177e-5, 0.00015991819160294247, 7.750412064917294e-15], + linf = [0.004664246019836723, 0.0004972780116736669, 0.0028735707270457628, 6.866729407306593e-14], + tspan = (0.0, 0.025), + basis = 
LobattoLegendreBasis(3)) + end end end # module diff --git a/test/test_tree_2d_shallowwater_twolayer.jl b/test/test_tree_2d_shallowwater_twolayer.jl index 4bb45064714..7ad5b0f7316 100644 --- a/test/test_tree_2d_shallowwater_twolayer.jl +++ b/test/test_tree_2d_shallowwater_twolayer.jl @@ -1,5 +1,7 @@ module TestExamples2DShallowWaterTwoLayer +# TODO: TrixiShallowWater: move two layer tests to new package + using Test using Trixi @@ -19,10 +21,10 @@ EXAMPLES_DIR = joinpath(examples_dir(), "tree_2d_dgsem") @trixi_testset "elixir_shallowwater_twolayer_convergence.jl with flux_es_fjordholm_etal" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_twolayer_convergence.jl"), - l2 = [0.00024709443131137236, 0.0019215286339769443, 0.0023833298173254447, + l2 = [0.00024709443131137236, 0.0019215286339769443, 0.0023833298173254447, 0.00021258247976270914, 0.0011299428031136195, 0.0009191313765262401, - 8.873630921431545e-6], - linf = [0.0016099763244645793, 0.007659242165565017, 0.009123320235427057, + 8.873630921431545e-6], + linf = [0.0016099763244645793, 0.007659242165565017, 0.009123320235427057, 0.0013496983982568267, 0.0035573687287770994, 0.00296823235874899, 3.361991620143279e-5], surface_flux = (flux_es_fjordholm_etal, flux_nonconservative_fjordholm_etal), @@ -31,19 +33,19 @@ EXAMPLES_DIR = joinpath(examples_dir(), "tree_2d_dgsem") @trixi_testset "elixir_shallowwater_twolayer_well_balanced.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_twolayer_well_balanced.jl"), - l2 = [3.2935164267930016e-16, 4.6800825611195103e-17, 4.843057532147818e-17, - 0.0030769233188015013, 1.4809161150389857e-16, 1.509071695038043e-16, + l2 = [3.2935164267930016e-16, 4.6800825611195103e-17, 4.843057532147818e-17, + 0.0030769233188015013, 1.4809161150389857e-16, 1.509071695038043e-16, 0.0030769233188014935], - linf = [2.248201624865942e-15, 2.346382070278936e-16, 2.208565017494899e-16, - 0.026474051138910493, 9.237568031609006e-16, 7.520758026187046e-16, + linf = [2.248201624865942e-15, 2.346382070278936e-16, 2.208565017494899e-16, + 0.026474051138910493, 9.237568031609006e-16, 7.520758026187046e-16, 0.026474051138910267], tspan = (0.0, 0.25)) end @trixi_testset "elixir_shallowwater_twolayer_well_balanced with flux_lax_friedrichs.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_twolayer_well_balanced.jl"), - l2 = [2.0525741072929735e-16, 6.000589392730905e-17, 6.102759428478984e-17, - 0.0030769233188014905, 1.8421386173122792e-16, 1.8473184927121752e-16, + l2 = [2.0525741072929735e-16, 6.000589392730905e-17, 6.102759428478984e-17, + 0.0030769233188014905, 1.8421386173122792e-16, 1.8473184927121752e-16, 0.0030769233188014935], linf = [7.355227538141662e-16, 2.960836949170518e-16, 4.2726562436938764e-16, 0.02647405113891016, 1.038795478061861e-15, 1.0401789378532516e-15, diff --git a/test/test_unit.jl b/test/test_unit.jl index 2ce111b2bf4..e70a9be6a4a 100644 --- a/test/test_unit.jl +++ b/test/test_unit.jl @@ -402,6 +402,10 @@ isdir(outdir) && rm(outdir, recursive=true) indicator_hg = IndicatorHennemannGassner(1.0, 0.0, true, "variable", "cache") @test_nowarn show(stdout, indicator_hg) + # TODO: TrixiShallowWater: move unit test + indicator_hg_swe = IndicatorHennemannGassnerShallowWater(1.0, 0.0, true, "variable", "cache") + @test_nowarn show(stdout, indicator_hg_swe) + indicator_loehner = IndicatorLöhner(1.0, "variable", (; cache=nothing)) @test_nowarn show(stdout, indicator_loehner) diff --git a/test/test_unstructured_2d.jl 
b/test/test_unstructured_2d.jl index d4b0d150ca1..fbe88a2a0a3 100644 --- a/test/test_unstructured_2d.jl +++ b/test/test_unstructured_2d.jl @@ -1,5 +1,7 @@ module TestExamplesUnstructuredMesh2D +# TODO: TrixiShallowWater: move any wet/dry and two layer tests + using Test using Trixi @@ -134,6 +136,14 @@ isdir(outdir) && rm(outdir, recursive=true) tspan = (0.0, 0.025)) end + @trixi_testset "elixir_shallowwater_source_terms.jl with flux_hll" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_source_terms.jl"), + l2 = [0.0011197139793938727, 0.015430259691311309, 0.017081031802719554, 5.089218476759981e-6], + linf = [0.014300809338967824, 0.12783372461224918, 0.17625472321993918, 2.6407324614341476e-5], + surface_flux=(flux_hll, flux_nonconservative_fjordholm_etal), + tspan = (0.0, 0.025)) + end + @trixi_testset "elixir_shallowwater_dirichlet.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_dirichlet.jl"), l2 = [1.1577518608940115e-5, 4.867189932537344e-13, 4.647273240470541e-13, 1.1577518608933468e-5], @@ -155,6 +165,14 @@ isdir(outdir) && rm(outdir, recursive=true) tspan = (0.0, 0.25)) end + @trixi_testset "elixir_shallowwater_three_mound_dam_break.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_three_mound_dam_break.jl"), + l2 = [0.0892957892027502, 0.30648836484407915, 2.28712547616214e-15, 0.0008778654298684622], + linf = [0.850329472915091, 2.330631694956507, 5.783660020252348e-14, 0.04326237921249021], + basis = LobattoLegendreBasis(3), + tspan = (0.0, 0.25)) + end + @trixi_testset "elixir_shallowwater_twolayer_convergence.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_twolayer_convergence.jl"), l2 = [0.0007953969898161991, 0.00882074628714633, 0.0024322572528892934, From c97eb8c47a0a092ab73889c8ce7c838ba7864127 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Sat, 15 Jul 2023 06:51:23 +0200 Subject: [PATCH 082/163] set version to v0.5.33 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 4a289380850..9dd7ecd023f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.33-pre" +version = "0.5.33" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From 10e0fa93320cd8722d482a89a0e944955b0c23ee Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Sat, 15 Jul 2023 06:51:38 +0200 Subject: [PATCH 083/163] set development version to v0.5.34-pre --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 9dd7ecd023f..6c3c7fa0208 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. 
Winters ", "Jesse Chan "]
-version = "0.5.33"
+version = "0.5.34-pre"
 
 [deps]
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"

From 07981c3e50943a9bdb1a845f918a4e8831714338 Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Sat, 15 Jul 2023 12:52:38 +0200
Subject: [PATCH 084/163] throw better error message with MPI for TreeMesh1D
 (#1569)

---
 src/meshes/tree_mesh.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/meshes/tree_mesh.jl b/src/meshes/tree_mesh.jl
index 34794ded852..93ba982bce9 100644
--- a/src/meshes/tree_mesh.jl
+++ b/src/meshes/tree_mesh.jl
@@ -125,9 +125,9 @@ function TreeMesh(coordinates_min::NTuple{NDIMS, Real},
 
     # TODO: MPI, create nice interface for a parallel tree/mesh
     if mpi_isparallel()
-        if mpi_isroot() && NDIMS == 3
+        if mpi_isroot() && NDIMS != 2
             println(stderr,
-                    "ERROR: TreeMesh3D does not support parallel execution with MPI")
+                    "ERROR: The TreeMesh supports parallel execution with MPI only in 2 dimensions")
             MPI.Abort(mpi_comm(), 1)
         end
         TreeType = ParallelTree{NDIMS}

From 932f43358acba90e3de7909c2bd18c8b630c66b9 Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Sat, 15 Jul 2023 16:36:48 +0200
Subject: [PATCH 085/163] allow periodic FDSBP operators (#1570)

* enable fully periodic upwind SBP operators

* 2D and 3D tests

* comment on PeriodicFDSBP
---
 Project.toml | 2 +-
 .../tree_1d_fdsbp/elixir_advection_upwind.jl | 3 +-
 .../elixir_advection_upwind_periodic.jl | 57 +++++++++++++++++++
 src/solvers/fdsbp_tree/fdsbp.jl | 3 +
 src/solvers/fdsbp_tree/fdsbp_1d.jl | 28 ++++++++-
 src/solvers/fdsbp_tree/fdsbp_2d.jl | 28 ++++++++-
 src/solvers/fdsbp_tree/fdsbp_3d.jl | 28 ++++++++-
 test/test_tree_1d_fdsbp.jl | 15 +++++
 test/test_tree_2d_fdsbp.jl | 18 ++++++
 test/test_tree_3d_fdsbp.jl | 23 +++++++-
 10 files changed, 196 insertions(+), 9 deletions(-)
 create mode 100644 examples/tree_1d_fdsbp/elixir_advection_upwind_periodic.jl

diff --git a/Project.toml b/Project.toml
index 6c3c7fa0208..a49cfb2e254 100644
--- a/Project.toml
+++ b/Project.toml
@@ -79,7 +79,7 @@ StaticArrayInterface = "1.4"
 StaticArrays = "1"
 StrideArrays = "0.1.18"
 StructArrays = "0.6"
-SummationByPartsOperators = "0.5.25"
+SummationByPartsOperators = "0.5.41"
 TimerOutputs = "0.5"
 Triangulate = "2.0"
 TriplotBase = "0.1"
diff --git a/examples/tree_1d_fdsbp/elixir_advection_upwind.jl b/examples/tree_1d_fdsbp/elixir_advection_upwind.jl
index 5c50e1a6c64..18dd818e3ca 100644
--- a/examples/tree_1d_fdsbp/elixir_advection_upwind.jl
+++ b/examples/tree_1d_fdsbp/elixir_advection_upwind.jl
@@ -27,7 +27,8 @@ coordinates_min = -1.0
 coordinates_max = 1.0
 mesh = TreeMesh(coordinates_min, coordinates_max,
                 initial_refinement_level = 4,
-                n_cells_max = 10_000)
+                n_cells_max = 10_000,
+                periodicity = true)
 
 semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_sin, solver)
 
diff --git a/examples/tree_1d_fdsbp/elixir_advection_upwind_periodic.jl b/examples/tree_1d_fdsbp/elixir_advection_upwind_periodic.jl
new file mode 100644
index 00000000000..3eb805095f4
--- /dev/null
+++ b/examples/tree_1d_fdsbp/elixir_advection_upwind_periodic.jl
@@ -0,0 +1,57 @@
+# !!! warning "Experimental implementation (upwind SBP)"
+#     This is an experimental feature and may change in future releases. 
+ +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the linear scalar advection equation equation + +equations = LinearScalarAdvectionEquation1D(1.0) + +function initial_condition_sin(x, t, equation::LinearScalarAdvectionEquation1D) + return SVector(sinpi(x[1] - equations.advection_velocity[1] * t)) +end + +D_upw = upwind_operators(SummationByPartsOperators.periodic_derivative_operator, + accuracy_order = 4, + xmin = -1.0, xmax = 1.0, + N = 64) +flux_splitting = splitting_lax_friedrichs +solver = FDSBP(D_upw, + surface_integral = SurfaceIntegralUpwind(flux_splitting), + volume_integral = VolumeIntegralUpwind(flux_splitting)) + +coordinates_min = -1.0 +coordinates_max = 1.0 +mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level = 0, + n_cells_max = 10_000, + periodicity = true) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_sin, solver) + + +############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 2.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 1000 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +callbacks = CallbackSet(summary_callback, + analysis_callback, alive_callback) + + +############################################################################### +# run the simulation + +sol = solve(ode, RDPK3SpFSAL49(); abstol=1.0e-6, reltol=1.0e-6, + ode_default_options()..., callback=callbacks); +summary_callback() # print the timer summary diff --git a/src/solvers/fdsbp_tree/fdsbp.jl b/src/solvers/fdsbp_tree/fdsbp.jl index cbb6fd16243..11b09c6df9c 100644 --- a/src/solvers/fdsbp_tree/fdsbp.jl +++ b/src/solvers/fdsbp_tree/fdsbp.jl @@ -27,6 +27,9 @@ The other arguments have the same meaning as in [`DG`](@ref) or [`DGSEM`](@ref). """ const FDSBP = DG{Basis} where {Basis <: AbstractDerivativeOperator} +# Internal abbreviation for easier-to-read dispatch (not exported) +const PeriodicFDSBP = FDSBP{Basis} where {Basis <: AbstractPeriodicDerivativeOperator} + function FDSBP(D_SBP::AbstractDerivativeOperator; surface_integral, volume_integral) # `nothing` is passed as `mortar` return DG(D_SBP, nothing, surface_integral, volume_integral) diff --git a/src/solvers/fdsbp_tree/fdsbp_1d.jl b/src/solvers/fdsbp_tree/fdsbp_1d.jl index c7712074940..0de0cff4851 100644 --- a/src/solvers/fdsbp_tree/fdsbp_1d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_1d.jl @@ -165,6 +165,14 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{1}, return nothing end +# Periodic FDSBP operators need to use a single element without boundaries +function calc_surface_integral!(du, u, mesh::TreeMesh1D, + equations, surface_integral::SurfaceIntegralStrongForm, + dg::PeriodicFDSBP, cache) + @assert nelements(dg, cache) == 1 + return nothing +end + # Specialized interface flux computation because the upwind solver does # not require a standard numerical flux (Riemann solver). 
The flux splitting # already separates the solution information into right-traveling and @@ -239,13 +247,25 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{1}, return nothing end +# Periodic FDSBP operators need to use a single element without boundaries +function calc_surface_integral!(du, u, mesh::TreeMesh1D, + equations, surface_integral::SurfaceIntegralUpwind, + dg::PeriodicFDSBP, cache) + @assert nelements(dg, cache) == 1 + return nothing +end + # AnalysisCallback function integrate_via_indices(func::Func, u, mesh::TreeMesh{1}, equations, dg::FDSBP, cache, args...; normalize = true) where {Func} # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + M = SummationByPartsOperators.mass_matrix(dg.basis) + if M isa UniformScaling + M = M(nnodes(dg)) + end + weights = diag(M) # Initialize integral with zeros of the right shape integral = zero(func(u, 1, 1, equations, dg, args...)) @@ -271,7 +291,11 @@ function calc_error_norms(func, u, t, analyzer, mesh::TreeMesh{1}, equations, initial_condition, dg::FDSBP, cache, cache_analysis) # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + M = SummationByPartsOperators.mass_matrix(dg.basis) + if M isa UniformScaling + M = M(nnodes(dg)) + end + weights = diag(M) @unpack node_coordinates = cache.elements # Set up data structures diff --git a/src/solvers/fdsbp_tree/fdsbp_2d.jl b/src/solvers/fdsbp_tree/fdsbp_2d.jl index 241e0d95342..beff605629a 100644 --- a/src/solvers/fdsbp_tree/fdsbp_2d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_2d.jl @@ -201,6 +201,14 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{2}, return nothing end +# Periodic FDSBP operators need to use a single element without boundaries +function calc_surface_integral!(du, u, mesh::TreeMesh2D, + equations, surface_integral::SurfaceIntegralStrongForm, + dg::PeriodicFDSBP, cache) + @assert nelements(dg, cache) == 1 + return nothing +end + # Specialized interface flux computation because the upwind solver does # not require a standard numerical flux (Riemann solver). The flux splitting # already separates the solution information into right-traveling and @@ -295,12 +303,24 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{2}, return nothing end +# Periodic FDSBP operators need to use a single element without boundaries +function calc_surface_integral!(du, u, mesh::TreeMesh2D, + equations, surface_integral::SurfaceIntegralUpwind, + dg::PeriodicFDSBP, cache) + @assert nelements(dg, cache) == 1 + return nothing +end + # AnalysisCallback function integrate_via_indices(func::Func, u, mesh::TreeMesh{2}, equations, dg::FDSBP, cache, args...; normalize = true) where {Func} # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + M = SummationByPartsOperators.mass_matrix(dg.basis) + if M isa UniformScaling + M = M(nnodes(dg)) + end + weights = diag(M) # Initialize integral with zeros of the right shape integral = zero(func(u, 1, 1, 1, equations, dg, args...)) @@ -326,7 +346,11 @@ function calc_error_norms(func, u, t, analyzer, mesh::TreeMesh{2}, equations, initial_condition, dg::FDSBP, cache, cache_analysis) # TODO: FD. This is rather inefficient right now and allocates... 
- weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + M = SummationByPartsOperators.mass_matrix(dg.basis) + if M isa UniformScaling + M = M(nnodes(dg)) + end + weights = diag(M) @unpack node_coordinates = cache.elements # Set up data structures diff --git a/src/solvers/fdsbp_tree/fdsbp_3d.jl b/src/solvers/fdsbp_tree/fdsbp_3d.jl index a4f69d3d481..0c3f18b6d6e 100644 --- a/src/solvers/fdsbp_tree/fdsbp_3d.jl +++ b/src/solvers/fdsbp_tree/fdsbp_3d.jl @@ -237,6 +237,14 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{3}, return nothing end +# Periodic FDSBP operators need to use a single element without boundaries +function calc_surface_integral!(du, u, mesh::TreeMesh3D, + equations, surface_integral::SurfaceIntegralStrongForm, + dg::PeriodicFDSBP, cache) + @assert nelements(dg, cache) == 1 + return nothing +end + # Specialized interface flux computation because the upwind solver does # not require a standard numerical flux (Riemann solver). The flux splitting # already separates the solution information into right-traveling and @@ -346,13 +354,25 @@ function calc_surface_integral!(du, u, mesh::TreeMesh{3}, return nothing end +# Periodic FDSBP operators need to use a single element without boundaries +function calc_surface_integral!(du, u, mesh::TreeMesh3D, + equations, surface_integral::SurfaceIntegralUpwind, + dg::PeriodicFDSBP, cache) + @assert nelements(dg, cache) == 1 + return nothing +end + # AnalysisCallback function integrate_via_indices(func::Func, u, mesh::TreeMesh{3}, equations, dg::FDSBP, cache, args...; normalize = true) where {Func} # TODO: FD. This is rather inefficient right now and allocates... - weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + M = SummationByPartsOperators.mass_matrix(dg.basis) + if M isa UniformScaling + M = M(nnodes(dg)) + end + weights = diag(M) # Initialize integral with zeros of the right shape integral = zero(func(u, 1, 1, 1, 1, equations, dg, args...)) @@ -378,7 +398,11 @@ function calc_error_norms(func, u, t, analyzer, mesh::TreeMesh{3}, equations, initial_condition, dg::FDSBP, cache, cache_analysis) # TODO: FD. This is rather inefficient right now and allocates... 
- weights = diag(SummationByPartsOperators.mass_matrix(dg.basis)) + M = SummationByPartsOperators.mass_matrix(dg.basis) + if M isa UniformScaling + M = M(nnodes(dg)) + end + weights = diag(M) @unpack node_coordinates = cache.elements # Set up data structures diff --git a/test/test_tree_1d_fdsbp.jl b/test/test_tree_1d_fdsbp.jl index 118385c34b3..ce0ca660d35 100644 --- a/test/test_tree_1d_fdsbp.jl +++ b/test/test_tree_1d_fdsbp.jl @@ -23,6 +23,21 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_fdsbp") @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 end end + + @trixi_testset "elixir_advection_upwind_periodic.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_upwind_periodic.jl"), + l2 = [1.1672962783692568e-5], + linf = [1.650514414558435e-5]) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end + end end @testset "Inviscid Burgers" begin diff --git a/test/test_tree_2d_fdsbp.jl b/test/test_tree_2d_fdsbp.jl index 7c58ef89a6c..e81c82f3f34 100644 --- a/test/test_tree_2d_fdsbp.jl +++ b/test/test_tree_2d_fdsbp.jl @@ -23,6 +23,24 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_2d_fdsbp") @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 end end + + @trixi_testset "elixir_advection_extended.jl with periodic operators" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), + l2 = [1.1239649404463432e-5], + linf = [1.5895264629195438e-5], + D_SBP = SummationByPartsOperators.periodic_derivative_operator( + derivative_order = 1, accuracy_order = 4, xmin = 0.0, xmax = 1.0, N = 40), + initial_refinement_level = 0) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end + end end @testset "Compressible Euler" begin diff --git a/test/test_tree_3d_fdsbp.jl b/test/test_tree_3d_fdsbp.jl index 9dceab38031..106dd007b09 100644 --- a/test/test_tree_3d_fdsbp.jl +++ b/test/test_tree_3d_fdsbp.jl @@ -7,7 +7,7 @@ include("test_trixi.jl") EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_3d_fdsbp") -@testset "Compressible Euler" begin +@testset "Linear scalar advection" begin @trixi_testset "elixir_advection_extended.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), l2 = [0.005355755365412444], @@ -23,6 +23,27 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_3d_fdsbp") end end + @trixi_testset "elixir_advection_extended.jl with periodic operators" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), + l2 = [1.3819894522373702e-8], + linf = [3.381866298113323e-8], + D_SBP = SummationByPartsOperators.periodic_derivative_operator( + derivative_order = 1, accuracy_order = 4, xmin = 0.0, xmax = 1.0, N = 10), + initial_refinement_level = 0, + tspan = (0.0, 5.0)) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000 + end + end +end + +@testset "Compressible Euler" begin @trixi_testset "elixir_euler_convergence.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_convergence.jl"), l2 = [2.247522803543667e-5, 2.2499169224681058e-5, 
2.24991692246826e-5, 2.2499169224684707e-5, 5.814121361417382e-5], From fd239da5af1ba619fa2457c6318a5f3ab3be59b3 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Sun, 16 Jul 2023 06:11:40 +0200 Subject: [PATCH 086/163] set version to v0.5.34 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index a49cfb2e254..7f2a52b0aaf 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.34-pre" +version = "0.5.34" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From d96514f6d58e48b33816f58e02959d167954fdea Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Sun, 16 Jul 2023 06:11:55 +0200 Subject: [PATCH 087/163] set development version to v0.5.35-pre --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 7f2a52b0aaf..4c187ed38ff 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.34" +version = "0.5.35-pre" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From 75d70fdf5706ccdc5290303675bcd5ad1cf7d462 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 17 Jul 2023 19:15:37 +0200 Subject: [PATCH 088/163] Bump crate-ci/typos from 1.16.0 to 1.16.1 (#1573) Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.16.0 to 1.16.1. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.16.0...v1.16.1) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/SpellCheck.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml index bb5a32f72ee..f72c3b0947b 100644 --- a/.github/workflows/SpellCheck.yml +++ b/.github/workflows/SpellCheck.yml @@ -10,4 +10,4 @@ jobs: - name: Checkout Actions Repository uses: actions/checkout@v3 - name: Check spelling - uses: crate-ci/typos@v1.16.0 + uses: crate-ci/typos@v1.16.1 From a12f82da43a16d59db16063557fef245c87b6c0e Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Tue, 18 Jul 2023 05:52:17 +0200 Subject: [PATCH 089/163] fix typos in comments (#1572) --- examples/tree_1d_fdsbp/elixir_advection_upwind.jl | 2 +- examples/tree_1d_fdsbp/elixir_advection_upwind_periodic.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/tree_1d_fdsbp/elixir_advection_upwind.jl b/examples/tree_1d_fdsbp/elixir_advection_upwind.jl index 18dd818e3ca..1f2498e0866 100644 --- a/examples/tree_1d_fdsbp/elixir_advection_upwind.jl +++ b/examples/tree_1d_fdsbp/elixir_advection_upwind.jl @@ -5,7 +5,7 @@ using OrdinaryDiffEq using Trixi ############################################################################### -# semidiscretization of the linear scalar advection equation equation +# semidiscretization of the linear scalar advection equation equations = LinearScalarAdvectionEquation1D(1.0) diff --git a/examples/tree_1d_fdsbp/elixir_advection_upwind_periodic.jl b/examples/tree_1d_fdsbp/elixir_advection_upwind_periodic.jl index 3eb805095f4..035d3568a80 100644 --- a/examples/tree_1d_fdsbp/elixir_advection_upwind_periodic.jl +++ b/examples/tree_1d_fdsbp/elixir_advection_upwind_periodic.jl @@ -5,7 +5,7 @@ using OrdinaryDiffEq using Trixi ############################################################################### -# semidiscretization of the linear scalar advection equation equation +# semidiscretization of the linear scalar advection equation equations = LinearScalarAdvectionEquation1D(1.0) From 375384659cb57a80e253c5e685db8ec298e30d8c Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Wed, 19 Jul 2023 07:39:37 +0200 Subject: [PATCH 090/163] Add talk announcement for JuliaCon 2023 (#1575) --- README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/README.md b/README.md index ccd70b6daf8..7eaee8750dd 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,16 @@

+*** +**Trixi.jl at JuliaCon 2023**
+At this year's JuliaCon, we will be present with an online contribution that involves Trixi.jl: + +* [Scaling Trixi.jl to more than 10,000 cores using MPI](https://pretalx.com/juliacon2023/talk/PC8PZ8/), + 27th July 2023, 10:30–11:30 (US/Eastern), 32-G449 (Kiva) + +We are looking forward to seeing you there ♥️ +*** + **Trixi.jl** is a numerical simulation framework for hyperbolic conservation laws written in [Julia](https://julialang.org). A key objective for the framework is to be useful to both scientists and students. Therefore, next to From b0ec66ea004c84d8487c4318a54933da8c827c92 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Wed, 19 Jul 2023 12:16:53 +0200 Subject: [PATCH 091/163] fix GC time percentage output (#1576) --- src/callbacks_step/analysis.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl index 8cf43a1d15e..7c453aab633 100644 --- a/src/callbacks_step/analysis.jl +++ b/src/callbacks_step/analysis.jl @@ -267,7 +267,7 @@ function (analysis_callback::AnalysisCallback)(u_ode, du_ode, integrator, semi) gc_time_absolute = 1.0e-9 * (Base.gc_time_ns() - analysis_callback.start_gc_time) # Compute the percentage of total time that was spent in garbage collection - gc_time_percentage = gc_time_absolute / runtime_absolute + gc_time_percentage = gc_time_absolute / runtime_absolute * 100 # Obtain the current memory usage of the Julia garbage collector, in MiB, i.e., the total size of # objects in memory that have been allocated by the JIT compiler or the user code. From e0aad3cad1a2581eb79cf4fd1d06e7d7fb2c6379 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Wed, 19 Jul 2023 14:11:35 +0200 Subject: [PATCH 092/163] set version to v0.5.35 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 4c187ed38ff..1818d1c56c9 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.35-pre" +version = "0.5.35" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From 67d137d6712f28bbe99ffef3d003afe96c47aade Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Wed, 19 Jul 2023 14:11:46 +0200 Subject: [PATCH 093/163] set development version to v0.5.36-pre --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 1818d1c56c9..07c4fe55ad4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. 
Winters ", "Jesse Chan "] -version = "0.5.35" +version = "0.5.36-pre" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From 1aec5fa7e17a8011baf77bfa1822491693e1986d Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Fri, 21 Jul 2023 11:56:38 +0200 Subject: [PATCH 094/163] test threaded time integration (#1581) * test threaded time integration * link to upstream issue in comment --- examples/dgmulti_2d/elixir_euler_curved.jl | 3 +- .../elixir_advection_diffusion.jl | 3 +- .../tree_2d_dgsem/elixir_advection_restart.jl | 3 +- test/Project.toml | 6 ++-- test/test_threaded.jl | 28 +++++++++++++++++++ 5 files changed, 37 insertions(+), 6 deletions(-) diff --git a/examples/dgmulti_2d/elixir_euler_curved.jl b/examples/dgmulti_2d/elixir_euler_curved.jl index a3ba62f1cfb..39e3a0a0360 100644 --- a/examples/dgmulti_2d/elixir_euler_curved.jl +++ b/examples/dgmulti_2d/elixir_euler_curved.jl @@ -42,7 +42,8 @@ callbacks = CallbackSet(summary_callback, alive_callback, analysis_callback) ############################################################################### # run the simulation -sol = solve(ode, RDPK3SpFSAL49(); abstol=1.0e-6, reltol=1.0e-6, +alg = RDPK3SpFSAL49() +sol = solve(ode, alg; abstol=1.0e-6, reltol=1.0e-6, ode_default_options()..., callback=callbacks); summary_callback() # print the timer summary diff --git a/examples/tree_2d_dgsem/elixir_advection_diffusion.jl b/examples/tree_2d_dgsem/elixir_advection_diffusion.jl index e96e1b5a171..a716bd278b8 100644 --- a/examples/tree_2d_dgsem/elixir_advection_diffusion.jl +++ b/examples/tree_2d_dgsem/elixir_advection_diffusion.jl @@ -75,8 +75,9 @@ callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) # run the simulation # OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks +alg = RDPK3SpFSAL49() time_int_tol = 1.0e-11 -sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, +sol = solve(ode, alg; abstol=time_int_tol, reltol=time_int_tol, ode_default_options()..., callback=callbacks) # Print the timer summary diff --git a/examples/tree_2d_dgsem/elixir_advection_restart.jl b/examples/tree_2d_dgsem/elixir_advection_restart.jl index 4ceb5932573..72efb7d0c84 100644 --- a/examples/tree_2d_dgsem/elixir_advection_restart.jl +++ b/examples/tree_2d_dgsem/elixir_advection_restart.jl @@ -26,7 +26,8 @@ ode = semidiscretize(semi, tspan, restart_filename); # Do not overwrite the initial snapshot written by elixir_advection_extended.jl. 
save_solution.condition.save_initial_solution = false -integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), +alg = CarpenterKennedy2N54(williamson_condition=false) +integrator = init(ode, alg, dt=dt, # solve needs some value here but it will be overwritten by the stepsize_callback save_everystep=false, callback=callbacks) diff --git a/test/Project.toml b/test/Project.toml index cae1d4ff396..7115a19b441 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -24,9 +24,9 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [preferences.OrdinaryDiffEq] PrecompileAutoSpecialize = false PrecompileAutoSwitch = false -PrecompileDefaultSpecialize = true +PrecompileDefaultSpecialize = false PrecompileFunctionWrapperSpecialize = false -PrecompileLowStorage = true +PrecompileLowStorage = false PrecompileNoSpecialize = false -PrecompileNonStiff = true +PrecompileNonStiff = false PrecompileStiff = false diff --git a/test/test_threaded.jl b/test/test_threaded.jl index 1e750707981..323d12d7091 100644 --- a/test/test_threaded.jl +++ b/test/test_threaded.jl @@ -18,6 +18,14 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) linf = [6.314906965243505e-5]) end + @trixi_testset "elixir_advection_restart.jl with threaded time integration" begin + @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_restart.jl"), + alg = CarpenterKennedy2N54(williamson_condition = false, thread = OrdinaryDiffEq.True()), + # Expected errors are exactly the same as in the serial test! + l2 = [7.81674284320524e-6], + linf = [6.314906965243505e-5]) + end + @trixi_testset "elixir_advection_amr_refine_twice.jl" begin @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_amr_refine_twice.jl"), l2 = [0.00020547512522578292], @@ -42,6 +50,15 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) l2 = [0.061751715597716854, 0.05018223615408711, 0.05018989446443463, 0.225871559730513], linf = [0.29347582879608825, 0.31081249232844693, 0.3107380389947736, 1.0540358049885143]) end + + @trixi_testset "elixir_advection_diffusion.jl" begin + @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_diffusion.jl"), + initial_refinement_level = 2, tspan = (0.0, 0.4), polydeg = 5, + alg = RDPK3SpFSAL49(thread = OrdinaryDiffEq.True()), + l2 = [4.0915532997994255e-6], + linf = [2.3040850347877395e-5] + ) + end end @@ -108,6 +125,17 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) ) end + @trixi_testset "elixir_euler_curved.jl with threaded time integration" begin + @test_broken false + # TODO: This is currently broken and needs to be fixed upstream + # See https://github.com/JuliaSIMD/StrideArrays.jl/issues/77 + # @test_trixi_include(joinpath(examples_dir(), "dgmulti_2d", "elixir_euler_curved.jl"), + # alg = RDPK3SpFSAL49(thread = OrdinaryDiffEq.True()), + # l2 = [1.720476068165337e-5, 1.592168205710526e-5, 1.592168205812963e-5, 4.894094865697305e-5], + # linf = [0.00010525416930584619, 0.00010003778091061122, 0.00010003778085621029, 0.00036426282101720275] + # ) + end + @trixi_testset "elixir_euler_triangulate_pkg_mesh.jl" begin @test_trixi_include(joinpath(examples_dir(), "dgmulti_2d", "elixir_euler_triangulate_pkg_mesh.jl"), l2 = [2.344080455438114e-6, 1.8610038753097983e-6, 2.4095165666095305e-6, 6.373308158814308e-6], From 036eaed82b92be9376c5b610d8d40eddf45ca1fa Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Mon, 24 Jul 2023 13:26:16 +0200 Subject: [PATCH 095/163] reset threads in semidiscretize 
(#1584) I added the option to reset the threads from Polyester.jl in semidiscretize. However, I did not document it in the docstring since we have not documented that we use Polyester.jl threads in general - and the resetting is specific to Polyester.jl. I was not sure whether we still would like to keep the option to change the threading backend any time - although I do not see a good reason why we should do so. --- Project.toml | 2 +- src/Trixi.jl | 2 +- src/semidiscretization/semidiscretization.jl | 20 +++++++++++++++++-- ...semidiscretization_hyperbolic_parabolic.jl | 10 +++++++++- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/Project.toml b/Project.toml index 07c4fe55ad4..00bf2718d8b 100644 --- a/Project.toml +++ b/Project.toml @@ -65,7 +65,7 @@ MuladdMacro = "0.2.2" Octavian = "0.3.5" OffsetArrays = "1.3" P4est = "0.4" -Polyester = "0.3.4, 0.5, 0.6, 0.7" +Polyester = "0.7.5" PrecompileTools = "1.1" RecipesBase = "1.1" Reexport = "1.0" diff --git a/src/Trixi.jl b/src/Trixi.jl index cf6158e29eb..b0c872b1904 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -51,7 +51,7 @@ using LoopVectorization: LoopVectorization, @turbo, indices using StaticArrayInterface: static_length # used by LoopVectorization using MuladdMacro: @muladd using Octavian: Octavian, matmul! -using Polyester: @batch # You know, the cheapest threads you can find... +using Polyester: Polyester, @batch # You know, the cheapest threads you can find... using OffsetArrays: OffsetArray, OffsetVector using P4est using Setfield: @set diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl index ac312c57c89..fbdcd73e2a8 100644 --- a/src/semidiscretization/semidiscretization.jl +++ b/src/semidiscretization/semidiscretization.jl @@ -70,7 +70,15 @@ end Wrap the semidiscretization `semi` as an ODE problem in the time interval `tspan` that can be passed to `solve` from the [SciML ecosystem](https://diffeq.sciml.ai/latest/). """ -function semidiscretize(semi::AbstractSemidiscretization, tspan) +function semidiscretize(semi::AbstractSemidiscretization, tspan; + reset_threads = true) + # Optionally reset Polyester.jl threads. See + # https://github.com/trixi-framework/Trixi.jl/issues/1583 + # https://github.com/JuliaSIMD/Polyester.jl/issues/30 + if reset_threads + Polyester.reset_threads!() + end + u0_ode = compute_coefficients(first(tspan), semi) # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using # mpi_isparallel() && MPI.Barrier(mpi_comm()) @@ -88,7 +96,15 @@ that can be passed to `solve` from the [SciML ecosystem](https://diffeq.sciml.ai The initial condition etc. is taken from the `restart_file`. """ function semidiscretize(semi::AbstractSemidiscretization, tspan, - restart_file::AbstractString) + restart_file::AbstractString; + reset_threads = true) + # Optionally reset Polyester.jl threads. See + # https://github.com/trixi-framework/Trixi.jl/issues/1583 + # https://github.com/JuliaSIMD/Polyester.jl/issues/30 + if reset_threads + Polyester.reset_threads!() + end + u0_ode = load_restart_file(semi, restart_file) # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. 
using # mpi_isparallel() && MPI.Barrier(mpi_comm()) diff --git a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl index f54bc744164..8f1e38c891b 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl @@ -274,7 +274,15 @@ The parabolic right-hand side is the first function of the split ODE problem and will be used by default by the implicit part of IMEX methods from the SciML ecosystem. """ -function semidiscretize(semi::SemidiscretizationHyperbolicParabolic, tspan) +function semidiscretize(semi::SemidiscretizationHyperbolicParabolic, tspan; + reset_threads = true) + # Optionally reset Polyester.jl threads. See + # https://github.com/trixi-framework/Trixi.jl/issues/1583 + # https://github.com/JuliaSIMD/Polyester.jl/issues/30 + if reset_threads + Polyester.reset_threads!() + end + u0_ode = compute_coefficients(first(tspan), semi) # TODO: MPI, do we want to synchronize loading and print debug statements, e.g. using # mpi_isparallel() && MPI.Barrier(mpi_comm()) From 253f63ef042ef3f10ca15c5d21327a3b6ce4bcdc Mon Sep 17 00:00:00 2001 From: Jesse Chan <1156048+jlchan@users.noreply.github.com> Date: Mon, 24 Jul 2023 22:46:27 -0500 Subject: [PATCH 096/163] Fix CI failures related to Makie (#1586) * unrelated cleanup * fix CI issues? --- ext/TrixiMakieExt.jl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ext/TrixiMakieExt.jl b/ext/TrixiMakieExt.jl index 1eb11f6a422..8cd7576a6e5 100644 --- a/ext/TrixiMakieExt.jl +++ b/ext/TrixiMakieExt.jl @@ -335,7 +335,7 @@ end # ================== new Makie plot recipes ==================== # This initializes a Makie recipe, which creates a new type definition which Makie uses to create -# custom `trixiheatmap` plots. See also https://makie.juliaplots.org/stable/recipes.html +# custom `trixiheatmap` plots. See also https://docs.makie.org/stable/documentation/recipes/ Makie.@recipe(TrixiHeatmap, plot_data_series) do scene Makie.Theme(colormap = default_Makie_colormap()) end @@ -346,9 +346,8 @@ function Makie.plot!(myplot::TrixiHeatmap) plotting_mesh = global_plotting_triangulation_makie(pds; set_z_coordinate_zero = true) - @unpack variable_id = pds pd = pds.plot_data - solution_z = vec(StructArrays.component(pd.data, variable_id)) + solution_z = vec(StructArrays.component(pd.data, pds.variable_id)) Makie.mesh!(myplot, plotting_mesh, color = solution_z, shading = false, colormap = myplot[:colormap]) myplot.colorrange = extrema(solution_z) @@ -411,7 +410,7 @@ function Makie.plot!(fig, pd::PlotData2DTriangulated; row = row_list[variable_to_plot] col = col_list[variable_to_plot] - Makie.Colorbar(fig[row, col][1, 2], plt) + Makie.Colorbar(fig[row, col][1, 2], colormap = colormap) ax.aspect = Makie.DataAspect() # equal aspect ratio ax.title = variable_name From 6e7e3b5bfb4e4a232f04a9b0d3c711ad414a56c2 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Tue, 25 Jul 2023 07:08:05 +0200 Subject: [PATCH 097/163] activate previously broken test and add allocation tests (#1582) * activate previously broken test and add allocation tests * fix allocations in prolong2interfaces! * fix allocations in calc_sources! * fix allocations in apply_jacobian! 
* fixed FDSBP, elixir_euler_convergence.jl * elixir_euler_triangulate_pkg_mesh.jl is only broken with multithreading * Update test_threaded.jl * Update test_threaded.jl --- src/solvers/dgsem_tree/dg_1d.jl | 20 ++-- src/solvers/dgsem_tree/dg_2d.jl | 25 +++-- src/solvers/dgsem_tree/dg_3d.jl | 29 +++--- test/test_threaded.jl | 175 ++++++++++++++++++++++++++++++-- 4 files changed, 212 insertions(+), 37 deletions(-) diff --git a/src/solvers/dgsem_tree/dg_1d.jl b/src/solvers/dgsem_tree/dg_1d.jl index c66f427cce3..b5bb076f3b7 100644 --- a/src/solvers/dgsem_tree/dg_1d.jl +++ b/src/solvers/dgsem_tree/dg_1d.jl @@ -385,15 +385,17 @@ end function prolong2interfaces!(cache, u, mesh::TreeMesh{1}, equations, surface_integral, dg::DG) @unpack interfaces = cache + @unpack neighbor_ids = interfaces + interfaces_u = interfaces.u @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] + left_element = neighbor_ids[1, interface] + right_element = neighbor_ids[2, interface] # interface in x-direction for v in eachvariable(equations) - interfaces.u[1, v, interface] = u[v, nnodes(dg), left_element] - interfaces.u[2, v, interface] = u[v, 1, right_element] + interfaces_u[1, v, interface] = u[v, nnodes(dg), left_element] + interfaces_u[2, v, interface] = u[v, 1, right_element] end end @@ -621,8 +623,10 @@ end function apply_jacobian!(du, mesh::Union{TreeMesh{1}, StructuredMesh{1}}, equations, dg::DG, cache) + @unpack inverse_jacobian = cache.elements + @threaded for element in eachelement(dg, cache) - factor = -cache.elements.inverse_jacobian[element] + factor = -inverse_jacobian[element] for i in eachnode(dg) for v in eachvariable(equations) @@ -642,11 +646,13 @@ end function calc_sources!(du, u, t, source_terms, equations::AbstractEquations{1}, dg::DG, cache) + @unpack node_coordinates = cache.elements + @threaded for element in eachelement(dg, cache) for i in eachnode(dg) u_local = get_node_vars(u, equations, dg, i, element) - x_local = get_node_coords(cache.elements.node_coordinates, equations, dg, i, - element) + x_local = get_node_coords(node_coordinates, equations, dg, + i, element) du_local = source_terms(u_local, x_local, t, equations) add_to_node_vars!(du, du_local, equations, dg, i, element) end diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index d3227710686..6c5e0cee0cf 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -529,23 +529,24 @@ end function prolong2interfaces!(cache, u, mesh::TreeMesh{2}, equations, surface_integral, dg::DG) @unpack interfaces = cache - @unpack orientations = interfaces + @unpack orientations, neighbor_ids = interfaces + interfaces_u = interfaces.u @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] + left_element = neighbor_ids[1, interface] + right_element = neighbor_ids[2, interface] if orientations[interface] == 1 # interface in x-direction for j in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, j, interface] = u[v, nnodes(dg), j, left_element] - interfaces.u[2, v, j, interface] = u[v, 1, j, right_element] + interfaces_u[1, v, j, interface] = u[v, nnodes(dg), j, left_element] + interfaces_u[2, v, j, interface] = u[v, 1, j, right_element] end else # if orientations[interface] == 2 # interface in y-direction for i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, 
v, i, interface] = u[v, i, nnodes(dg), left_element] - interfaces.u[2, v, i, interface] = u[v, i, 1, right_element] + interfaces_u[1, v, i, interface] = u[v, i, nnodes(dg), left_element] + interfaces_u[2, v, i, interface] = u[v, i, 1, right_element] end end end @@ -1116,8 +1117,10 @@ end function apply_jacobian!(du, mesh::TreeMesh{2}, equations, dg::DG, cache) + @unpack inverse_jacobian = cache.elements + @threaded for element in eachelement(dg, cache) - factor = -cache.elements.inverse_jacobian[element] + factor = -inverse_jacobian[element] for j in eachnode(dg), i in eachnode(dg) for v in eachvariable(equations) @@ -1137,11 +1140,13 @@ end function calc_sources!(du, u, t, source_terms, equations::AbstractEquations{2}, dg::DG, cache) + @unpack node_coordinates = cache.elements + @threaded for element in eachelement(dg, cache) for j in eachnode(dg), i in eachnode(dg) u_local = get_node_vars(u, equations, dg, i, j, element) - x_local = get_node_coords(cache.elements.node_coordinates, equations, dg, i, - j, element) + x_local = get_node_coords(node_coordinates, equations, dg, + i, j, element) du_local = source_terms(u_local, x_local, t, equations) add_to_node_vars!(du, du_local, equations, dg, i, j, element) end diff --git a/src/solvers/dgsem_tree/dg_3d.jl b/src/solvers/dgsem_tree/dg_3d.jl index 95abb2595e5..acdab900cd1 100644 --- a/src/solvers/dgsem_tree/dg_3d.jl +++ b/src/solvers/dgsem_tree/dg_3d.jl @@ -598,32 +598,33 @@ end function prolong2interfaces!(cache, u, mesh::TreeMesh{3}, equations, surface_integral, dg::DG) @unpack interfaces = cache - @unpack orientations = interfaces + @unpack orientations, neighbor_ids = interfaces + interfaces_u = interfaces.u @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] + left_element = neighbor_ids[1, interface] + right_element = neighbor_ids[2, interface] if orientations[interface] == 1 # interface in x-direction for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, j, k, interface] = u[v, nnodes(dg), j, k, + interfaces_u[1, v, j, k, interface] = u[v, nnodes(dg), j, k, left_element] - interfaces.u[2, v, j, k, interface] = u[v, 1, j, k, right_element] + interfaces_u[2, v, j, k, interface] = u[v, 1, j, k, right_element] end elseif orientations[interface] == 2 # interface in y-direction for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, k, interface] = u[v, i, nnodes(dg), k, + interfaces_u[1, v, i, k, interface] = u[v, i, nnodes(dg), k, left_element] - interfaces.u[2, v, i, k, interface] = u[v, i, 1, k, right_element] + interfaces_u[2, v, i, k, interface] = u[v, i, 1, k, right_element] end else # if orientations[interface] == 3 # interface in z-direction for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations) - interfaces.u[1, v, i, j, interface] = u[v, i, j, nnodes(dg), + interfaces_u[1, v, i, j, interface] = u[v, i, j, nnodes(dg), left_element] - interfaces.u[2, v, i, j, interface] = u[v, i, j, 1, right_element] + interfaces_u[2, v, i, j, interface] = u[v, i, j, 1, right_element] end end end @@ -1350,8 +1351,10 @@ end function apply_jacobian!(du, mesh::TreeMesh{3}, equations, dg::DG, cache) + @unpack inverse_jacobian = cache.elements + @threaded for element in eachelement(dg, cache) - factor = -cache.elements.inverse_jacobian[element] + factor = -inverse_jacobian[element] for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) for v in 
eachvariable(equations) @@ -1371,11 +1374,13 @@ end function calc_sources!(du, u, t, source_terms, equations::AbstractEquations{3}, dg::DG, cache) + @unpack node_coordinates = cache.elements + @threaded for element in eachelement(dg, cache) for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) u_local = get_node_vars(u, equations, dg, i, j, k, element) - x_local = get_node_coords(cache.elements.node_coordinates, equations, dg, i, - j, k, element) + x_local = get_node_coords(node_coordinates, equations, dg, + i, j, k, element) du_local = source_terms(u_local, x_local, t, equations) add_to_node_vars!(du, du_local, equations, dg, i, j, k, element) end diff --git a/test/test_threaded.jl b/test/test_threaded.jl index 323d12d7091..77fa16ad33e 100644 --- a/test/test_threaded.jl +++ b/test/test_threaded.jl @@ -16,6 +16,15 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) # Expected errors are exactly the same as in the serial test! l2 = [7.81674284320524e-6], linf = [6.314906965243505e-5]) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end end @trixi_testset "elixir_advection_restart.jl with threaded time integration" begin @@ -30,12 +39,30 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_amr_refine_twice.jl"), l2 = [0.00020547512522578292], linf = [0.007831753383083506]) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end end @trixi_testset "elixir_advection_amr_coarsen_twice.jl" begin @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_amr_coarsen_twice.jl"), l2 = [0.0014321062757891826], linf = [0.0253454486893413]) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end end @trixi_testset "elixir_euler_source_terms_nonperiodic.jl" begin @@ -43,12 +70,30 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) l2 = [2.259440511766445e-6, 2.318888155713922e-6, 2.3188881557894307e-6, 6.3327863238858925e-6], linf = [1.498738264560373e-5, 1.9182011928187137e-5, 1.918201192685487e-5, 6.0526717141407005e-5], rtol = 0.001) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end end @trixi_testset "elixir_euler_ec.jl" begin @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_euler_ec.jl"), l2 = [0.061751715597716854, 0.05018223615408711, 0.05018989446443463, 0.225871559730513], linf = [0.29347582879608825, 0.31081249232844693, 0.3107380389947736, 1.0540358049885143]) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end end @trixi_testset "elixir_advection_diffusion.jl" begin @@ -58,6 +103,47 @@ Trixi.mpi_isroot() && 
isdir(outdir) && rm(outdir, recursive=true) l2 = [4.0915532997994255e-6], linf = [2.3040850347877395e-5] ) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end + end + + @trixi_testset "FDSBP, elixir_advection_extended.jl" begin + @test_trixi_include(joinpath(examples_dir(), "tree_2d_fdsbp", "elixir_advection_extended.jl"), + l2 = [2.898644263922225e-6], + linf = [8.491517930142578e-6], + rtol = 1.0e-7) # These results change a little bit and depend on the CI system + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end + end + + @trixi_testset "FDSBP, elixir_euler_convergence.jl" begin + @test_trixi_include(joinpath(examples_dir(), "tree_2d_fdsbp", "elixir_euler_convergence.jl"), + l2 = [1.7088389997042244e-6, 1.7437997855125774e-6, 1.7437997855350776e-6, 5.457223460127621e-6], + linf = [9.796504903736292e-6, 9.614745892783105e-6, 9.614745892783105e-6, 4.026107182575345e-5], + tspan = (0.0, 0.1)) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end end end @@ -70,6 +156,15 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) rtol = 5.0e-5, # Higher tolerance to make tests pass in CI (in particular with macOS) elixir_file="elixir_advection_waving_flag.jl", restart_file="restart_000021.h5") + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end end @trixi_testset "elixir_mhd_ec.jl" begin @@ -81,6 +176,15 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) 0.9757376320946505, 0.12123736788315098, 0.12837436699267113, 0.17793825293524734, 0.03460761690059514], tspan = (0.0, 0.3)) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end end end @@ -93,6 +197,15 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) linf = [0.36236334472179443, 0.3690785638275256, 0.8475748723784078, 0.0, 8.881784197001252e-16, 1.7763568394002505e-15, 1.7763568394002505e-15], tspan = (0.0, 5.0)) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end end end @@ -102,6 +215,15 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) @test_trixi_include(joinpath(examples_dir(), "p4est_2d_dgsem", "elixir_euler_source_terms_nonconforming_unstructured_flag.jl"), l2 = [0.0034516244508588046, 0.0023420334036925493, 0.0024261923964557187, 0.004731710454271893], linf = [0.04155789011775046, 0.024772109862748914, 0.03759938693042297, 0.08039824959535657]) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = 
sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end end @trixi_testset "elixir_eulergravity_convergence.jl" begin @@ -123,17 +245,32 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) l2 = [0.006400337855843578, 0.005303799804137764, 0.005303799804119745, 0.013204169007030144], linf = [0.03798302318566282, 0.05321027922532284, 0.05321027922605448, 0.13392025411839015], ) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end end @trixi_testset "elixir_euler_curved.jl with threaded time integration" begin - @test_broken false - # TODO: This is currently broken and needs to be fixed upstream - # See https://github.com/JuliaSIMD/StrideArrays.jl/issues/77 - # @test_trixi_include(joinpath(examples_dir(), "dgmulti_2d", "elixir_euler_curved.jl"), - # alg = RDPK3SpFSAL49(thread = OrdinaryDiffEq.True()), - # l2 = [1.720476068165337e-5, 1.592168205710526e-5, 1.592168205812963e-5, 4.894094865697305e-5], - # linf = [0.00010525416930584619, 0.00010003778091061122, 0.00010003778085621029, 0.00036426282101720275] - # ) + @test_trixi_include(joinpath(examples_dir(), "dgmulti_2d", "elixir_euler_curved.jl"), + alg = RDPK3SpFSAL49(thread = OrdinaryDiffEq.True()), + l2 = [1.720476068165337e-5, 1.592168205710526e-5, 1.592168205812963e-5, 4.894094865697305e-5], + linf = [0.00010525416930584619, 0.00010003778091061122, 0.00010003778085621029, 0.00036426282101720275] + ) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end end @trixi_testset "elixir_euler_triangulate_pkg_mesh.jl" begin @@ -141,6 +278,19 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) l2 = [2.344080455438114e-6, 1.8610038753097983e-6, 2.4095165666095305e-6, 6.373308158814308e-6], linf = [2.5099852761334418e-5, 2.2683684021362893e-5, 2.6180448559287584e-5, 5.5752932611508044e-5] ) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + if (Threads.nthreads() < 2) || (VERSION < v"1.9") + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + else + @test_broken (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end + end end @trixi_testset "elixir_euler_fdsbp_periodic.jl" begin @@ -148,6 +298,15 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) l2 = [1.3333320340010056e-6, 2.044834627970641e-6, 2.044834627855601e-6, 5.282189803559564e-6], linf = [2.7000151718858945e-6, 3.988595028259212e-6, 3.9885950273710336e-6, 8.848583042286862e-6] ) + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end end end end From 3dd2cb60d0798a5a9a327c73e6150382636c7845 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Tue, 25 Jul 2023 08:59:37 +0200 Subject: [PATCH 098/163] set version to v0.5.36 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 00bf2718d8b..2017290c785 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ 
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "]
-version = "0.5.36-pre"
+version = "0.5.36"

 [deps]
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"

From 41b56ef71c535321fca8c99fe9d7b2098b70025d Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha 
Date: Tue, 25 Jul 2023 08:59:47 +0200
Subject: [PATCH 099/163] set development version to v0.5.37-pre

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 2017290c785..94c47a35ac1 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "]
-version = "0.5.36"
+version = "0.5.37-pre"

 [deps]
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"

From fe6a818a8459d6beef3969c1fd2d5cc7ddf596df Mon Sep 17 00:00:00 2001
From: Ahmad Peyvan <115842305+apey236@users.noreply.github.com>
Date: Tue, 25 Jul 2023 04:17:42 -0400
Subject: [PATCH 100/163] Adding parabolic terms for `P4estMesh{3}` (#1555)

* Adding parabolic terms for 3D P4est mesh

* Adding parabolic terms for 3D P4estMesh

* Adding working parabolic terms for `P4estMesh{3}`

* Formatting

* Adding TGV example and test to `P4estMesh{3}`

* Update src/solvers/dgsem_tree/dg_3d_parabolic.jl

Co-authored-by: Hendrik Ranocha

* Update src/solvers/dgsem_tree/dg_3d_parabolic.jl

Co-authored-by: Hendrik Ranocha

* Update src/solvers/dgsem_tree/dg_3d_parabolic.jl

Co-authored-by: Hendrik Ranocha

* Update src/solvers/dgsem_tree/dg_3d_parabolic.jl

Co-authored-by: Hendrik Ranocha

* Removing comments

* Removed comments

* Adding TGV test for `P4estMesh{3}`

* Correcting the format

* Format correction

* Remove .toml file

* Format correction

* Optimized loop for speed

---------

Co-authored-by: Hendrik Ranocha
Co-authored-by: Jesse Chan <1156048+jlchan@users.noreply.github.com>
---
 .../elixir_navierstokes_convergence.jl        | 263 +++++++
 ...elixir_navierstokes_taylor_green_vortex.jl |  82 +++
 src/callbacks_step/analysis_dg3d.jl           |   2 +-
 src/solvers/dgsem_p4est/dg.jl                 |   1 +
 src/solvers/dgsem_p4est/dg_2d_parabolic.jl    |   2 +-
 src/solvers/dgsem_p4est/dg_3d_parabolic.jl    | 691 ++++++++++++++++++
 src/solvers/dgsem_tree/dg_3d_parabolic.jl     |  13 +-
 test/test_parabolic_3d.jl                     |  19 +-
 8 files changed, 1063 insertions(+), 10 deletions(-)
 create mode 100644 examples/p4est_3d_dgsem/elixir_navierstokes_convergence.jl
 create mode 100644 examples/p4est_3d_dgsem/elixir_navierstokes_taylor_green_vortex.jl
 create mode 100644 src/solvers/dgsem_p4est/dg_3d_parabolic.jl

diff --git a/examples/p4est_3d_dgsem/elixir_navierstokes_convergence.jl b/examples/p4est_3d_dgsem/elixir_navierstokes_convergence.jl
new file mode 100644
index 00000000000..c426fe95f5b
--- /dev/null
+++ b/examples/p4est_3d_dgsem/elixir_navierstokes_convergence.jl
@@ -0,0 +1,263 @@
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the ideal compressible Navier-Stokes equations
+
+prandtl_number() = 0.72
+mu() = 0.01
+
+equations = CompressibleEulerEquations3D(1.4)
+equations_parabolic = CompressibleNavierStokesDiffusion3D(equations, mu=mu(), Prandtl=prandtl_number(),
+                                                          gradient_variables=GradientVariablesPrimitive())
+
+# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
+solver =
DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs, + volume_integral=VolumeIntegralWeakForm()) + +coordinates_min = (-1.0, -1.0, -1.0) # minimum coordinates (min(x), min(y), min(z)) +coordinates_max = ( 1.0, 1.0, 1.0) # maximum coordinates (max(x), max(y), max(z)) + +trees_per_dimension = (2, 2, 2) + +mesh = P4estMesh(trees_per_dimension, polydeg=3, + coordinates_min=coordinates_min, coordinates_max=coordinates_max, + periodicity=(true, false, true), initial_refinement_level=2) + +# Note: the initial condition cannot be specialized to `CompressibleNavierStokesDiffusion3D` +# since it is called by both the parabolic solver (which passes in `CompressibleNavierStokesDiffusion3D`) +# and by the initial condition (which passes in `CompressibleEulerEquations3D`). +# This convergence test setup was originally derived by Andrew Winters (@andrewwinters5000) +function initial_condition_navier_stokes_convergence_test(x, t, equations) + # Constants. OBS! Must match those in `source_terms_navier_stokes_convergence_test` + c = 2.0 + A1 = 0.5 + A2 = 1.0 + A3 = 0.5 + + # Convenience values for trig. functions + pi_x = pi * x[1] + pi_y = pi * x[2] + pi_z = pi * x[3] + pi_t = pi * t + + rho = c + A1 * sin(pi_x) * cos(pi_y) * sin(pi_z) * cos(pi_t) + v1 = A2 * sin(pi_x) * log(x[2] + 2.0) * (1.0 - exp(-A3 * (x[2] - 1.0))) * sin(pi_z) * cos(pi_t) + v2 = v1 + v3 = v1 + p = rho^2 + + return prim2cons(SVector(rho, v1, v2, v3, p), equations) +end + +@inline function source_terms_navier_stokes_convergence_test(u, x, t, equations) + # TODO: parabolic + # we currently need to hardcode these parameters until we fix the "combined equation" issue + # see also https://github.com/trixi-framework/Trixi.jl/pull/1160 + inv_gamma_minus_one = inv(equations.gamma - 1) + Pr = prandtl_number() + mu_ = mu() + + # Constants. OBS! Must match those in `initial_condition_navier_stokes_convergence_test` + c = 2.0 + A1 = 0.5 + A2 = 1.0 + A3 = 0.5 + + # Convenience values for trig. 
functions
+    pi_x = pi * x[1]
+    pi_y = pi * x[2]
+    pi_z = pi * x[3]
+    pi_t = pi * t
+
+    # Define auxiliary functions for the strange function of the y variable
+    # to make expressions easier to read
+    g = log(x[2] + 2.0) * (1.0 - exp(-A3 * (x[2] - 1.0)))
+    g_y = ( A3 * log(x[2] + 2.0) * exp(-A3 * (x[2] - 1.0))
+            + (1.0 - exp(-A3 * (x[2] - 1.0))) / (x[2] + 2.0) )
+    g_yy = ( 2.0 * A3 * exp(-A3 * (x[2] - 1.0)) / (x[2] + 2.0)
+             - (1.0 - exp(-A3 * (x[2] - 1.0))) / ((x[2] + 2.0)^2)
+             - A3^2 * log(x[2] + 2.0) * exp(-A3 * (x[2] - 1.0)) )
+
+    # Density and its derivatives
+    rho = c + A1 * sin(pi_x) * cos(pi_y) * sin(pi_z) * cos(pi_t)
+    rho_t = -pi * A1 * sin(pi_x) * cos(pi_y) * sin(pi_z) * sin(pi_t)
+    rho_x = pi * A1 * cos(pi_x) * cos(pi_y) * sin(pi_z) * cos(pi_t)
+    rho_y = -pi * A1 * sin(pi_x) * sin(pi_y) * sin(pi_z) * cos(pi_t)
+    rho_z = pi * A1 * sin(pi_x) * cos(pi_y) * cos(pi_z) * cos(pi_t)
+    rho_xx = -pi^2 * (rho - c)
+    rho_yy = -pi^2 * (rho - c)
+    rho_zz = -pi^2 * (rho - c)
+
+    # Velocities and their derivatives
+    # v1 terms
+    v1 = A2 * sin(pi_x) * g * sin(pi_z) * cos(pi_t)
+    v1_t = -pi * A2 * sin(pi_x) * g * sin(pi_z) * sin(pi_t)
+    v1_x = pi * A2 * cos(pi_x) * g * sin(pi_z) * cos(pi_t)
+    v1_y = A2 * sin(pi_x) * g_y * sin(pi_z) * cos(pi_t)
+    v1_z = pi * A2 * sin(pi_x) * g * cos(pi_z) * cos(pi_t)
+    v1_xx = -pi^2 * v1
+    v1_yy = A2 * sin(pi_x) * g_yy * sin(pi_z) * cos(pi_t)
+    v1_zz = -pi^2 * v1
+    v1_xy = pi * A2 * cos(pi_x) * g_y * sin(pi_z) * cos(pi_t)
+    v1_xz = pi^2 * A2 * cos(pi_x) * g * cos(pi_z) * cos(pi_t)
+    v1_yz = pi * A2 * sin(pi_x) * g_y * cos(pi_z) * cos(pi_t)
+    # v2 terms (simplifies from ansatz)
+    v2 = v1
+    v2_t = v1_t
+    v2_x = v1_x
+    v2_y = v1_y
+    v2_z = v1_z
+    v2_xx = v1_xx
+    v2_yy = v1_yy
+    v2_zz = v1_zz
+    v2_xy = v1_xy
+    v2_yz = v1_yz
+    # v3 terms (simplifies from ansatz)
+    v3 = v1
+    v3_t = v1_t
+    v3_x = v1_x
+    v3_y = v1_y
+    v3_z = v1_z
+    v3_xx = v1_xx
+    v3_yy = v1_yy
+    v3_zz = v1_zz
+    v3_xz = v1_xz
+    v3_yz = v1_yz
+
+    # Pressure and its derivatives
+    p = rho^2
+    p_t = 2.0 * rho * rho_t
+    p_x = 2.0 * rho * rho_x
+    p_y = 2.0 * rho * rho_y
+    p_z = 2.0 * rho * rho_z
+
+    # Total energy and its derivatives; simplifies from ansatz that v2 = v1 and v3 = v1
+    E = p * inv_gamma_minus_one + 1.5 * rho * v1^2
+    E_t = p_t * inv_gamma_minus_one + 1.5 * rho_t * v1^2 + 3.0 * rho * v1 * v1_t
+    E_x = p_x * inv_gamma_minus_one + 1.5 * rho_x * v1^2 + 3.0 * rho * v1 * v1_x
+    E_y = p_y * inv_gamma_minus_one + 1.5 * rho_y * v1^2 + 3.0 * rho * v1 * v1_y
+    E_z = p_z * inv_gamma_minus_one + 1.5 * rho_z * v1^2 + 3.0 * rho * v1 * v1_z
+
+    # Divergence of Fick's law ∇⋅∇q = kappa ∇⋅∇T; simplifies because p = rho², so T = p/rho = rho
+    kappa = equations.gamma * inv_gamma_minus_one / Pr
+    q_xx = kappa * rho_xx # kappa T_xx
+    q_yy = kappa * rho_yy # kappa T_yy
+    q_zz = kappa * rho_zz # kappa T_zz
+
+    # Stress tensor and its derivatives (exploit symmetry)
+    tau11 = 4.0 / 3.0 * v1_x - 2.0 / 3.0 * (v2_y + v3_z)
+    tau12 = v1_y + v2_x
+    tau13 = v1_z + v3_x
+    tau22 = 4.0 / 3.0 * v2_y - 2.0 / 3.0 * (v1_x + v3_z)
+    tau23 = v2_z + v3_y
+    tau33 = 4.0 / 3.0 * v3_z - 2.0 / 3.0 * (v1_x + v2_y)
+
+    tau11_x = 4.0 / 3.0 * v1_xx - 2.0 / 3.0 * (v2_xy + v3_xz)
+    tau12_x = v1_xy + v2_xx
+    tau13_x = v1_xz + v3_xx
+
+    tau12_y = v1_yy + v2_xy
+    tau22_y = 4.0 / 3.0 * v2_yy - 2.0 / 3.0 * (v1_xy + v3_yz)
+    tau23_y = v2_yz + v3_yy
+
+    tau13_z = v1_zz + v3_xz
+    tau23_z = v2_zz + v3_yz
+    tau33_z = 4.0 / 3.0 * v3_zz - 2.0 / 3.0 * (v1_xz + v2_yz)
+
+    # Compute the source terms
+    # Density equation
+    du1 = ( rho_t + rho_x * v1 + rho * v1_x
+ rho_y * v2 + rho * v2_y + + rho_z * v3 + rho * v3_z ) + # x-momentum equation + du2 = ( rho_t * v1 + rho * v1_t + p_x + rho_x * v1^2 + + 2.0 * rho * v1 * v1_x + + rho_y * v1 * v2 + + rho * v1_y * v2 + + rho * v1 * v2_y + + rho_z * v1 * v3 + + rho * v1_z * v3 + + rho * v1 * v3_z + - mu_ * (tau11_x + tau12_y + tau13_z) ) + # y-momentum equation + du3 = ( rho_t * v2 + rho * v2_t + p_y + rho_x * v1 * v2 + + rho * v1_x * v2 + + rho * v1 * v2_x + + rho_y * v2^2 + + 2.0 * rho * v2 * v2_y + + rho_z * v2 * v3 + + rho * v2_z * v3 + + rho * v2 * v3_z + - mu_ * (tau12_x + tau22_y + tau23_z) ) + # z-momentum equation + du4 = ( rho_t * v3 + rho * v3_t + p_z + rho_x * v1 * v3 + + rho * v1_x * v3 + + rho * v1 * v3_x + + rho_y * v2 * v3 + + rho * v2_y * v3 + + rho * v2 * v3_y + + rho_z * v3^2 + + 2.0 * rho * v3 * v3_z + - mu_ * (tau13_x + tau23_y + tau33_z) ) + # Total energy equation + du5 = ( E_t + v1_x * (E + p) + v1 * (E_x + p_x) + + v2_y * (E + p) + v2 * (E_y + p_y) + + v3_z * (E + p) + v3 * (E_z + p_z) + # stress tensor and temperature gradient from x-direction + - mu_ * ( q_xx + v1_x * tau11 + v2_x * tau12 + v3_x * tau13 + + v1 * tau11_x + v2 * tau12_x + v3 * tau13_x) + # stress tensor and temperature gradient terms from y-direction + - mu_ * ( q_yy + v1_y * tau12 + v2_y * tau22 + v3_y * tau23 + + v1 * tau12_y + v2 * tau22_y + v3 * tau23_y) + # stress tensor and temperature gradient terms from z-direction + - mu_ * ( q_zz + v1_z * tau13 + v2_z * tau23 + v3_z * tau33 + + v1 * tau13_z + v2 * tau23_z + v3 * tau33_z) ) + + return SVector(du1, du2, du3, du4, du5) +end + +initial_condition = initial_condition_navier_stokes_convergence_test + +# BC types +velocity_bc_top_bottom = NoSlip((x, t, equations) -> initial_condition_navier_stokes_convergence_test(x, t, equations)[2:4]) +heat_bc_top_bottom = Adiabatic((x, t, equations) -> 0.0) +boundary_condition_top_bottom = BoundaryConditionNavierStokesWall(velocity_bc_top_bottom, heat_bc_top_bottom) + +# define inviscid boundary conditions +boundary_conditions = Dict( + :y_neg => boundary_condition_slip_wall, + :y_pos => boundary_condition_slip_wall + ) + +# define viscous boundary conditions +boundary_conditions_parabolic = Dict( + :y_neg => boundary_condition_top_bottom, + :y_pos => boundary_condition_top_bottom + ) + +semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), initial_condition, solver; + boundary_conditions=(boundary_conditions, boundary_conditions_parabolic), + source_terms=source_terms_navier_stokes_convergence_test) + +############################################################################### +# ODE solvers, callbacks etc. 
+ +# Create ODE problem with time span `tspan` +tspan = (0.0, 0.2) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() +alive_callback = AliveCallback(alive_interval=10) +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) +callbacks = CallbackSet(summary_callback, alive_callback, analysis_callback) + +############################################################################### +# run the simulation + +time_int_tol = 1e-8 +sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, dt = 1e-5, + ode_default_options()..., callback=callbacks) +summary_callback() # print the timer summary + diff --git a/examples/p4est_3d_dgsem/elixir_navierstokes_taylor_green_vortex.jl b/examples/p4est_3d_dgsem/elixir_navierstokes_taylor_green_vortex.jl new file mode 100644 index 00000000000..c5b9ccf2e38 --- /dev/null +++ b/examples/p4est_3d_dgsem/elixir_navierstokes_taylor_green_vortex.jl @@ -0,0 +1,82 @@ + +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the compressible Navier-Stokes equations + +# TODO: parabolic; unify names of these accessor functions +prandtl_number() = 0.72 +mu() = 6.25e-4 # equivalent to Re = 1600 + +equations = CompressibleEulerEquations3D(1.4) +equations_parabolic = CompressibleNavierStokesDiffusion3D(equations, mu=mu(), + Prandtl=prandtl_number()) + +""" + initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations3D) + +The classical inviscid Taylor-Green vortex. +""" +function initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations3D) + A = 1.0 # magnitude of speed + Ms = 0.1 # maximum Mach number + + rho = 1.0 + v1 = A * sin(x[1]) * cos(x[2]) * cos(x[3]) + v2 = -A * cos(x[1]) * sin(x[2]) * cos(x[3]) + v3 = 0.0 + p = (A / Ms)^2 * rho / equations.gamma # scaling to get Ms + p = p + 1.0/16.0 * A^2 * rho * (cos(2*x[1])*cos(2*x[3]) + 2*cos(2*x[2]) + 2*cos(2*x[1]) + cos(2*x[2])*cos(2*x[3])) + + return prim2cons(SVector(rho, v1, v2, v3, p), equations) +end +initial_condition = initial_condition_taylor_green_vortex + +volume_flux = flux_ranocha +solver = DGSEM(polydeg=3, surface_flux=flux_hll, + volume_integral=VolumeIntegralFluxDifferencing(volume_flux)) + +coordinates_min = (-1.0, -1.0, -1.0) .* pi +coordinates_max = ( 1.0, 1.0, 1.0) .* pi + +trees_per_dimension = (2, 2, 2) + +mesh = P4estMesh(trees_per_dimension, polydeg=3, + coordinates_min=coordinates_min, coordinates_max=coordinates_max, + periodicity=(true, true, true), initial_refinement_level=2) + + +semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), + initial_condition, solver) + +############################################################################### +# ODE solvers, callbacks etc. 
+ +tspan = (0.0, 20.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 50 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=true, + extra_analysis_integrals=(energy_kinetic, + energy_internal, + enstrophy)) +save_solution = SaveSolutionCallback(interval=100, + save_initial_solution=true, + save_final_solution=true, + solution_variables=cons2prim) +alive_callback = AliveCallback(analysis_interval=analysis_interval,) + +callbacks = CallbackSet(summary_callback, + analysis_callback, + alive_callback,save_solution) + +############################################################################### +# run the simulation + +time_int_tol = 1e-8 +sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, + ode_default_options()..., callback=callbacks) +summary_callback() # print the timer summary diff --git a/src/callbacks_step/analysis_dg3d.jl b/src/callbacks_step/analysis_dg3d.jl index 76aba813fab..3d9b38fd2a5 100644 --- a/src/callbacks_step/analysis_dg3d.jl +++ b/src/callbacks_step/analysis_dg3d.jl @@ -228,7 +228,7 @@ function integrate(func::Func, u, end function integrate(func::Func, u, - mesh::TreeMesh{3}, + mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations, equations_parabolic, dg::DGSEM, cache, cache_parabolic; normalize = true) where {Func} diff --git a/src/solvers/dgsem_p4est/dg.jl b/src/solvers/dgsem_p4est/dg.jl index a7cc1eee04d..ec50627d3ef 100644 --- a/src/solvers/dgsem_p4est/dg.jl +++ b/src/solvers/dgsem_p4est/dg.jl @@ -50,5 +50,6 @@ include("dg_2d.jl") include("dg_2d_parabolic.jl") include("dg_3d.jl") +include("dg_3d_parabolic.jl") include("dg_parallel.jl") end # @muladd diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl index 73ac47ed1e3..7e90a83a9ca 100644 --- a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl @@ -1,7 +1,7 @@ # This method is called when a SemidiscretizationHyperbolicParabolic is constructed. # It constructs the basic `cache` used throughout the simulation to compute # the RHS etc. -function create_cache_parabolic(mesh::P4estMesh, equations_hyperbolic::AbstractEquations, +function create_cache_parabolic(mesh::P4estMesh{2}, equations_hyperbolic::AbstractEquations, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, RealT, uEltype) balance!(mesh) diff --git a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl new file mode 100644 index 00000000000..5370c927e05 --- /dev/null +++ b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl @@ -0,0 +1,691 @@ +# This method is called when a SemidiscretizationHyperbolicParabolic is constructed. +# It constructs the basic `cache` used throughout the simulation to compute +# the RHS etc. 
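+# For the parabolic solver, this cache additionally holds `u_transformed` (the
+# solution in the gradient variables), the physical `gradients`, and the viscous
+# fluxes `flux_viscous`, the latter two with one array per spatial dimension.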
+function create_cache_parabolic(mesh::P4estMesh{3}, equations_hyperbolic::AbstractEquations, + equations_parabolic::AbstractEquationsParabolic, + dg::DG, parabolic_scheme, RealT, uEltype) + balance!(mesh) + + elements = init_elements(mesh, equations_hyperbolic, dg.basis, uEltype) + interfaces = init_interfaces(mesh, equations_hyperbolic, dg.basis, elements) + boundaries = init_boundaries(mesh, equations_hyperbolic, dg.basis, elements) + + n_vars = nvariables(equations_hyperbolic) + n_elements = nelements(elements) + n_nodes = nnodes(dg.basis) # nodes in one direction + u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_nodes, n_nodes, n_elements) + gradients = ntuple(_ -> similar(u_transformed), ndims(mesh)) + flux_viscous = ntuple(_ -> similar(u_transformed), ndims(mesh)) + + cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) + + return cache +end + +function calc_gradient!(gradients, u_transformed, t, + mesh::P4estMesh{3}, equations_parabolic, + boundary_conditions_parabolic, dg::DG, + cache, cache_parabolic) + gradients_x, gradients_y, gradients_z = gradients + + # Reset du + @trixi_timeit timer() "reset gradients" begin + reset_du!(gradients_x, dg, cache) + reset_du!(gradients_y, dg, cache) + reset_du!(gradients_z, dg, cache) + end + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + (; derivative_dhat) = dg.basis + (; contravariant_vectors) = cache.elements + + @threaded for element in eachelement(dg, cache) + + # Calculate gradients with respect to reference coordinates in one element + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + u_node = get_node_vars(u_transformed, equations_parabolic, dg, i, j, k, + element) + + for ii in eachnode(dg) + multiply_add_to_node_vars!(gradients_x, derivative_dhat[ii, i], + u_node, equations_parabolic, dg, ii, j, + k, element) + end + + for jj in eachnode(dg) + multiply_add_to_node_vars!(gradients_y, derivative_dhat[jj, j], + u_node, equations_parabolic, dg, i, jj, + k, element) + end + + for kk in eachnode(dg) + multiply_add_to_node_vars!(gradients_z, derivative_dhat[kk, k], + u_node, equations_parabolic, dg, i, j, + kk, element) + end + end + + # now that the reference coordinate gradients are computed, transform them node-by-node to physical gradients + # using the contravariant vectors + for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) + Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, + i, j, k, element) + Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, + i, j, k, element) + Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, + i, j, k, element) + + gradients_reference_1 = get_node_vars(gradients_x, equations_parabolic, dg, + i, j, k, element) + gradients_reference_2 = get_node_vars(gradients_y, equations_parabolic, dg, + i, j, k, element) + gradients_reference_3 = get_node_vars(gradients_z, equations_parabolic, dg, + i, j, k, element) + + # note that the contravariant vectors are transposed compared with computations of flux + # divergences in `calc_volume_integral!`. See + # https://github.com/trixi-framework/Trixi.jl/pull/1490#discussion_r1213345190 + # for a more detailed discussion. 
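+                # In matrix form, this computes the physical gradient at each node as
+                #   (g_x, g_y, g_z)ᵀ = Jaᵀ (g_ξ, g_η, g_ζ)ᵀ,
+                # i.e., with the transposed contravariant vectors; the scaling by the
+                # inverse Jacobian is applied later in `apply_jacobian_parabolic!`.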
+ gradient_x_node = Ja11 * gradients_reference_1 + + Ja21 * gradients_reference_2 + + Ja31 * gradients_reference_3 + gradient_y_node = Ja12 * gradients_reference_1 + + Ja22 * gradients_reference_2 + + Ja32 * gradients_reference_3 + gradient_z_node = Ja13 * gradients_reference_1 + + Ja23 * gradients_reference_2 + + Ja33 * gradients_reference_3 + + set_node_vars!(gradients_x, gradient_x_node, equations_parabolic, dg, + i, j, k, element) + set_node_vars!(gradients_y, gradient_y_node, equations_parabolic, dg, + i, j, k, element) + set_node_vars!(gradients_z, gradient_z_node, equations_parabolic, dg, + i, j, k, element) + end + end + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache_parabolic, u_transformed, mesh, + equations_parabolic, dg.surface_integral, dg) + end + + # Calculate interface fluxes for the gradient. This reuses P4est `calc_interface_flux!` along with a + # specialization for AbstractEquationsParabolic. + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache_parabolic.elements.surface_flux_values, + mesh, False(), # False() = no nonconservative terms + equations_parabolic, dg.surface_integral, dg, cache_parabolic) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache_parabolic, u_transformed, mesh, + equations_parabolic, dg.surface_integral, dg) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_gradients!(cache_parabolic, t, boundary_conditions_parabolic, + mesh, equations_parabolic, dg.surface_integral, dg) + end + + # TODO: parabolic; mortars + @assert nmortars(dg, cache) == 0 + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + (; boundary_interpolation) = dg.basis + (; surface_flux_values) = cache_parabolic.elements + (; contravariant_vectors) = cache.elements + + # Access the factors only once before beginning the loop to increase performance. + # We also use explicit assignments instead of `+=` to let `@muladd` turn these + # into FMAs (see comment at the top of the file). 
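+        # `factor_1` weights surface contributions on the "minus" faces (reference
+        # coordinate -1), `factor_2` those on the "plus" faces (+1).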
+        factor_1 = boundary_interpolation[1, 1]
+        factor_2 = boundary_interpolation[nnodes(dg), 2]
+        @threaded for element in eachelement(dg, cache)
+            for l in eachnode(dg), m in eachnode(dg)
+                for v in eachvariable(equations_parabolic)
+                    for dim in 1:3
+                        grad = gradients[dim]
+                        # surface at -x
+                        normal_direction = get_normal_direction(1, contravariant_vectors,
+                                                                1, l, m, element)
+                        grad[v, 1, l, m, element] = (grad[v, 1, l, m, element] +
+                                                     surface_flux_values[v, l, m, 1,
+                                                                         element] *
+                                                     factor_1 * normal_direction[dim])
+
+                        # surface at +x
+                        normal_direction = get_normal_direction(2, contravariant_vectors,
+                                                                nnodes(dg), l, m, element)
+                        grad[v, nnodes(dg), l, m, element] = (grad[v, nnodes(dg), l, m,
+                                                                   element] +
+                                                              surface_flux_values[v, l, m, 2,
+                                                                                  element] *
+                                                              factor_2 *
+                                                              normal_direction[dim])
+
+                        # surface at -y
+                        normal_direction = get_normal_direction(3, contravariant_vectors,
+                                                                l, 1, m, element)
+                        grad[v, l, 1, m, element] = (grad[v, l, 1, m, element] +
+                                                     surface_flux_values[v, l, m, 3,
+                                                                         element] *
+                                                     factor_1 * normal_direction[dim])
+
+                        # surface at +y
+                        normal_direction = get_normal_direction(4, contravariant_vectors,
+                                                                l, nnodes(dg), m, element)
+                        grad[v, l, nnodes(dg), m, element] = (grad[v, l, nnodes(dg), m,
+                                                                   element] +
+                                                              surface_flux_values[v, l, m, 4,
+                                                                                  element] *
+                                                              factor_2 *
+                                                              normal_direction[dim])
+
+                        # surface at -z
+                        normal_direction = get_normal_direction(5, contravariant_vectors,
+                                                                l, m, 1, element)
+                        grad[v, l, m, 1, element] = (grad[v, l, m, 1, element] +
+                                                     surface_flux_values[v, l, m, 5,
+                                                                         element] *
+                                                     factor_1 * normal_direction[dim])
+
+                        # surface at +z
+                        normal_direction = get_normal_direction(6, contravariant_vectors,
+                                                                l, m, nnodes(dg), element)
+                        grad[v, l, m, nnodes(dg), element] = (grad[v, l, m, nnodes(dg),
+                                                                   element] +
+                                                              surface_flux_values[v, l, m, 6,
+                                                                                  element] *
+                                                              factor_2 *
+                                                              normal_direction[dim])
+                    end
+                end
+            end
+        end
+    end
+
+    # Apply Jacobian from mapping to reference element
+    @trixi_timeit timer() "Jacobian" begin
+        apply_jacobian_parabolic!(gradients_x, mesh, equations_parabolic, dg,
+                                  cache_parabolic)
+        apply_jacobian_parabolic!(gradients_y, mesh, equations_parabolic, dg,
+                                  cache_parabolic)
+        apply_jacobian_parabolic!(gradients_z, mesh, equations_parabolic, dg,
+                                  cache_parabolic)
+    end
+
+    return nothing
+end
+
+# This version is used for parabolic gradient computations
+@inline function calc_interface_flux!(surface_flux_values, mesh::P4estMesh{3},
+                                      nonconservative_terms::False,
+                                      equations::AbstractEquationsParabolic,
+                                      surface_integral, dg::DG, cache,
+                                      interface_index, normal_direction,
+                                      primary_i_node_index, primary_j_node_index,
+                                      primary_direction_index, primary_element_index,
+                                      secondary_i_node_index, secondary_j_node_index,
+                                      secondary_direction_index,
+                                      secondary_element_index)
+    @unpack u = cache.interfaces
+    @unpack surface_flux = surface_integral
+
+    u_ll, u_rr = get_surface_node_vars(u, equations, dg, primary_i_node_index,
+                                       primary_j_node_index,
+                                       interface_index)
+
+    flux_ = 0.5 * (u_ll + u_rr) # we assume that the gradient computations utilize a central flux
+
+    # Note that we don't flip the sign on the secondary flux. This is because for parabolic terms,
+    # the normals are not embedded in `flux_` for the parabolic gradient computations.
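+    # Hence the same central value `flux_` is written to both sides here; each
+    # element's outward normal enters only afterwards, in the surface integral
+    # of `calc_gradient!`.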
+    for v in eachvariable(equations)
+        surface_flux_values[v, primary_i_node_index, primary_j_node_index, primary_direction_index, primary_element_index] = flux_[v]
+        surface_flux_values[v, secondary_i_node_index, secondary_j_node_index, secondary_direction_index, secondary_element_index] = flux_[v]
+    end
+end
+
+# This is the version used when calculating the divergence of the viscous fluxes
+function calc_volume_integral!(du, flux_viscous,
+                               mesh::P4estMesh{3},
+                               equations_parabolic::AbstractEquationsParabolic,
+                               dg::DGSEM, cache)
+    (; derivative_dhat) = dg.basis
+    (; contravariant_vectors) = cache.elements
+    flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous
+
+    @threaded for element in eachelement(dg, cache)
+        # Calculate volume terms in one element
+        for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg)
+            flux1 = get_node_vars(flux_viscous_x, equations_parabolic, dg, i, j, k, element)
+            flux2 = get_node_vars(flux_viscous_y, equations_parabolic, dg, i, j, k, element)
+            flux3 = get_node_vars(flux_viscous_z, equations_parabolic, dg, i, j, k, element)
+
+            # Compute the contravariant flux by taking the scalar product of the
+            # first contravariant vector Ja^1 and the flux vector
+            Ja11, Ja12, Ja13 = get_contravariant_vector(1, contravariant_vectors, i, j, k,
+                                                        element)
+            contravariant_flux1 = Ja11 * flux1 + Ja12 * flux2 + Ja13 * flux3
+            for ii in eachnode(dg)
+                multiply_add_to_node_vars!(du, derivative_dhat[ii, i], contravariant_flux1,
+                                           equations_parabolic, dg, ii, j, k, element)
+            end
+
+            # Compute the contravariant flux by taking the scalar product of the
+            # second contravariant vector Ja^2 and the flux vector
+            Ja21, Ja22, Ja23 = get_contravariant_vector(2, contravariant_vectors, i, j, k,
+                                                        element)
+            contravariant_flux2 = Ja21 * flux1 + Ja22 * flux2 + Ja23 * flux3
+            for jj in eachnode(dg)
+                multiply_add_to_node_vars!(du, derivative_dhat[jj, j], contravariant_flux2,
+                                           equations_parabolic, dg, i, jj, k, element)
+            end
+
+            # Compute the contravariant flux by taking the scalar product of the
+            # third contravariant vector Ja^3 and the flux vector
+            Ja31, Ja32, Ja33 = get_contravariant_vector(3, contravariant_vectors, i, j, k,
+                                                        element)
+            contravariant_flux3 = Ja31 * flux1 + Ja32 * flux2 + Ja33 * flux3
+            for kk in eachnode(dg)
+                multiply_add_to_node_vars!(du, derivative_dhat[kk, k], contravariant_flux3,
+                                           equations_parabolic, dg, i, j, kk, element)
+            end
+        end
+    end
+
+    return nothing
+end
+
+# This is the version used when calculating the divergence of the viscous fluxes
+# We pass the `surface_integral` argument solely for dispatch
+function prolong2interfaces!(cache_parabolic, flux_viscous,
+                             mesh::P4estMesh{3},
+                             equations_parabolic::AbstractEquationsParabolic,
+                             surface_integral, dg::DG, cache)
+    (; interfaces) = cache_parabolic
+    (; contravariant_vectors) = cache_parabolic.elements
+    index_range = eachnode(dg)
+    flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous
+
+    @threaded for interface in eachinterface(dg, cache)
+        # Copy solution data from the primary element using "delayed indexing" with
+        # a start value and a step size to get the correct face and orientation.
+        # Note that in the current implementation, the interface will be
+        # "aligned at the primary element", i.e., the index of the primary side
+        # will always run forwards.
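+        # `index_to_start_step_3d` encodes this traversal as a start index plus two
+        # step increments, one per face-local direction.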
+ primary_element = interfaces.neighbor_ids[1, interface] + primary_indices = interfaces.node_indices[1, interface] + primary_direction = indices2direction(primary_indices) + + i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1], + index_range) + j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2], + index_range) + k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3], + index_range) + + i_primary = i_primary_start + j_primary = j_primary_start + k_primary = k_primary_start + + for j in eachnode(dg) + for i in eachnode(dg) + # this is the outward normal direction on the primary element + normal_direction = get_normal_direction(primary_direction, + contravariant_vectors, + i_primary, j_primary, k_primary, + primary_element) + + for v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! + flux_viscous = SVector(flux_viscous_x[v, i_primary, j_primary, + k_primary, + primary_element], + flux_viscous_y[v, i_primary, j_primary, + k_primary, + primary_element], + flux_viscous_z[v, i_primary, j_primary, + k_primary, + primary_element]) + + interfaces.u[1, v, i, j, interface] = dot(flux_viscous, + normal_direction) + end + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i + end + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j + end + + # Copy solution data from the secondary element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + secondary_element = interfaces.neighbor_ids[2, interface] + secondary_indices = interfaces.node_indices[2, interface] + secondary_direction = indices2direction(secondary_indices) + + i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_indices[1], + index_range) + j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_indices[2], + index_range) + k_secondary_start, k_secondary_step_i, k_secondary_step_j = index_to_start_step_3d(secondary_indices[3], + index_range) + + i_secondary = i_secondary_start + j_secondary = j_secondary_start + k_secondary = k_secondary_start + for j in eachnode(dg) + for i in eachnode(dg) + # This is the outward normal direction on the secondary element. + # Here, we assume that normal_direction on the secondary element is + # the negative of normal_direction on the primary element. + normal_direction = get_normal_direction(secondary_direction, + contravariant_vectors, + i_secondary, j_secondary, + k_secondary, + secondary_element) + + for v in eachvariable(equations_parabolic) + # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! 
+                    flux_viscous = SVector(flux_viscous_x[v, i_secondary, j_secondary,
+                                                          k_secondary, secondary_element],
+                                           flux_viscous_y[v, i_secondary, j_secondary,
+                                                          k_secondary, secondary_element],
+                                           flux_viscous_z[v, i_secondary, j_secondary,
+                                                          k_secondary, secondary_element])
+                    # store the normal flux with respect to the primary normal direction
+                    interfaces.u[2, v, i, j, interface] = -dot(flux_viscous,
+                                                               normal_direction)
+                end
+                i_secondary += i_secondary_step_i
+                j_secondary += j_secondary_step_i
+                k_secondary += k_secondary_step_i
+            end
+            i_secondary += i_secondary_step_j
+            j_secondary += j_secondary_step_j
+            k_secondary += k_secondary_step_j
+        end
+    end
+
+    return nothing
+end
+
+# This version is used for divergence flux computations
+function calc_interface_flux!(surface_flux_values,
+                              mesh::P4estMesh{3}, equations_parabolic,
+                              dg::DG, cache_parabolic)
+    (; neighbor_ids, node_indices) = cache_parabolic.interfaces
+    index_range = eachnode(dg)
+
+    @threaded for interface in eachinterface(dg, cache_parabolic)
+        # Get element and side index information on the primary element
+        primary_element = neighbor_ids[1, interface]
+        primary_indices = node_indices[1, interface]
+        primary_direction_index = indices2direction(primary_indices)
+
+        i_primary_start, i_primary_step_i, i_primary_step_j = index_to_start_step_3d(primary_indices[1],
+                                                                                     index_range)
+        j_primary_start, j_primary_step_i, j_primary_step_j = index_to_start_step_3d(primary_indices[2],
+                                                                                     index_range)
+        k_primary_start, k_primary_step_i, k_primary_step_j = index_to_start_step_3d(primary_indices[3],
+                                                                                     index_range)
+
+        i_primary = i_primary_start
+        j_primary = j_primary_start
+        k_primary = k_primary_start
+
+        # Get element and side index information on the secondary element
+        secondary_element = neighbor_ids[2, interface]
+        secondary_indices = node_indices[2, interface]
+        secondary_direction_index = indices2direction(secondary_indices)
+        secondary_surface_indices = surface_indices(secondary_indices)
+
+        # Initiate the secondary indices to be used in the surface for loop using the
+        # surface indexing on the secondary element. Note that the indices of the
+        # primary side will always run forward but the secondary indices might need
+        # to run backwards for flipped sides.
+        i_secondary_start, i_secondary_step_i, i_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[1],
+                                                                                           index_range)
+        j_secondary_start, j_secondary_step_i, j_secondary_step_j = index_to_start_step_3d(secondary_surface_indices[2],
+                                                                                           index_range)
+        i_secondary = i_secondary_start
+        j_secondary = j_secondary_start
+
+        for j in eachnode(dg)
+            for i in eachnode(dg)
+                # We prolong the viscous flux dotted with respect to the outward normal
+                # on the primary element. We assume a BR-1 type of flux.
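+                # For BR1 this means the interface flux of the divergence step is the
+                # arithmetic mean of the two one-sided normal fluxes prolonged above.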
+ viscous_flux_normal_ll, viscous_flux_normal_rr = get_surface_node_vars(cache_parabolic.interfaces.u, + equations_parabolic, + dg, + i, j, + interface) + + flux = 0.5 * (viscous_flux_normal_ll + viscous_flux_normal_rr) + + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, j, primary_direction_index, primary_element] = flux[v] + surface_flux_values[v, i_secondary, j_secondary, secondary_direction_index, secondary_element] = -flux[v] + end + + # Increment the primary element indices + i_primary += i_primary_step_i + j_primary += j_primary_step_i + k_primary += k_primary_step_i + # Increment the secondary element surface indices + i_secondary += i_secondary_step_i + j_secondary += j_secondary_step_i + end + # Increment the primary element indices + i_primary += i_primary_step_j + j_primary += j_primary_step_j + k_primary += k_primary_step_j + # Increment the secondary element surface indices + i_secondary += i_secondary_step_j + j_secondary += j_secondary_step_j + end + end + + return nothing +end + +# TODO: parabolic, finish implementing `calc_boundary_flux_gradients!` and `calc_boundary_flux_divergence!` +function prolong2boundaries!(cache_parabolic, flux_viscous, + mesh::P4estMesh{3}, + equations_parabolic::AbstractEquationsParabolic, + surface_integral, dg::DG, cache) + (; boundaries) = cache_parabolic + (; contravariant_vectors) = cache_parabolic.elements + index_range = eachnode(dg) + + flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous + + @threaded for boundary in eachboundary(dg, cache_parabolic) + # Copy solution data from the element using "delayed indexing" with + # a start value and a step size to get the correct face and orientation. + element = boundaries.neighbor_ids[boundary] + node_indices = boundaries.node_indices[boundary] + direction = indices2direction(node_indices) + + i_node_start, i_node_step_i, i_node_step_j = index_to_start_step_3d(node_indices[1], + index_range) + j_node_start, j_node_step_i, j_node_step_j = index_to_start_step_3d(node_indices[2], + index_range) + k_node_start, k_node_step_i, k_node_step_j = index_to_start_step_3d(node_indices[3], + index_range) + + i_node = i_node_start + j_node = j_node_start + k_node = k_node_start + + for j in eachnode(dg) + for i in eachnode(dg) + # this is the outward normal direction on the primary element + normal_direction = get_normal_direction(direction, contravariant_vectors, + i_node, j_node, k_node, element) + + for v in eachvariable(equations_parabolic) + flux_viscous = SVector(flux_viscous_x[v, i_node, j_node, k_node, + element], + flux_viscous_y[v, i_node, j_node, k_node, + element], + flux_viscous_z[v, i_node, j_node, k_node, + element]) + + boundaries.u[v, i, j, boundary] = dot(flux_viscous, normal_direction) + end + i_node += i_node_step_i + j_node += j_node_step_i + k_node += k_node_step_i + end + i_node += i_node_step_j + j_node += j_node_step_j + k_node += k_node_step_j + end + end + return nothing +end + +# # Function barrier for type stability +# !!! 
TODO: Figure out why this cannot be removed even though it exists in the dg_2d_parabolic.jl file
+function calc_boundary_flux_gradients!(cache, t, boundary_conditions, mesh::P4estMesh,
+                                       equations, surface_integral, dg::DG)
+    (; boundary_condition_types, boundary_indices) = boundary_conditions
+
+    calc_boundary_flux_by_type!(cache, t, boundary_condition_types, boundary_indices,
+                                Gradient(), mesh, equations, surface_integral, dg)
+    return nothing
+end
+
+function calc_boundary_flux_divergence!(cache, t, boundary_conditions, mesh::P4estMesh,
+                                        equations, surface_integral, dg::DG)
+    (; boundary_condition_types, boundary_indices) = boundary_conditions
+
+    calc_boundary_flux_by_type!(cache, t, boundary_condition_types, boundary_indices,
+                                Divergence(), mesh, equations, surface_integral, dg)
+    return nothing
+end
+
+# Iterate over tuples of boundary condition types and associated indices
+# in a type-stable way using "lispy tuple programming".
+function calc_boundary_flux_by_type!(cache, t, BCs::NTuple{N, Any},
+                                     BC_indices::NTuple{N, Vector{Int}},
+                                     operator_type,
+                                     mesh::P4estMesh,
+                                     equations, surface_integral, dg::DG) where {N}
+    # Extract the boundary condition type and index vector
+    boundary_condition = first(BCs)
+    boundary_condition_indices = first(BC_indices)
+    # Extract the remaining types and indices to be processed later
+    remaining_boundary_conditions = Base.tail(BCs)
+    remaining_boundary_condition_indices = Base.tail(BC_indices)
+
+    # process the first boundary condition type
+    calc_boundary_flux!(cache, t, boundary_condition, boundary_condition_indices,
+                        operator_type, mesh, equations, surface_integral, dg)
+
+    # recursively call this method with the unprocessed boundary types
+    calc_boundary_flux_by_type!(cache, t, remaining_boundary_conditions,
+                                remaining_boundary_condition_indices,
+                                operator_type,
+                                mesh, equations, surface_integral, dg)
+
+    return nothing
+end
+
+# terminate the type-stable iteration over tuples
+function calc_boundary_flux_by_type!(cache, t, BCs::Tuple{}, BC_indices::Tuple{},
+                                     operator_type, mesh::P4estMesh, equations,
+                                     surface_integral, dg::DG)
+    nothing
+end
+
+function calc_boundary_flux!(cache, t,
+                             boundary_condition_parabolic, # works with Dict types
+                             boundary_condition_indices,
+                             operator_type, mesh::P4estMesh{3},
+                             equations_parabolic::AbstractEquationsParabolic,
+                             surface_integral, dg::DG)
+    (; boundaries) = cache
+    (; node_coordinates, surface_flux_values) = cache.elements
+    (; contravariant_vectors) = cache.elements
+    index_range = eachnode(dg)
+
+    @threaded for local_index in eachindex(boundary_condition_indices)
+        # Use the local index to get the global boundary index from the pre-sorted list
+        boundary_index = boundary_condition_indices[local_index]
+
+        # Get information on the adjacent element, compute the surface fluxes,
+        # and store them
+        element = boundaries.neighbor_ids[boundary_index]
+        node_indices = boundaries.node_indices[boundary_index]
+        direction_index = indices2direction(node_indices)
+
+        i_node_start, i_node_step_i, i_node_step_j = index_to_start_step_3d(node_indices[1],
+                                                                            index_range)
+        j_node_start, j_node_step_i, j_node_step_j = index_to_start_step_3d(node_indices[2],
+                                                                            index_range)
+        k_node_start, k_node_step_i, k_node_step_j = index_to_start_step_3d(node_indices[3],
+                                                                            index_range)
+
+        i_node = i_node_start
+        j_node = j_node_start
+        k_node = k_node_start
+
+        for j in eachnode(dg)
+            for i in eachnode(dg)
+                # Extract solution data from boundary container
+                u_inner = get_node_vars(boundaries.u, equations_parabolic, dg, i, j,
+ boundary_index) + + # Outward-pointing normal direction (not normalized) + normal_direction = get_normal_direction(direction_index, + contravariant_vectors, + i_node, j_node, k_node, element) + + # TODO: revisit if we want more general boundary treatments. + # This assumes the gradient numerical flux at the boundary is the gradient variable, + # which is consistent with BR1, LDG. + flux_inner = u_inner + + # Coordinates at boundary node + x = get_node_coords(node_coordinates, equations_parabolic, dg, i_node, + j_node, k_node, + element) + + flux_ = boundary_condition_parabolic(flux_inner, u_inner, normal_direction, + x, t, operator_type, + equations_parabolic) + + # Copy flux to element storage in the correct orientation + for v in eachvariable(equations_parabolic) + surface_flux_values[v, i, j, direction_index, element] = flux_[v] + end + + i_node += i_node_step_i + j_node += j_node_step_i + k_node += k_node_step_i + end + i_node += i_node_step_j + j_node += j_node_step_j + k_node += k_node_step_j + end + end +end diff --git a/src/solvers/dgsem_tree/dg_3d_parabolic.jl b/src/solvers/dgsem_tree/dg_3d_parabolic.jl index d6d74637021..5b63b971cd8 100644 --- a/src/solvers/dgsem_tree/dg_3d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_3d_parabolic.jl @@ -13,7 +13,7 @@ # 2. compute f(u, grad(u)) # 3. compute div(f(u, grad(u))) (i.e., the "regular" rhs! call) # boundary conditions will be applied to both grad(u) and div(f(u, grad(u))). -function rhs_parabolic!(du, u, t, mesh::TreeMesh{3}, +function rhs_parabolic!(du, u, t, mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations_parabolic::AbstractEquationsParabolic, initial_condition, boundary_conditions_parabolic, source_terms, dg::DG, parabolic_scheme, cache, cache_parabolic) @@ -105,7 +105,7 @@ end # Transform solution variables prior to taking the gradient # (e.g., conservative to primitive variables). Defaults to doing nothing. # TODO: can we avoid copying data? -function transform_variables!(u_transformed, u, mesh::TreeMesh{3}, +function transform_variables!(u_transformed, u, mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, cache, cache_parabolic) @threaded for element in eachelement(dg, cache) @@ -325,7 +325,8 @@ function prolong2boundaries!(cache_parabolic, flux_viscous, return nothing end -function calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, mesh::TreeMesh{3}, +function calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, + mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations_parabolic::AbstractEquationsParabolic, dg::DG, cache, cache_parabolic) gradients_x, gradients_y, gradients_z = gradients @@ -379,7 +380,7 @@ end function calc_boundary_flux_gradients!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::TreeMesh{3}, + mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) return nothing @@ -387,7 +388,7 @@ end function calc_boundary_flux_divergence!(cache, t, boundary_conditions_parabolic::BoundaryConditionPeriodic, - mesh::TreeMesh{3}, + mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG) return nothing @@ -806,7 +807,7 @@ end # This is because the parabolic fluxes are assumed to be of the form # `du/dt + df/dx = dg/dx + source(x,t)`, # where f(u) is the inviscid flux and g(u) is the viscous flux. 
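 # Hence, the viscous part enters the right-hand side with a positive sign, so the
 # factor below is `+inverse_jacobian`, whereas the hyperbolic `apply_jacobian!`
 # uses the negative factor.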
-function apply_jacobian_parabolic!(du, mesh::TreeMesh{3},
+function apply_jacobian_parabolic!(du, mesh::Union{TreeMesh{3}, P4estMesh{3}},
                                    equations::AbstractEquationsParabolic,
                                    dg::DG, cache)
     @threaded for element in eachelement(dg, cache)
         factor = cache.elements.inverse_jacobian[element]
diff --git a/test/test_parabolic_3d.jl b/test/test_parabolic_3d.jl
index 1ae5eed44ae..67a27238969 100644
--- a/test/test_parabolic_3d.jl
+++ b/test/test_parabolic_3d.jl
@@ -86,9 +86,24 @@ isdir(outdir) && rm(outdir, recursive=true)
     )
   end
 
-end
+  @trixi_testset "P4estMesh3D: elixir_navierstokes_convergence.jl" begin
+    @test_trixi_include(joinpath(examples_dir(), "p4est_3d_dgsem", "elixir_navierstokes_convergence.jl"),
+      initial_refinement_level = 2, tspan=(0.0, 0.1),
+      l2 = [0.00026599105554982194, 0.000461877794472316, 0.0005424899076052261, 0.0004618777944723191, 0.0015846392581126832],
+      linf = [0.0025241668929956163, 0.006308461681816373, 0.004334939663169113, 0.006308461681804009, 0.03176343480493493]
+    )
+  end
+
+  @trixi_testset "P4estMesh3D: elixir_navierstokes_taylor_green_vortex.jl" begin
+    @test_trixi_include(joinpath(examples_dir(), "p4est_3d_dgsem", "elixir_navierstokes_taylor_green_vortex.jl"),
+      initial_refinement_level = 2, tspan=(0.0, 0.25),
+      l2 = [0.0001547509861140407, 0.015637861347119624, 0.015637861347119687, 0.022024699158522523, 0.009711013505930812],
+      linf = [0.0006696415247340326, 0.03442565722527785, 0.03442565722577423, 0.06295407168705314, 0.032857472756916195]
+    )
+  end
+
+end
 
 # Clean up afterwards: delete Trixi.jl output directory
 @test_nowarn isdir(outdir) && rm(outdir, recursive=true)
 
-end # module
+end # module
\ No newline at end of file

From d7ea40b19b98cc18d18e5f047131f141d3c08acc Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Tue, 25 Jul 2023 18:42:12 +0200
Subject: [PATCH 101/163] reset threads also when initializing the summary callback (#1587)

* reset threads also when initializing the summary callback

I added the option to reset the threads from Polyester.jl also in the summary
callback. The idea is that this supports another development workflow where we
just modify the RHS implementation and call solve again without re-creating
the ODE.

The same comment as in 036eaed82b92be9376c5b610d8d40eddf45ca1fa applies:

However, I did not document it in the docstring since we have not documented
that we use Polyester.jl threads in general - and the resetting is specific to
Polyester.jl. I was not sure whether we still would like to keep the option to
change the threading backend any time - although I do not see a good reason
why we should do so.
---
 src/callbacks_step/summary.jl | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/src/callbacks_step/summary.jl b/src/callbacks_step/summary.jl
index 08e13d0b98d..26981a58b73 100644
--- a/src/callbacks_step/summary.jl
+++ b/src/callbacks_step/summary.jl
@@ -15,10 +15,14 @@ Create and return a callback that prints a human-readable summary of the simulat
 beginning of a simulation and then resets the timer. When the returned callback is
 executed directly, the current timer values are shown.
""" -function SummaryCallback() +function SummaryCallback(reset_threads = true) + function initialize(cb, u, t, integrator) + initialize_summary_callback(cb, u, t, integrator; + reset_threads) + end DiscreteCallback(summary_callback, summary_callback, save_positions = (false, false), - initialize = initialize_summary_callback) + initialize = initialize) end function Base.show(io::IO, cb::DiscreteCallback{<:Any, <:typeof(summary_callback)}) @@ -139,7 +143,15 @@ end # Print information about the current simulation setup # Note: This is called *after* all initialization is done, but *before* the first time step -function initialize_summary_callback(cb::DiscreteCallback, u, t, integrator) +function initialize_summary_callback(cb::DiscreteCallback, u, t, integrator; + reset_threads = true) + # Optionally reset Polyester.jl threads. See + # https://github.com/trixi-framework/Trixi.jl/issues/1583 + # https://github.com/JuliaSIMD/Polyester.jl/issues/30 + if reset_threads + Polyester.reset_threads!() + end + mpi_isroot() || return nothing print_startup_message() From 53a826b62241fc0f58c0a3cd0a0acc1789a79509 Mon Sep 17 00:00:00 2001 From: Arpit Babbar Date: Wed, 26 Jul 2023 10:47:07 +0530 Subject: [PATCH 102/163] Timestep stamp in mesh file (#1580) * Timestep stamp in mesh file * Update src/callbacks_step/save_solution.jl Fixes other mesh type issue Co-authored-by: Hendrik Ranocha * Add test for multiple mesh files * Keep within pre-existing tests --------- Co-authored-by: Hendrik Ranocha --- src/callbacks_step/save_solution.jl | 9 ++++++++- test/test_mpi_tree.jl | 6 ++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/callbacks_step/save_solution.jl b/src/callbacks_step/save_solution.jl index 1fe0d6b1e15..14ea33368f8 100644 --- a/src/callbacks_step/save_solution.jl +++ b/src/callbacks_step/save_solution.jl @@ -155,7 +155,14 @@ function save_mesh(semi::AbstractSemidiscretization, output_directory, timestep mesh, _, _, _ = mesh_equations_solver_cache(semi) if mesh.unsaved_changes - mesh.current_filename = save_mesh_file(mesh, output_directory) + # We only append the time step number to the mesh file name if it has + # changed during the simulation due to AMR. We do not append it for + # the first time step. 
+ if timestep == 0 + mesh.current_filename = save_mesh_file(mesh, output_directory) + else + mesh.current_filename = save_mesh_file(mesh, output_directory, timestep) + end mesh.unsaved_changes = false end end diff --git a/test/test_mpi_tree.jl b/test/test_mpi_tree.jl index 84d2609cbb1..8403fcf1b04 100644 --- a/test/test_mpi_tree.jl +++ b/test/test_mpi_tree.jl @@ -55,10 +55,16 @@ CI_ON_WINDOWS = (get(ENV, "GITHUB_ACTIONS", false) == "true") && Sys.iswindows() # Linear scalar advection with AMR # These example files are only for testing purposes and have no practical use @trixi_testset "elixir_advection_amr_refine_twice.jl" begin + # Here, we also test that SaveSolutionCallback prints multiple mesh files with AMR + # Start with a clean environment: remove Trixi.jl output directory if it exists + outdir = "out" + isdir(outdir) && rm(outdir, recursive=true) @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_amr_refine_twice.jl"), l2 = [0.00020547512522578292], linf = [0.007831753383083506], coverage_override = (maxiters=6,)) + meshfiles = filter(file -> endswith(file,".h5") && startswith(file,"mesh"), readdir(outdir)) + @test length(meshfiles) > 1 end @trixi_testset "elixir_advection_amr_coarsen_twice.jl" begin From fe0e78c658283db21e581bbbc6d48d0adcf39510 Mon Sep 17 00:00:00 2001 From: Johannes Markert <10619309+jmark@users.noreply.github.com> Date: Wed, 26 Jul 2023 10:47:54 +0200 Subject: [PATCH 103/163] Feature: t8code as meshing backend (#1426) * Initial commit for the new feature using t8code as meshing backend. * Delete t8code_2d_dgsem * Added new examples and tests. Testing updates for T8code.jl. * Worked in the comments. * Fixed spelling. * Update src/auxiliary/auxiliary.jl Co-authored-by: Hendrik Ranocha * Added whitespace in Unions. * Adapted commented out code block reporting the no. of elements per level. * Added dummy save mesh support for . * Added test . * Added to method signature. * Deleted unnecessary comments. * Removed commented out tests. * Fixed Morton ordering bug in 2D at mortar interfaces. * Disabled `save_solution` callbacks and added more tests. * Added more tests. * Updated code according to the review. * Update src/auxiliary/t8code.jl Co-authored-by: Hendrik Ranocha * Update src/auxiliary/t8code.jl Co-authored-by: Hendrik Ranocha * Update src/auxiliary/t8code.jl Co-authored-by: Hendrik Ranocha * Update src/auxiliary/t8code.jl Co-authored-by: Hendrik Ranocha * Update src/meshes/t8code_mesh.jl Co-authored-by: Hendrik Ranocha * Update src/meshes/t8code_mesh.jl Co-authored-by: Hendrik Ranocha * Update src/meshes/t8code_mesh.jl Co-authored-by: Hendrik Ranocha * Update src/meshes/t8code_mesh.jl Co-authored-by: Hendrik Ranocha * Update src/meshes/t8code_mesh.jl Co-authored-by: Hendrik Ranocha * Update src/meshes/t8code_mesh.jl Co-authored-by: Hendrik Ranocha * Update src/solvers/dgsem_t8code/containers_2d.jl Co-authored-by: Hendrik Ranocha * Update src/meshes/t8code_mesh.jl Co-authored-by: Hendrik Ranocha * Code cleanup. * Updated to T8code@0.3.0 * Fixing minor issues. * Fixed typo. * Code cleanup. * Enabled `set_ghost` in examples. * Generalized type info in function signature. * Added namespace qualifier. * Updated comments. * Refactored code and deleted lots of it. * Removed a copy operation. * Fixed some merging issues and formatting. * Fixed spelling. * Fixed spelling and changed assert macro. * Applied automatic formatting. * Backup. * Removed superfluous outer constructor for T8codeMesh. * Added return statement for consistency. 
* Fixed wrong indentation by autoformatter. * Added comments. * Made sure an exception is thrown. * Changed flags for sc_init for t8code initialization. * Updated formatting. * Workaround for error about calling MPI routines after MPI has been finalized. * Upped to T8code v0.4.1. * Added mpi_finailize_hook for proper memory cleanup. * Added t8code to test_threaded.jl * Added a `save_mesh_file` call in order to satisfy code coverage. * Improved finalizer logic for T8coeMesh. * Refined code. * Restructured to do blocks. * Moved save_mesh_file call to test file. * Fixed spelling error. --------- Co-authored-by: Johannes Markert Co-authored-by: Hendrik Ranocha --- .github/workflows/ci.yml | 1 + Project.toml | 2 + ...ixir_advection_amr_solution_independent.jl | 143 ++++++ .../elixir_advection_amr_unstructured_flag.jl | 87 ++++ .../t8code_2d_dgsem/elixir_advection_basic.jl | 59 +++ .../elixir_advection_nonconforming_flag.jl | 109 ++++ .../elixir_advection_unstructured_flag.jl | 81 +++ .../elixir_euler_free_stream.jl | 122 +++++ .../t8code_2d_dgsem/elixir_euler_sedov.jl | 97 ++++ .../elixir_euler_shockcapturing_ec.jl | 68 +++ ...e_terms_nonconforming_unstructured_flag.jl | 122 +++++ .../elixir_eulergravity_convergence.jl | 77 +++ .../t8code_2d_dgsem/elixir_mhd_alfven_wave.jl | 60 +++ examples/t8code_2d_dgsem/elixir_mhd_rotor.jl | 134 +++++ .../elixir_shallowwater_source_terms.jl | 60 +++ src/Trixi.jl | 5 +- src/auxiliary/t8code.jl | 486 ++++++++++++++++++ src/callbacks_step/amr.jl | 63 +++ src/callbacks_step/amr_dg2d.jl | 72 ++- src/callbacks_step/analysis.jl | 30 ++ src/callbacks_step/analysis_dg2d.jl | 16 +- src/callbacks_step/save_restart_dg.jl | 3 +- src/callbacks_step/save_solution_dg.jl | 3 +- src/callbacks_step/stepsize_dg2d.jl | 8 +- src/meshes/mesh_io.jl | 9 +- src/meshes/meshes.jl | 1 + src/meshes/t8code_mesh.jl | 345 +++++++++++++ src/solvers/dg.jl | 4 +- src/solvers/dgsem_p4est/containers.jl | 9 +- src/solvers/dgsem_p4est/containers_2d.jl | 5 +- src/solvers/dgsem_p4est/dg_2d.jl | 33 +- src/solvers/dgsem_structured/dg_2d.jl | 19 +- src/solvers/dgsem_t8code/containers.jl | 60 +++ src/solvers/dgsem_t8code/containers_2d.jl | 58 +++ src/solvers/dgsem_t8code/dg.jl | 31 ++ src/solvers/dgsem_tree/dg_2d.jl | 22 +- src/solvers/dgsem_tree/indicators_2d.jl | 3 +- src/solvers/dgsem_unstructured/dg_2d.jl | 10 +- test/runtests.jl | 220 ++++---- test/test_t8code_2d.jl | 182 +++++++ test/test_threaded.jl | 16 + 41 files changed, 2767 insertions(+), 168 deletions(-) create mode 100644 examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl create mode 100644 examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl create mode 100644 examples/t8code_2d_dgsem/elixir_advection_basic.jl create mode 100644 examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl create mode 100644 examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl create mode 100644 examples/t8code_2d_dgsem/elixir_euler_free_stream.jl create mode 100644 examples/t8code_2d_dgsem/elixir_euler_sedov.jl create mode 100644 examples/t8code_2d_dgsem/elixir_euler_shockcapturing_ec.jl create mode 100644 examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl create mode 100644 examples/t8code_2d_dgsem/elixir_eulergravity_convergence.jl create mode 100644 examples/t8code_2d_dgsem/elixir_mhd_alfven_wave.jl create mode 100644 examples/t8code_2d_dgsem/elixir_mhd_rotor.jl create mode 100644 examples/t8code_2d_dgsem/elixir_shallowwater_source_terms.jl create mode 100644 
src/auxiliary/t8code.jl create mode 100644 src/meshes/t8code_mesh.jl create mode 100644 src/solvers/dgsem_t8code/containers.jl create mode 100644 src/solvers/dgsem_t8code/containers_2d.jl create mode 100644 src/solvers/dgsem_t8code/dg.jl create mode 100644 test/test_t8code_2d.jl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b0a2c93db3c..4790f93d913 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -69,6 +69,7 @@ jobs: - structured - p4est_part1 - p4est_part2 + - t8code_part1 - unstructured_dgmulti - parabolic - paper_self_gravitating_gas_dynamics diff --git a/Project.toml b/Project.toml index 94c47a35ac1..db410317851 100644 --- a/Project.toml +++ b/Project.toml @@ -37,6 +37,7 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" StrideArrays = "d1fa6d79-ef01-42a6-86c9-f7c551f8593b" StructArrays = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" SummationByPartsOperators = "9f78cca6-572e-554e-b819-917d2f1cf240" +T8code = "d0cc0030-9a40-4274-8435-baadcfd54fa1" TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" Triangulate = "f7e6ffb2-c36d-4f8f-a77e-16e897189344" TriplotBase = "981d1d27-644d-49a2-9326-4793e63143c3" @@ -80,6 +81,7 @@ StaticArrays = "1" StrideArrays = "0.1.18" StructArrays = "0.6" SummationByPartsOperators = "0.5.41" +T8code = "0.4.1" TimerOutputs = "0.5" Triangulate = "2.0" TriplotBase = "0.1" diff --git a/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl b/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl new file mode 100644 index 00000000000..653bab41e2d --- /dev/null +++ b/examples/t8code_2d_dgsem/elixir_advection_amr_solution_independent.jl @@ -0,0 +1,143 @@ +using OrdinaryDiffEq +using Trixi + +# Define new structs inside a module to allow re-evaluating the file. +module TrixiExtension + +using Trixi + +struct IndicatorSolutionIndependent{Cache <: NamedTuple} <: Trixi.AbstractIndicator + cache::Cache +end + +function IndicatorSolutionIndependent(semi) + basis = semi.solver.basis + alpha = Vector{real(basis)}() + cache = (; semi.mesh, alpha) + return IndicatorSolutionIndependent{typeof(cache)}(cache) +end + +function (indicator::IndicatorSolutionIndependent)(u::AbstractArray{<:Any, 4}, + mesh, equations, dg, cache; + t, kwargs...) + mesh = indicator.cache.mesh + alpha = indicator.cache.alpha + resize!(alpha, nelements(dg, cache)) + + # Predict the theoretical center. + advection_velocity = (0.2, -0.7) + center = t .* advection_velocity + + inner_distance = 1 + outer_distance = 1.85 + + # Iterate over all elements. + for element in 1:length(alpha) + # Calculate periodic distance between cell and center. + # This requires an uncurved mesh! + coordinates = SVector(0.5 * (cache.elements.node_coordinates[1, 1, 1, element] + + cache.elements.node_coordinates[1, end, 1, element]), + 0.5 * (cache.elements.node_coordinates[2, 1, 1, element] + + cache.elements.node_coordinates[2, 1, end, element])) + + # The geometric shape of the amr should be preserved when the base_level is increased. + # This is done by looking at the original coordinates of each cell. + cell_coordinates = original_coordinates(coordinates, 5 / 8) + cell_distance = periodic_distance_2d(cell_coordinates, center, 10) + if cell_distance < (inner_distance + outer_distance) / 2 + cell_coordinates = original_coordinates(coordinates, 5 / 16) + cell_distance = periodic_distance_2d(cell_coordinates, center, 10) + end + + # Set alpha according to cells position inside the circles. 
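+        # `target_level` is 2 inside the inner circle, 1 in the ring between the two
+        # circles and 0 outside; hence `alpha` takes the values 1, 0.5 and 0, which
+        # the `ControllerThreeLevel` below maps to `max_level`, `med_level` and
+        # `base_level`, respectively.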
+ target_level = (cell_distance < inner_distance) + (cell_distance < outer_distance) + alpha[element] = target_level / 2 + end + return alpha +end + +# For periodic domains, distance between two points must take into account +# periodic extensions of the domain. +function periodic_distance_2d(coordinates, center, domain_length) + dx = coordinates .- center + dx_shifted = abs.(dx .% domain_length) + dx_periodic = min.(dx_shifted, domain_length .- dx_shifted) + return sqrt(sum(dx_periodic .^ 2)) +end + +# This takes a cells coordinates and transforms them into the coordinates of a +# parent-cell it originally refined from. It does it so that the parent-cell +# has given cell_length. +function original_coordinates(coordinates, cell_length) + offset = coordinates .% cell_length + offset_sign = sign.(offset) + border = coordinates - offset + center = border + (offset_sign .* cell_length / 2) + return center +end + +end # module TrixiExtension + +import .TrixiExtension + +############################################################################### +# Semidiscretization of the linear advection equation. + +advection_velocity = (0.2, -0.7) +equations = LinearScalarAdvectionEquation2D(advection_velocity) + +initial_condition = initial_condition_gauss + +solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs) + +coordinates_min = (-5.0, -5.0) +coordinates_max = (5.0, 5.0) + +mapping = Trixi.coordinates2mapping(coordinates_min, coordinates_max) + +trees_per_dimension = (1, 1) + +mesh = T8codeMesh(trees_per_dimension, polydeg = 3, + mapping = mapping, + initial_refinement_level = 1) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) + +############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 10.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval = analysis_interval, + extra_analysis_integrals = (entropy,)) + +alive_callback = AliveCallback(analysis_interval = analysis_interval) + +amr_controller = ControllerThreeLevel(semi, + TrixiExtension.IndicatorSolutionIndependent(semi), + base_level = 4, + med_level = 5, med_threshold = 0.1, + max_level = 6, max_threshold = 0.6) + +amr_callback = AMRCallback(semi, amr_controller, + interval = 5, + adapt_initial_condition = true, + adapt_initial_condition_only_refine = true) + +stepsize_callback = StepsizeCallback(cfl = 1.6) + +callbacks = CallbackSet(summary_callback, + analysis_callback, alive_callback, + amr_callback, stepsize_callback); + +############################################################################### +# Run the simulation. + +sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false), + dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep = false, callback = callbacks); +summary_callback() # print the timer summary diff --git a/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl new file mode 100644 index 00000000000..adf1d009a59 --- /dev/null +++ b/examples/t8code_2d_dgsem/elixir_advection_amr_unstructured_flag.jl @@ -0,0 +1,87 @@ +using Downloads: download +using OrdinaryDiffEq +using Trixi + +############################################################################### +# Semidiscretization of the linear advection equation. 
+
+advection_velocity = (0.2, -0.7)
+equations = LinearScalarAdvectionEquation2D(advection_velocity)
+
+initial_condition = initial_condition_gauss
+
+boundary_condition = BoundaryConditionDirichlet(initial_condition)
+boundary_conditions = Dict(:all => boundary_condition)
+
+solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+
+# Deformed rectangle that looks like a waving flag, lower and upper faces are
+# sinus curves, left and right are vertical lines.
+f1(s) = SVector(-5.0, 5 * s - 5.0)
+f2(s) = SVector(5.0, 5 * s + 5.0)
+f3(s) = SVector(5 * s, -5.0 + 5 * sin(0.5 * pi * s))
+f4(s) = SVector(5 * s, 5.0 + 5 * sin(0.5 * pi * s))
+faces = (f1, f2, f3, f4)
+
+# This creates a mapping that transforms [-1, 1]^2 to the domain with the faces
+# defined above. It generally doesn't work for meshes loaded from mesh files
+# because these can be meshes of arbitrary domains, but the mesh below is
+# specifically built on the domain [-1, 1]^2.
+Trixi.validate_faces(faces)
+mapping_flag = Trixi.transfinite_mapping(faces)
+
+# Unstructured mesh with 24 cells of the square domain [-1, 1]^n
+mesh_file = joinpath(@__DIR__, "square_unstructured_2.inp")
+isfile(mesh_file) ||
+    download("https://gist.githubusercontent.com/efaulhaber/63ff2ea224409e55ee8423b3a33e316a/raw/7db58af7446d1479753ae718930741c47a3b79b7/square_unstructured_2.inp",
+             mesh_file)
+
+# INP mesh files are only supported by p4est. Hence, we
+# create a p4est connectivity object first from which
+# we can create a t8code mesh.
+conn = Trixi.read_inp_p4est(mesh_file, Val(2))
+
+mesh = T8codeMesh{2}(conn, polydeg = 3,
+                     mapping = mapping_flag,
+                     initial_refinement_level = 1)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    boundary_conditions = boundary_conditions)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 10.0)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi, interval = analysis_interval,
+                                     extra_analysis_integrals = (entropy,))
+
+alive_callback = AliveCallback(analysis_interval = analysis_interval)
+
+amr_controller = ControllerThreeLevel(semi, IndicatorMax(semi, variable = first),
+                                      base_level = 1,
+                                      med_level = 2, med_threshold = 0.1,
+                                      max_level = 3, max_threshold = 0.6)
+amr_callback = AMRCallback(semi, amr_controller,
+                           interval = 5,
+                           adapt_initial_condition = true,
+                           adapt_initial_condition_only_refine = true)
+
+stepsize_callback = StepsizeCallback(cfl = 0.7)
+
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback, alive_callback,
+                        amr_callback, stepsize_callback)
+
+###############################################################################
+# Run the simulation.
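+# `CarpenterKennedy2N54` is the five-stage, fourth-order low-storage Runge-Kutta
+# method of Carpenter and Kennedy; the actual step size is set by the
+# `StepsizeCallback` defined above.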
+
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
+            dt = 1, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep = false, callback = callbacks);
+
+summary_callback() # print the timer summary
diff --git a/examples/t8code_2d_dgsem/elixir_advection_basic.jl b/examples/t8code_2d_dgsem/elixir_advection_basic.jl
new file mode 100644
index 00000000000..efc51226586
--- /dev/null
+++ b/examples/t8code_2d_dgsem/elixir_advection_basic.jl
@@ -0,0 +1,59 @@
+# The same setup as tree_2d_dgsem/elixir_advection_basic.jl
+# to verify the T8codeMesh implementation against TreeMesh
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the linear advection equation
+
+advection_velocity = (0.2, -0.7)
+equations = LinearScalarAdvectionEquation2D(advection_velocity)
+
+# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
+solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+
+coordinates_min = (-1.0, -1.0) # minimum coordinates (min(x), min(y))
+coordinates_max = (1.0, 1.0) # maximum coordinates (max(x), max(y))
+
+mapping = Trixi.coordinates2mapping(coordinates_min, coordinates_max)
+
+trees_per_dimension = (8, 8)
+
+mesh = T8codeMesh(trees_per_dimension, polydeg = 3,
+                  mapping = mapping,
+                  initial_refinement_level = 1)
+
+# A semidiscretization collects data structures and functions for the spatial discretization
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
+                                    solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+# Create ODE problem with time span from 0.0 to 1.0
+ode = semidiscretize(semi, (0.0, 1.0));
+
+# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
+# and resets the timers
+summary_callback = SummaryCallback()
+
+# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
+analysis_callback = AnalysisCallback(semi, interval = 100)
+
+# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
+stepsize_callback = StepsizeCallback(cfl = 1.6)
+
+# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
+callbacks = CallbackSet(summary_callback, analysis_callback, stepsize_callback)
+
+###############################################################################
+# run the simulation
+
+# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
+            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep = false, callback = callbacks);
+
+# Print the timer summary
+summary_callback()
diff --git a/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
new file mode 100644
index 00000000000..31a8bc93697
--- /dev/null
+++ b/examples/t8code_2d_dgsem/elixir_advection_nonconforming_flag.jl
@@ -0,0 +1,109 @@
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the linear advection equation
+
+advection_velocity = (0.2, -0.7)
+equations = LinearScalarAdvectionEquation2D(advection_velocity)
+
+# Create DG solver
with polynomial degree = 4 and (local) Lax-Friedrichs/Rusanov flux as surface flux
+solver = DGSEM(polydeg = 4, surface_flux = flux_lax_friedrichs)
+
+# Deformed rectangle that looks like a waving flag,
+# lower and upper faces are sinus curves, left and right are vertical lines.
+f1(s) = SVector(-1.0, s - 1.0)
+f2(s) = SVector(1.0, s + 1.0)
+f3(s) = SVector(s, -1.0 + sin(0.5 * pi * s))
+f4(s) = SVector(s, 1.0 + sin(0.5 * pi * s))
+
+faces = (f1, f2, f3, f4)
+mapping = Trixi.transfinite_mapping(faces)
+
+# Create T8codeMesh with 3 x 2 trees and 6 x 4 elements,
+# approximate the geometry with a smaller polydeg for testing.
+trees_per_dimension = (3, 2)
+mesh = T8codeMesh(trees_per_dimension, polydeg = 3,
+                  mapping = mapping,
+                  initial_refinement_level = 1)
+
+function adapt_callback(forest,
+                        forest_from,
+                        which_tree,
+                        lelement_id,
+                        ts,
+                        is_family,
+                        num_elements,
+                        elements_ptr)::Cint
+    vertex = Vector{Cdouble}(undef, 3)
+
+    elements = unsafe_wrap(Array, elements_ptr, num_elements)
+
+    Trixi.t8_element_vertex_reference_coords(ts, elements[1], 0, pointer(vertex))
+
+    level = Trixi.t8_element_level(ts, elements[1])
+
+    # TODO: Make this condition more general.
+    if vertex[1] < 1e-8 && vertex[2] < 1e-8 && level < 4
+        # return true (refine)
+        return 1
+    else
+        # return false (don't refine)
+        return 0
+    end
+end
+
+Trixi.@T8_ASSERT(Trixi.t8_forest_is_committed(mesh.forest)!=0);
+
+# Init new forest.
+new_forest_ref = Ref{Trixi.t8_forest_t}()
+Trixi.t8_forest_init(new_forest_ref);
+new_forest = new_forest_ref[]
+
+# Check out `examples/t8_step4_partition_balance_ghost.jl` in
+# https://github.com/DLR-AMR/T8code.jl for detailed explanations.
+let set_from = C_NULL, recursive = 1, set_for_coarsening = 0, no_repartition = 0
+    Trixi.t8_forest_set_user_data(new_forest, C_NULL)
+    Trixi.t8_forest_set_adapt(new_forest, mesh.forest,
+                              Trixi.@t8_adapt_callback(adapt_callback), recursive)
+    Trixi.t8_forest_set_balance(new_forest, set_from, no_repartition)
+    Trixi.t8_forest_set_partition(new_forest, set_from, set_for_coarsening)
+    Trixi.t8_forest_set_ghost(new_forest, 1, Trixi.T8_GHOST_FACES)
+    Trixi.t8_forest_commit(new_forest)
+end
+
+mesh.forest = new_forest
+
+# A semidiscretization collects data structures and functions for the spatial discretization
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
+                                    solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+# Create ODE problem with time span from 0.0 to 0.2
+ode = semidiscretize(semi, (0.0, 0.2));
+
+# At the beginning of the main loop, the SummaryCallback prints a summary of the simulation setup
+# and resets the timers
+summary_callback = SummaryCallback()
+
+# The AnalysisCallback allows to analyse the solution in regular intervals and prints the results
+analysis_callback = AnalysisCallback(semi, interval = 100)
+
+# The StepsizeCallback handles the re-calculation of the maximum Δt after each time step
+stepsize_callback = StepsizeCallback(cfl = 1.6)
+
+# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver
+callbacks = CallbackSet(summary_callback, analysis_callback, stepsize_callback)
+
+###############################################################################
+# run the simulation
+
+# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
+            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep = false, callback = callbacks);
+
+# Print the timer summary
+summary_callback()
diff --git a/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
new file mode 100644
index 00000000000..df9cbc26f6e
--- /dev/null
+++ b/examples/t8code_2d_dgsem/elixir_advection_unstructured_flag.jl
@@ -0,0 +1,81 @@
+using Downloads: download
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# Semidiscretization of the linear advection equation.
+
+advection_velocity = (0.2, -0.7)
+equations = LinearScalarAdvectionEquation2D(advection_velocity)
+
+initial_condition = initial_condition_convergence_test
+
+boundary_condition = BoundaryConditionDirichlet(initial_condition)
+boundary_conditions = Dict(:all => boundary_condition)
+
+# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux.
+solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+
+# Deformed rectangle that looks like a waving flag,
+# lower and upper faces are sinus curves, left and right are vertical lines.
+f1(s) = SVector(-1.0, s - 1.0)
+f2(s) = SVector(1.0, s + 1.0)
+f3(s) = SVector(s, -1.0 + sin(0.5 * pi * s))
+f4(s) = SVector(s, 1.0 + sin(0.5 * pi * s))
+faces = (f1, f2, f3, f4)
+
+Trixi.validate_faces(faces)
+mapping_flag = Trixi.transfinite_mapping(faces)
+
+# Unstructured mesh with 24 cells of the square domain [-1, 1]^n.
+mesh_file = joinpath(@__DIR__, "square_unstructured_2.inp")
+isfile(mesh_file) ||
+    download("https://gist.githubusercontent.com/efaulhaber/63ff2ea224409e55ee8423b3a33e316a/raw/7db58af7446d1479753ae718930741c47a3b79b7/square_unstructured_2.inp",
+             mesh_file)
+
+# INP mesh files are only supported by p4est. Hence, we
+# create a p4est connectivity object first from which
+# we can create a t8code mesh.
+conn = Trixi.read_inp_p4est(mesh_file, Val(2))
+
+mesh = T8codeMesh{2}(conn, polydeg = 3,
+                     mapping = mapping_flag,
+                     initial_refinement_level = 2)
+
+# A semidiscretization collects data structures and functions for the spatial discretization.
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    boundary_conditions = boundary_conditions)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+# Create ODE problem with time span from 0.0 to 0.2.
+tspan = (0.0, 0.2)
+ode = semidiscretize(semi, tspan)
+
+# At the beginning of the main loop, the SummaryCallback prints a summary of
+# the simulation setup and resets the timers.
+summary_callback = SummaryCallback()
+
+# The AnalysisCallback allows to analyse the solution in regular intervals and
+# prints the results.
+analysis_callback = AnalysisCallback(semi, interval = 100)
+
+# The StepsizeCallback handles the re-calculation of the maximum Δt after each
+# time step.
+stepsize_callback = StepsizeCallback(cfl = 1.4)
+
+# Create a CallbackSet to collect all callbacks such that they can be passed to
+# the ODE solver.
+callbacks = CallbackSet(summary_callback, analysis_callback, stepsize_callback)
+
+###############################################################################
+# Run the simulation.
+
+# OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks.
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
+            dt = 1.0, # Solve needs some value here but it will be overwritten by the stepsize_callback.
+            save_everystep = false, callback = callbacks);
+
+# Print the timer summary.
+summary_callback()
diff --git a/examples/t8code_2d_dgsem/elixir_euler_free_stream.jl b/examples/t8code_2d_dgsem/elixir_euler_free_stream.jl
new file mode 100644
index 00000000000..01e0449c67e
--- /dev/null
+++ b/examples/t8code_2d_dgsem/elixir_euler_free_stream.jl
@@ -0,0 +1,122 @@
+using Downloads: download
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# Semidiscretization of the compressible Euler equations.
+
+equations = CompressibleEulerEquations2D(1.4)
+
+initial_condition = initial_condition_constant
+
+solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+
+# Mapping as described in https://arxiv.org/abs/2012.12040 but reduced to 2D
+function mapping(xi_, eta_)
+    # Transform input variables between -1 and 1 onto [0,3]
+    xi = 1.5 * xi_ + 1.5
+    eta = 1.5 * eta_ + 1.5
+
+    y = eta + 3 / 8 * (cos(1.5 * pi * (2 * xi - 3) / 3) *
+                       cos(0.5 * pi * (2 * eta - 3) / 3))
+
+    x = xi + 3 / 8 * (cos(0.5 * pi * (2 * xi - 3) / 3) *
+                      cos(2 * pi * (2 * y - 3) / 3))
+
+    return SVector(x, y)
+end
+
+###############################################################################
+# Get the uncurved mesh from a file (downloads the file if not available locally)
+
+# Unstructured mesh with 48 cells of the square domain [-1, 1]^n
+mesh_file = joinpath(@__DIR__, "square_unstructured_1.inp")
+isfile(mesh_file) ||
+    download("https://gist.githubusercontent.com/efaulhaber/a075f8ec39a67fa9fad8f6f84342cbca/raw/a7206a02ed3a5d3cadacd8d9694ac154f9151db7/square_unstructured_1.inp",
+             mesh_file)
+
+# INP mesh files are only supported by p4est. Hence, we
+# create a p4est connectivity object first from which
+# we can create a t8code mesh.
+conn = Trixi.read_inp_p4est(mesh_file, Val(2))
+
+mesh = T8codeMesh{2}(conn, polydeg = 3,
+                     mapping = mapping,
+                     initial_refinement_level = 1)
+
+function adapt_callback(forest,
+                        forest_from,
+                        which_tree,
+                        lelement_id,
+                        ts,
+                        is_family,
+                        num_elements,
+                        elements_ptr)::Cint
+    vertex = Vector{Cdouble}(undef, 3)
+
+    elements = unsafe_wrap(Array, elements_ptr, num_elements)
+
+    Trixi.t8_element_vertex_reference_coords(ts, elements[1], 0, pointer(vertex))
+
+    level = Trixi.t8_element_level(ts, elements[1])
+
+    # TODO: Make this condition more general.
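+    # `vertex` holds the reference coordinates of vertex 0 of the current element,
+    # so this refines only the elements touching the origin of their tree, up to
+    # the maximum refinement level checked below.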
+ if vertex[1] < 1e-8 && vertex[2] < 1e-8 && level < 3 + # return true (refine) + return 1 + else + # return false (don't refine) + return 0 + end +end + +Trixi.@T8_ASSERT(Trixi.t8_forest_is_committed(mesh.forest)!=0); + +# Init new forest. +new_forest_ref = Ref{Trixi.t8_forest_t}() +Trixi.t8_forest_init(new_forest_ref); +new_forest = new_forest_ref[] + +# Check out `examples/t8_step4_partition_balance_ghost.jl` in +# https://github.com/DLR-AMR/T8code.jl for detailed explanations. +let set_from = C_NULL, recursive = 1, set_for_coarsening = 0, no_repartition = 0 + Trixi.t8_forest_set_user_data(new_forest, C_NULL) + Trixi.t8_forest_set_adapt(new_forest, mesh.forest, + Trixi.@t8_adapt_callback(adapt_callback), recursive) + Trixi.t8_forest_set_balance(new_forest, set_from, no_repartition) + Trixi.t8_forest_set_partition(new_forest, set_from, set_for_coarsening) + Trixi.t8_forest_set_ghost(new_forest, 1, Trixi.T8_GHOST_FACES) + Trixi.t8_forest_commit(new_forest) +end + +mesh.forest = new_forest + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver, + boundary_conditions = Dict(:all => BoundaryConditionDirichlet(initial_condition))) + +############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval = analysis_interval) + +alive_callback = AliveCallback(analysis_interval = analysis_interval) + +stepsize_callback = StepsizeCallback(cfl = 2.0) + +callbacks = CallbackSet(summary_callback, + analysis_callback, alive_callback, + stepsize_callback) + +############################################################################### +# run the simulation + +sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false), + dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep = false, callback = callbacks); +summary_callback() # print the timer summary diff --git a/examples/t8code_2d_dgsem/elixir_euler_sedov.jl b/examples/t8code_2d_dgsem/elixir_euler_sedov.jl new file mode 100644 index 00000000000..965d794f8dc --- /dev/null +++ b/examples/t8code_2d_dgsem/elixir_euler_sedov.jl @@ -0,0 +1,97 @@ +using OrdinaryDiffEq +using Trixi + +############################################################################### +# Semidiscretization of the compressible Euler equations. + +equations = CompressibleEulerEquations2D(1.4) + +""" + initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations2D) + +The Sedov blast wave setup based on Flash +- http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 +""" +function initial_condition_sedov_blast_wave(x, t, equations::CompressibleEulerEquations2D) + # Set up polar coordinates + inicenter = SVector(0.0, 0.0) + x_norm = x[1] - inicenter[1] + y_norm = x[2] - inicenter[2] + r = sqrt(x_norm^2 + y_norm^2) + + # Setup based on http://flash.uchicago.edu/site/flashcode/user_support/flash_ug_devel/node184.html#SECTION010114000000000000000 + r0 = 0.21875 # = 3.5 * smallest dx (for domain length=4 and max-ref=6) + E = 1.0 + p0_inner = 3 * (equations.gamma - 1) * E / (3 * pi * r0^2) + p0_outer = 1.0e-5 # = true Sedov setup + + # Calculate primitive variables + rho = 1.0 + v1 = 0.0 + v2 = 0.0 + p = r > r0 ? 
p0_outer : p0_inner + + return prim2cons(SVector(rho, v1, v2, p), equations) +end + +initial_condition = initial_condition_sedov_blast_wave + +# Get the DG approximation space +surface_flux = flux_lax_friedrichs +volume_flux = flux_ranocha +polydeg = 4 +basis = LobattoLegendreBasis(polydeg) +indicator_sc = IndicatorHennemannGassner(equations, basis, + alpha_max = 1.0, + alpha_min = 0.001, + alpha_smooth = true, + variable = density_pressure) +volume_integral = VolumeIntegralShockCapturingHG(indicator_sc; + volume_flux_dg = volume_flux, + volume_flux_fv = surface_flux) + +solver = DGSEM(polydeg = polydeg, surface_flux = surface_flux, + volume_integral = volume_integral) + +############################################################################### + +coordinates_min = (-1.0, -1.0) +coordinates_max = (1.0, 1.0) + +mapping = Trixi.coordinates2mapping(coordinates_min, coordinates_max) + +trees_per_dimension = (4, 4) + +mesh = T8codeMesh(trees_per_dimension, polydeg = 4, + mapping = mapping, + initial_refinement_level = 2, periodicity = true) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) + +############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 12.5) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 300 +analysis_callback = AnalysisCallback(semi, interval = analysis_interval) + +alive_callback = AliveCallback(analysis_interval = analysis_interval) + +stepsize_callback = StepsizeCallback(cfl = 0.5) + +callbacks = CallbackSet(summary_callback, + analysis_callback, + alive_callback, + stepsize_callback) + +############################################################################### +# run the simulation + +sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false), + dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep = false, callback = callbacks); +summary_callback() # print the timer summary diff --git a/examples/t8code_2d_dgsem/elixir_euler_shockcapturing_ec.jl b/examples/t8code_2d_dgsem/elixir_euler_shockcapturing_ec.jl new file mode 100644 index 00000000000..55a9063a001 --- /dev/null +++ b/examples/t8code_2d_dgsem/elixir_euler_shockcapturing_ec.jl @@ -0,0 +1,68 @@ +using OrdinaryDiffEq +using Trixi + +############################################################################### +# Semidiscretization of the compressible Euler equations. 
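+# Weak blast wave test with the entropy-conservative flux of Ranocha used as both
+# volume and surface flux, combined with Hennemann-Gassner shock capturing.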
+
+equations = CompressibleEulerEquations2D(1.4)
+
+initial_condition = initial_condition_weak_blast_wave
+
+surface_flux = flux_ranocha
+volume_flux = flux_ranocha
+polydeg = 4
+basis = LobattoLegendreBasis(polydeg)
+indicator_sc = IndicatorHennemannGassner(equations, basis,
+                                         alpha_max = 1.0,
+                                         alpha_min = 0.001,
+                                         alpha_smooth = true,
+                                         variable = density_pressure)
+volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
+                                                 volume_flux_dg = volume_flux,
+                                                 volume_flux_fv = surface_flux)
+
+solver = DGSEM(polydeg = polydeg, surface_flux = surface_flux,
+               volume_integral = volume_integral)
+
+###############################################################################
+
+coordinates_min = (-1.0, -1.0)
+coordinates_max = (1.0, 1.0)
+
+mapping = Trixi.coordinates2mapping(coordinates_min, coordinates_max)
+
+trees_per_dimension = (4, 4)
+
+mesh = T8codeMesh(trees_per_dimension, polydeg = 4,
+                  mapping = mapping,
+                  initial_refinement_level = 2, periodicity = true)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 2.0)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval = analysis_interval)
+
+stepsize_callback = StepsizeCallback(cfl = 1.0)
+
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback,
+                        alive_callback,
+                        stepsize_callback)
+
+###############################################################################
+# run the simulation
+
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
+            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep = false, callback = callbacks);
+summary_callback() # print the timer summary
diff --git a/examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl b/examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl
new file mode 100644
index 00000000000..21f26d79ba8
--- /dev/null
+++ b/examples/t8code_2d_dgsem/elixir_euler_source_terms_nonconforming_unstructured_flag.jl
@@ -0,0 +1,122 @@
+using Downloads: download
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the compressible Euler equations
+
+equations = CompressibleEulerEquations2D(1.4)
+
+initial_condition = initial_condition_convergence_test
+
+source_terms = source_terms_convergence_test
+
+# Boundary conditions must be passed as a `Dict`
+boundary_condition = BoundaryConditionDirichlet(initial_condition)
+boundary_conditions = Dict(:all => boundary_condition)
+
+solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
+
+# Deformed rectangle that looks like a waving flag,
+# lower and upper faces are sine curves, left and right are vertical lines.
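+# The `faces` tuple below lists these boundary curves in the order
+# (-x, +x, -y, +y); each face function maps s in [-1, 1] to a point on the
+# corresponding edge.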
+f1(s) = SVector(-1.0, s - 1.0)
+f2(s) = SVector(1.0, s + 1.0)
+f3(s) = SVector(s, -1.0 + sin(0.5 * pi * s))
+f4(s) = SVector(s, 1.0 + sin(0.5 * pi * s))
+faces = (f1, f2, f3, f4)
+
+Trixi.validate_faces(faces)
+mapping_flag = Trixi.transfinite_mapping(faces)
+
+# Get the uncurved mesh from a file (downloads the file if not available locally)
+# Unstructured mesh with 24 cells of the square domain [-1, 1]^n
+mesh_file = joinpath(@__DIR__, "square_unstructured_2.inp")
+isfile(mesh_file) ||
+    download("https://gist.githubusercontent.com/efaulhaber/63ff2ea224409e55ee8423b3a33e316a/raw/7db58af7446d1479753ae718930741c47a3b79b7/square_unstructured_2.inp",
+             mesh_file)
+
+# INP mesh files are only supported by p4est. Hence, we
+# create a p4est connectivity object first from which
+# we can create a t8code mesh.
+conn = Trixi.read_inp_p4est(mesh_file, Val(2))
+
+mesh = T8codeMesh{2}(conn, polydeg = 3,
+                     mapping = mapping_flag,
+                     initial_refinement_level = 1)
+
+function adapt_callback(forest,
+                        forest_from,
+                        which_tree,
+                        lelement_id,
+                        ts,
+                        is_family,
+                        num_elements,
+                        elements_ptr)::Cint
+    vertex = Vector{Cdouble}(undef, 3)
+
+    elements = unsafe_wrap(Array, elements_ptr, num_elements)
+
+    Trixi.t8_element_vertex_reference_coords(ts, elements[1], 0, pointer(vertex))
+
+    level = Trixi.t8_element_level(ts, elements[1])
+
+    # TODO: Make this condition more general.
+    if vertex[1] < 1e-8 && vertex[2] < 1e-8 && level < 2
+        # return true (refine)
+        return 1
+    else
+        # return false (don't refine)
+        return 0
+    end
+end
+
+@assert(Trixi.t8_forest_is_committed(mesh.forest)!=0);
+
+# Init new forest.
+new_forest_ref = Ref{Trixi.t8_forest_t}()
+Trixi.t8_forest_init(new_forest_ref);
+new_forest = new_forest_ref[]
+
+# Check out `examples/t8_step4_partition_balance_ghost.jl` in
+# https://github.com/DLR-AMR/T8code.jl for detailed explanations.
+let set_from = C_NULL, recursive = 1, set_for_coarsening = 0, no_repartition = 0
+    Trixi.t8_forest_set_user_data(new_forest, C_NULL)
+    Trixi.t8_forest_set_adapt(new_forest, mesh.forest,
+                              Trixi.@t8_adapt_callback(adapt_callback), recursive)
+    Trixi.t8_forest_set_balance(new_forest, set_from, no_repartition)
+    Trixi.t8_forest_set_partition(new_forest, set_from, set_for_coarsening)
+    Trixi.t8_forest_set_ghost(new_forest, 1, Trixi.T8_GHOST_FACES)
+    Trixi.t8_forest_commit(new_forest)
+end
+
+mesh.forest = new_forest
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    source_terms = source_terms,
+                                    boundary_conditions = boundary_conditions)
+
+###############################################################################
+# ODE solvers, callbacks etc.
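+
+# Illustrative sanity check (a sketch, not required by the elixir): once
+# committed, the adapted forest can be queried, e.g.
+#
+#   @assert Trixi.t8_forest_is_committed(mesh.forest) != 0
+#   ncells_after_refinement = Trixi.t8_forest_get_local_num_elements(mesh.forest)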
+ +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval = analysis_interval) + +alive_callback = AliveCallback(analysis_interval = analysis_interval) + +stepsize_callback = StepsizeCallback(cfl = 0.8) + +callbacks = CallbackSet(summary_callback, + analysis_callback, alive_callback, + stepsize_callback) +############################################################################### +# run the simulation + +sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false), + dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep = false, callback = callbacks); +summary_callback() # print the timer summary diff --git a/examples/t8code_2d_dgsem/elixir_eulergravity_convergence.jl b/examples/t8code_2d_dgsem/elixir_eulergravity_convergence.jl new file mode 100644 index 00000000000..32649eacff4 --- /dev/null +++ b/examples/t8code_2d_dgsem/elixir_eulergravity_convergence.jl @@ -0,0 +1,77 @@ +using OrdinaryDiffEq +using Trixi + +initial_condition = initial_condition_eoc_test_coupled_euler_gravity + +############################################################################### +# semidiscretization of the compressible Euler equations +gamma = 2.0 +equations_euler = CompressibleEulerEquations2D(gamma) + +polydeg = 3 +solver_euler = DGSEM(polydeg, flux_hll) + +coordinates_min = (0.0, 0.0) +coordinates_max = (2.0, 2.0) + +trees_per_dimension = (1, 1) + +mapping = Trixi.coordinates2mapping(coordinates_min, coordinates_max) + +mesh = T8codeMesh(trees_per_dimension, polydeg = 1, + mapping = mapping, + initial_refinement_level = 2) + +semi_euler = SemidiscretizationHyperbolic(mesh, equations_euler, initial_condition, solver_euler, + source_terms=source_terms_eoc_test_coupled_euler_gravity) + + +############################################################################### +# semidiscretization of the hyperbolic diffusion equations +equations_gravity = HyperbolicDiffusionEquations2D() + +solver_gravity = DGSEM(polydeg, flux_lax_friedrichs) + +semi_gravity = SemidiscretizationHyperbolic(mesh, equations_gravity, initial_condition, solver_gravity, + source_terms=source_terms_harmonic) + + +############################################################################### +# combining both semidiscretizations for Euler + self-gravity +parameters = ParametersEulerGravity(background_density=2.0, # aka rho0 + # rho0 is (ab)used to add a "+8π" term to the source terms + # for the manufactured solution + gravitational_constant=1.0, # aka G + cfl=1.1, + resid_tol=1.0e-10, + n_iterations_max=1000, + timestep_gravity=timestep_gravity_erk52_3Sstar!) + +semi = SemidiscretizationEulerGravity(semi_euler, semi_gravity, parameters) + + +############################################################################### +# ODE solvers, callbacks etc. 
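+
+# Descriptive note: in `SemidiscretizationEulerGravity`, the elliptic gravity
+# problem is approximated by iterating the hyperbolic diffusion solver above
+# in pseudo-time (up to `resid_tol`, at most `n_iterations_max` iterations)
+# whenever the gravitational potential is needed; `semi.gravity_counter`
+# records these subcycles, see the `println` at the end of this elixir.
+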
+tspan = (0.0, 0.5) +ode = semidiscretize(semi, tspan); + +summary_callback = SummaryCallback() + +stepsize_callback = StepsizeCallback(cfl=0.8) + +analysis_interval = 100 +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +analysis_callback = AnalysisCallback(semi_euler, interval=analysis_interval, + save_analysis=true) + +callbacks = CallbackSet(summary_callback, stepsize_callback, + analysis_callback, alive_callback) + +############################################################################### +# run the simulation +sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); +summary_callback() # print the timer summary +println("Number of gravity subcycles: ", semi.gravity_counter.ncalls_since_readout) diff --git a/examples/t8code_2d_dgsem/elixir_mhd_alfven_wave.jl b/examples/t8code_2d_dgsem/elixir_mhd_alfven_wave.jl new file mode 100644 index 00000000000..463f916fa2e --- /dev/null +++ b/examples/t8code_2d_dgsem/elixir_mhd_alfven_wave.jl @@ -0,0 +1,60 @@ +using OrdinaryDiffEq +using Trixi + +############################################################################### +# Semidiscretization of the compressible ideal GLM-MHD equations. + +gamma = 5/3 +equations = IdealGlmMhdEquations2D(gamma) + +initial_condition = initial_condition_convergence_test + +# Get the DG approximation space +volume_flux = (flux_central, flux_nonconservative_powell) +solver = DGSEM(polydeg=4, surface_flux=(flux_hll, flux_nonconservative_powell), + volume_integral=VolumeIntegralFluxDifferencing(volume_flux)) + +coordinates_min = (0.0 , 0.0 ) +coordinates_max = (sqrt(2.0), sqrt(2.0)) + +mapping = Trixi.coordinates2mapping(coordinates_min, coordinates_max) + +trees_per_dimension = (8, 8) + +mesh = T8codeMesh(trees_per_dimension, polydeg=3, + mapping=mapping, + initial_refinement_level=0, periodicity=true) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) + +############################################################################### +# ODE solvers, callbacks etc. 
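+
+# Descriptive note: `GlmSpeedCallback` below updates the divergence-cleaning
+# speed c_h of the GLM-MHD system every time step, based on the current time
+# step and the same `cfl` value that the `StepsizeCallback` uses.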
+ +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +cfl = 0.9 +stepsize_callback = StepsizeCallback(cfl=cfl) + +glm_speed_callback = GlmSpeedCallback(glm_scale=0.5, cfl=cfl) + +callbacks = CallbackSet(summary_callback, + analysis_callback, + alive_callback, + stepsize_callback, + glm_speed_callback) + +############################################################################### +# run the simulation + +sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); +summary_callback() # print the timer summary diff --git a/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl b/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl new file mode 100644 index 00000000000..9a4bd99e444 --- /dev/null +++ b/examples/t8code_2d_dgsem/elixir_mhd_rotor.jl @@ -0,0 +1,134 @@ +using Downloads: download +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the compressible ideal GLM-MHD equations +equations = IdealGlmMhdEquations2D(1.4) + +""" + initial_condition_rotor(x, t, equations::IdealGlmMhdEquations2D) + +The classical MHD rotor test case. Here, the setup is taken from +- Dominik Derigs, Gregor J. Gassner, Stefanie Walch & Andrew R. Winters (2018) + Entropy Stable Finite Volume Approximations for Ideal Magnetohydrodynamics + [doi: 10.1365/s13291-018-0178-9](https://doi.org/10.1365/s13291-018-0178-9) +""" +function initial_condition_rotor(x, t, equations::IdealGlmMhdEquations2D) + # setup taken from Derigs et al. 
DMV article (2018)
+    # domain must be [0, 1] x [0, 1], γ = 1.4
+    dx = x[1] - 0.5
+    dy = x[2] - 0.5
+    r = sqrt(dx^2 + dy^2)
+    f = (0.115 - r) / 0.015
+    if r <= 0.1
+        rho = 10.0
+        v1 = -20.0 * dy
+        v2 = 20.0 * dx
+    elseif r >= 0.115
+        rho = 1.0
+        v1 = 0.0
+        v2 = 0.0
+    else
+        rho = 1.0 + 9.0 * f
+        v1 = -20.0 * f * dy
+        v2 = 20.0 * f * dx
+    end
+    v3 = 0.0
+    p = 1.0
+    B1 = 5.0 / sqrt(4.0 * pi)
+    B2 = 0.0
+    B3 = 0.0
+    psi = 0.0
+    return prim2cons(SVector(rho, v1, v2, v3, p, B1, B2, B3, psi), equations)
+end
+initial_condition = initial_condition_rotor
+
+surface_flux = (flux_lax_friedrichs, flux_nonconservative_powell)
+volume_flux = (flux_hindenlang_gassner, flux_nonconservative_powell)
+polydeg = 4
+basis = LobattoLegendreBasis(polydeg)
+indicator_sc = IndicatorHennemannGassner(equations, basis,
+                                         alpha_max = 0.5,
+                                         alpha_min = 0.001,
+                                         alpha_smooth = true,
+                                         variable = density_pressure)
+volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
+                                                 volume_flux_dg = volume_flux,
+                                                 volume_flux_fv = surface_flux)
+solver = DGSEM(basis, surface_flux, volume_integral)
+
+# Affine-type mapping to take the [-1,1]^2 domain from the mesh file
+# and put it onto the rotor domain [0,1]^2 and then warp it with a mapping
+# as described in https://arxiv.org/abs/2012.12040
+function mapping_twist(xi, eta)
+    y = 0.5 * (eta + 1.0) +
+        0.05 * cos(1.5 * pi * (2.0 * xi - 1.0)) * cos(0.5 * pi * (2.0 * eta - 1.0))
+    x = 0.5 * (xi + 1.0) + 0.05 * cos(0.5 * pi * (2.0 * xi - 1.0)) * cos(2.0 * pi * y)
+    return SVector(x, y)
+end
+
+mesh_file = joinpath(@__DIR__, "square_unstructured_2.inp")
+isfile(mesh_file) ||
+    download("https://gist.githubusercontent.com/efaulhaber/63ff2ea224409e55ee8423b3a33e316a/raw/7db58af7446d1479753ae718930741c47a3b79b7/square_unstructured_2.inp",
+             mesh_file)
+
+# INP mesh files are only supported by p4est. Hence, we
+# create a p4est connectivity object first from which
+# we can create a t8code mesh.
+conn = Trixi.read_inp_p4est(mesh_file, Val(2))
+
+mesh = T8codeMesh{2}(conn, polydeg = 4,
+                     mapping = mapping_twist,
+                     initial_refinement_level = 1)
+
+boundary_condition = BoundaryConditionDirichlet(initial_condition)
+boundary_conditions = Dict(:all => boundary_condition)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    boundary_conditions = boundary_conditions)
+
+###############################################################################
+# ODE solvers, callbacks etc.
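+
+# Descriptive note: `T8codeMesh`es created from mesh files label every
+# physical boundary `:all` (there is no generic way to distinguish them),
+# which is why a single Dirichlet boundary condition is registered above via
+# `Dict(:all => boundary_condition)`.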
+
+tspan = (0.0, 0.15)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval = analysis_interval)
+
+amr_indicator = IndicatorLöhner(semi,
+                                variable = density_pressure)
+
+amr_controller = ControllerThreeLevel(semi, amr_indicator,
+                                      base_level = 1,
+                                      med_level = 3, med_threshold = 0.05,
+                                      max_level = 5, max_threshold = 0.1)
+amr_callback = AMRCallback(semi, amr_controller,
+                           interval = 5,
+                           adapt_initial_condition = true,
+                           adapt_initial_condition_only_refine = true)
+
+cfl = 0.5
+stepsize_callback = StepsizeCallback(cfl = cfl)
+
+glm_speed_callback = GlmSpeedCallback(glm_scale = 0.5, cfl = cfl)
+
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback,
+                        alive_callback,
+                        amr_callback,
+                        stepsize_callback,
+                        glm_speed_callback)
+
+###############################################################################
+# run the simulation
+
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
+            dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep = false, callback = callbacks);
+summary_callback() # print the timer summary
diff --git a/examples/t8code_2d_dgsem/elixir_shallowwater_source_terms.jl b/examples/t8code_2d_dgsem/elixir_shallowwater_source_terms.jl
new file mode 100644
index 00000000000..c19f440ebc7
--- /dev/null
+++ b/examples/t8code_2d_dgsem/elixir_shallowwater_source_terms.jl
@@ -0,0 +1,60 @@
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# Semidiscretization of the shallow water equations.
+
+equations = ShallowWaterEquations2D(gravity_constant=9.81)
+
+initial_condition = initial_condition_convergence_test # MMS EOC test
+
+
+###############################################################################
+# Get the DG approximation space
+
+volume_flux = (flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal)
+solver = DGSEM(polydeg=3, surface_flux=(flux_lax_friedrichs, flux_nonconservative_fjordholm_etal),
+               volume_integral=VolumeIntegralFluxDifferencing(volume_flux))
+
+###############################################################################
+# Get the T8codeMesh and set up a periodic mesh
+
+coordinates_min = (0.0, 0.0) # minimum coordinates (min(x), min(y))
+coordinates_max = (sqrt(2.0), sqrt(2.0)) # maximum coordinates (max(x), max(y))
+
+mapping = Trixi.coordinates2mapping(coordinates_min, coordinates_max)
+
+trees_per_dimension = (8, 8)
+
+mesh = T8codeMesh(trees_per_dimension, polydeg=3,
+                  mapping=mapping,
+                  initial_refinement_level=1)
+
+# A semidiscretization collects data structures and functions for the spatial discretization
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    source_terms=source_terms_convergence_test)
+
+
+###############################################################################
+# ODE solvers, callbacks etc.
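+
+# Descriptive note: in contrast to the elixirs above, no `StepsizeCallback` is
+# used here; the time integrator below controls the step size adaptively from
+# the `abstol`/`reltol` error tolerances.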
+ +# Create ODE problem with time span from 0.0 to 1.0 +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 500 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback) + +############################################################################### +# run the simulation + +# use a Runge-Kutta method with automatic (error based) time step size control +sol = solve(ode, RDPK3SpFSAL49(); abstol=1.0e-8, reltol=1.0e-8, + ode_default_options()..., callback=callbacks); +summary_callback() # print the timer summary diff --git a/src/Trixi.jl b/src/Trixi.jl index b0c872b1904..990c33f3c94 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -54,6 +54,7 @@ using Octavian: Octavian, matmul! using Polyester: Polyester, @batch # You know, the cheapest threads you can find... using OffsetArrays: OffsetArray, OffsetVector using P4est +using T8code using Setfield: @set using RecipesBase: RecipesBase using Requires: @require @@ -110,6 +111,7 @@ include("basic_types.jl") include("auxiliary/auxiliary.jl") include("auxiliary/mpi.jl") include("auxiliary/p4est.jl") +include("auxiliary/t8code.jl") include("equations/equations.jl") include("meshes/meshes.jl") include("solvers/solvers.jl") @@ -210,7 +212,7 @@ export entropy, energy_total, energy_kinetic, energy_internal, energy_magnetic, export lake_at_rest_error export ncomponents, eachcomponent -export TreeMesh, StructuredMesh, UnstructuredMesh2D, P4estMesh +export TreeMesh, StructuredMesh, UnstructuredMesh2D, P4estMesh, T8codeMesh export DG, DGSEM, LobattoLegendreBasis, @@ -277,6 +279,7 @@ function __init__() init_mpi() init_p4est() + init_t8code() register_error_hints() diff --git a/src/auxiliary/t8code.jl b/src/auxiliary/t8code.jl new file mode 100644 index 00000000000..37cb782bb93 --- /dev/null +++ b/src/auxiliary/t8code.jl @@ -0,0 +1,486 @@ +""" + init_t8code() + +Initialize `t8code` by calling `sc_init`, `p4est_init`, and `t8_init` while +setting the log level to `SC_LP_ERROR`. This function will check if `t8code` +is already initialized and if yes, do nothing, thus it is safe to call it +multiple times. +""" +function init_t8code() + t8code_package_id = t8_get_package_id() + if t8code_package_id >= 0 + return nothing + end + + # Initialize the sc library, has to happen before we initialize t8code. + let catch_signals = 0, print_backtrace = 0, log_handler = C_NULL + T8code.Libt8.sc_init(mpi_comm(), catch_signals, print_backtrace, log_handler, + T8code.Libt8.SC_LP_ERROR) + end + + if T8code.Libt8.p4est_is_initialized() == 0 + # Initialize `p4est` with log level ERROR to prevent a lot of output in AMR simulations + T8code.Libt8.p4est_init(C_NULL, T8code.Libt8.SC_LP_ERROR) + end + + # Initialize t8code with log level ERROR to prevent a lot of output in AMR simulations. + t8_init(T8code.Libt8.SC_LP_ERROR) + + if haskey(ENV, "TRIXI_T8CODE_SC_FINALIZE") + # Normally, `sc_finalize` should always be called during shutdown of an + # application. It checks whether there is still un-freed memory by t8code + # and/or T8code.jl and throws an exception if this is the case. For + # production runs this is not mandatory, but is helpful during + # development. Hence, this option is only activated when environment + # variable TRIXI_T8CODE_SC_FINALIZE exists. + @warn "T8code.jl: sc_finalize will be called during shutdown of Trixi.jl." 
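+        # The hook below runs `sc_finalize` right before MPI itself is
+        # finalized, i.e., while MPI is still usable for t8code's cleanup.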
+ MPI.add_finalize_hook!(T8code.Libt8.sc_finalize) + end + + return nothing +end + +function trixi_t8_unref_forest(forest) + t8_forest_unref(Ref(forest)) +end + +function t8_free(ptr) + T8code.Libt8.sc_free(t8_get_package_id(), ptr) +end + +function trixi_t8_count_interfaces(forest) + # Check that forest is a committed, that is valid and usable, forest. + @assert t8_forest_is_committed(forest) != 0 + + # Get the number of local elements of forest. + num_local_elements = t8_forest_get_local_num_elements(forest) + # Get the number of ghost elements of forest. + num_ghost_elements = t8_forest_get_num_ghosts(forest) + # Get the number of trees that have elements of this process. + num_local_trees = t8_forest_get_num_local_trees(forest) + + current_index = t8_locidx_t(0) + + local_num_conform = 0 + local_num_mortars = 0 + local_num_boundary = 0 + + for itree in 0:(num_local_trees - 1) + tree_class = t8_forest_get_tree_class(forest, itree) + eclass_scheme = t8_forest_get_eclass_scheme(forest, tree_class) + + # Get the number of elements of this tree. + num_elements_in_tree = t8_forest_get_tree_num_elements(forest, itree) + + for ielement in 0:(num_elements_in_tree - 1) + element = t8_forest_get_element_in_tree(forest, itree, ielement) + + level = t8_element_level(eclass_scheme, element) + + num_faces = t8_element_num_faces(eclass_scheme, element) + + for iface in 0:(num_faces - 1) + pelement_indices_ref = Ref{Ptr{t8_locidx_t}}() + pneighbor_leafs_ref = Ref{Ptr{Ptr{t8_element}}}() + pneigh_scheme_ref = Ref{Ptr{t8_eclass_scheme}}() + + dual_faces_ref = Ref{Ptr{Cint}}() + num_neighbors_ref = Ref{Cint}() + + forest_is_balanced = Cint(1) + + t8_forest_leaf_face_neighbors(forest, itree, element, + pneighbor_leafs_ref, iface, dual_faces_ref, + num_neighbors_ref, + pelement_indices_ref, pneigh_scheme_ref, + forest_is_balanced) + + num_neighbors = num_neighbors_ref[] + neighbor_ielements = unsafe_wrap(Array, pelement_indices_ref[], + num_neighbors) + neighbor_leafs = unsafe_wrap(Array, pneighbor_leafs_ref[], num_neighbors) + neighbor_scheme = pneigh_scheme_ref[] + + if num_neighbors > 0 + neighbor_level = t8_element_level(neighbor_scheme, neighbor_leafs[1]) + + # Conforming interface: The second condition ensures we only visit the interface once. + if level == neighbor_level && current_index <= neighbor_ielements[1] + local_num_conform += 1 + elseif level < neighbor_level + local_num_mortars += 1 + end + + else + local_num_boundary += 1 + end + + t8_free(dual_faces_ref[]) + t8_free(pneighbor_leafs_ref[]) + t8_free(pelement_indices_ref[]) + end # for + + current_index += 1 + end # for + end # for + + return (interfaces = local_num_conform, + mortars = local_num_mortars, + boundaries = local_num_boundary) +end + +function trixi_t8_fill_mesh_info(forest, elements, interfaces, mortars, boundaries, + boundary_names) + # Check that forest is a committed, that is valid and usable, forest. + @assert t8_forest_is_committed(forest) != 0 + + # Get the number of local elements of forest. + num_local_elements = t8_forest_get_local_num_elements(forest) + # Get the number of ghost elements of forest. + num_ghost_elements = t8_forest_get_num_ghosts(forest) + # Get the number of trees that have elements of this process. 
+ num_local_trees = t8_forest_get_num_local_trees(forest) + + current_index = t8_locidx_t(0) + + local_num_conform = 0 + local_num_mortars = 0 + local_num_boundary = 0 + + for itree in 0:(num_local_trees - 1) + tree_class = t8_forest_get_tree_class(forest, itree) + eclass_scheme = t8_forest_get_eclass_scheme(forest, tree_class) + + # Get the number of elements of this tree. + num_elements_in_tree = t8_forest_get_tree_num_elements(forest, itree) + + for ielement in 0:(num_elements_in_tree - 1) + element = t8_forest_get_element_in_tree(forest, itree, ielement) + + level = t8_element_level(eclass_scheme, element) + + num_faces = t8_element_num_faces(eclass_scheme, element) + + for iface in 0:(num_faces - 1) + + # Compute the `orientation` of the touching faces. + if t8_element_is_root_boundary(eclass_scheme, element, iface) == 1 + cmesh = t8_forest_get_cmesh(forest) + itree_in_cmesh = t8_forest_ltreeid_to_cmesh_ltreeid(forest, itree) + iface_in_tree = t8_element_tree_face(eclass_scheme, element, iface) + orientation_ref = Ref{Cint}() + + t8_cmesh_get_face_neighbor(cmesh, itree_in_cmesh, iface_in_tree, C_NULL, + orientation_ref) + orientation = orientation_ref[] + else + orientation = zero(Cint) + end + + pelement_indices_ref = Ref{Ptr{t8_locidx_t}}() + pneighbor_leafs_ref = Ref{Ptr{Ptr{t8_element}}}() + pneigh_scheme_ref = Ref{Ptr{t8_eclass_scheme}}() + + dual_faces_ref = Ref{Ptr{Cint}}() + num_neighbors_ref = Ref{Cint}() + + forest_is_balanced = Cint(1) + + t8_forest_leaf_face_neighbors(forest, itree, element, + pneighbor_leafs_ref, iface, dual_faces_ref, + num_neighbors_ref, + pelement_indices_ref, pneigh_scheme_ref, + forest_is_balanced) + + num_neighbors = num_neighbors_ref[] + dual_faces = unsafe_wrap(Array, dual_faces_ref[], num_neighbors) + neighbor_ielements = unsafe_wrap(Array, pelement_indices_ref[], + num_neighbors) + neighbor_leafs = unsafe_wrap(Array, pneighbor_leafs_ref[], num_neighbors) + neighbor_scheme = pneigh_scheme_ref[] + + if num_neighbors > 0 + neighbor_level = t8_element_level(neighbor_scheme, neighbor_leafs[1]) + + # Conforming interface: The second condition ensures we only visit the interface once. + if level == neighbor_level && current_index <= neighbor_ielements[1] + local_num_conform += 1 + + faces = (iface, dual_faces[1]) + interface_id = local_num_conform + + # Write data to interfaces container. + interfaces.neighbor_ids[1, interface_id] = current_index + 1 + interfaces.neighbor_ids[2, interface_id] = neighbor_ielements[1] + 1 + + # Iterate over primary and secondary element. + for side in 1:2 + # Align interface in positive coordinate direction of primary element. + # For orientation == 1, the secondary element needs to be indexed backwards + # relative to the interface. + if side == 1 || orientation == 0 + # Forward indexing + indexing = :i_forward + else + # Backward indexing + indexing = :i_backward + end + + if faces[side] == 0 + # Index face in negative x-direction + interfaces.node_indices[side, interface_id] = (:begin, + indexing) + elseif faces[side] == 1 + # Index face in positive x-direction + interfaces.node_indices[side, interface_id] = (:end, + indexing) + elseif faces[side] == 2 + # Index face in negative y-direction + interfaces.node_indices[side, interface_id] = (indexing, + :begin) + else # faces[side] == 3 + # Index face in positive y-direction + interfaces.node_indices[side, interface_id] = (indexing, + :end) + end + end + + # Non-conforming interface. 
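+                    # A mortar couples one large element to its refined
+                    # neighbors; in 2D, `neighbor_ids` stores the two small
+                    # elements first and the large element last.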
+ elseif level < neighbor_level + local_num_mortars += 1 + + faces = (dual_faces[1], iface) + + mortar_id = local_num_mortars + + # Last entry is the large element. + mortars.neighbor_ids[end, mortar_id] = current_index + 1 + + # First `1:end-1` entries are the smaller elements. + mortars.neighbor_ids[1:(end - 1), mortar_id] .= neighbor_ielements .+ + 1 + + for side in 1:2 + # Align mortar in positive coordinate direction of small side. + # For orientation == 1, the large side needs to be indexed backwards + # relative to the mortar. + if side == 1 || orientation == 0 + # Forward indexing for small side or orientation == 0. + indexing = :i_forward + else + # Backward indexing for large side with reversed orientation. + indexing = :i_backward + # Since the orientation is reversed we have to account for this + # when filling the `neighbor_ids` array. + mortars.neighbor_ids[1, mortar_id] = neighbor_ielements[2] + + 1 + mortars.neighbor_ids[2, mortar_id] = neighbor_ielements[1] + + 1 + end + + if faces[side] == 0 + # Index face in negative x-direction + mortars.node_indices[side, mortar_id] = (:begin, indexing) + elseif faces[side] == 1 + # Index face in positive x-direction + mortars.node_indices[side, mortar_id] = (:end, indexing) + elseif faces[side] == 2 + # Index face in negative y-direction + mortars.node_indices[side, mortar_id] = (indexing, :begin) + else # faces[side] == 3 + # Index face in positive y-direction + mortars.node_indices[side, mortar_id] = (indexing, :end) + end + end + + # else: "level > neighbor_level" is skipped since we visit the mortar interface only once. + end + + # Domain boundary. + else + local_num_boundary += 1 + boundary_id = local_num_boundary + + boundaries.neighbor_ids[boundary_id] = current_index + 1 + + if iface == 0 + # Index face in negative x-direction. + boundaries.node_indices[boundary_id] = (:begin, :i_forward) + elseif iface == 1 + # Index face in positive x-direction. + boundaries.node_indices[boundary_id] = (:end, :i_forward) + elseif iface == 2 + # Index face in negative y-direction. + boundaries.node_indices[boundary_id] = (:i_forward, :begin) + else # iface == 3 + # Index face in positive y-direction. + boundaries.node_indices[boundary_id] = (:i_forward, :end) + end + + # One-based indexing. + boundaries.name[boundary_id] = boundary_names[iface + 1, itree + 1] + end + + t8_free(dual_faces_ref[]) + t8_free(pneighbor_leafs_ref[]) + t8_free(pelement_indices_ref[]) + end # for iface = ... + + current_index += 1 + end # for + end # for + + return (interfaces = local_num_conform, + mortars = local_num_mortars, + boundaries = local_num_boundary) +end + +function trixi_t8_get_local_element_levels(forest) + # Check that forest is a committed, that is valid and usable, forest. + @assert t8_forest_is_committed(forest) != 0 + + levels = Vector{Int}(undef, t8_forest_get_local_num_elements(forest)) + + # Get the number of trees that have elements of this process. + num_local_trees = t8_forest_get_num_local_trees(forest) + + current_index = 0 + + for itree in 0:(num_local_trees - 1) + tree_class = t8_forest_get_tree_class(forest, itree) + eclass_scheme = t8_forest_get_eclass_scheme(forest, tree_class) + + # Get the number of elements of this tree. 
+        num_elements_in_tree = t8_forest_get_tree_num_elements(forest, itree)
+
+        for ielement in 0:(num_elements_in_tree - 1)
+            element = t8_forest_get_element_in_tree(forest, itree, ielement)
+            current_index += 1
+            levels[current_index] = t8_element_level(eclass_scheme, element)
+        end # for
+    end # for
+
+    return levels
+end
+
+# Callback function prototype to decide for refining and coarsening.
+# If `is_family` equals 1, the first `num_elements` in elements
+# form a family and we decide whether this family should be coarsened
+# or only the first element should be refined.
+# Otherwise `is_family` must equal zero and we consider the first entry
+# of the element array for refinement.
+# Entries of the element array beyond the first `num_elements` are undefined.
+# \param [in] forest       the forest to which the new elements belong
+# \param [in] forest_from  the forest that is adapted.
+# \param [in] which_tree   the local tree containing `elements`
+# \param [in] lelement_id  the local element id in `forest_old` in the tree of the current element
+# \param [in] ts           the eclass scheme of the tree
+# \param [in] is_family    if 1, the first `num_elements` entries in `elements` form a family. If 0, they do not.
+# \param [in] num_elements the number of entries in `elements` that are defined
+# \param [in] elements     Pointers to a family or, if `is_family` is zero,
+#                          pointer to one element.
+# \return greater than zero if the first entry in `elements` should be refined,
+#         smaller than zero if the family `elements` shall be coarsened,
+#         zero otherwise.
function adapt_callback(forest,
+                        forest_from,
+                        which_tree,
+                        lelement_id,
+                        ts,
+                        is_family,
+                        num_elements,
+                        elements)::Cint
+    num_levels = t8_forest_get_local_num_elements(forest_from)
+
+    indicator_ptr = Ptr{Int}(t8_forest_get_user_data(forest))
+    indicators = unsafe_wrap(Array, indicator_ptr, num_levels)
+
+    offset = t8_forest_get_tree_element_offset(forest_from, which_tree)
+
+    # Only allow coarsening for complete families.
+    if indicators[offset + lelement_id + 1] < 0 && is_family == 0
+        return Cint(0)
+    end
+
+    return Cint(indicators[offset + lelement_id + 1])
+end
+
+function trixi_t8_adapt_new(old_forest, indicators)
+    # Check that forest is a committed, that is valid and usable, forest.
+    @assert t8_forest_is_committed(old_forest) != 0
+
+    # Init new forest.
+    new_forest_ref = Ref{t8_forest_t}()
+    t8_forest_init(new_forest_ref)
+    new_forest = new_forest_ref[]
+
+    let set_from = C_NULL, recursive = 0, set_for_coarsening = 0, no_repartition = 0
+        t8_forest_set_user_data(new_forest, pointer(indicators))
+        t8_forest_set_adapt(new_forest, old_forest, @t8_adapt_callback(adapt_callback),
+                            recursive)
+        t8_forest_set_balance(new_forest, set_from, no_repartition)
+        t8_forest_set_partition(new_forest, set_from, set_for_coarsening)
+        t8_forest_set_ghost(new_forest, 1, T8_GHOST_FACES) # Note: MPI support not available yet so it is a dummy call.
+        t8_forest_commit(new_forest)
+    end
+
+    return new_forest
+end
+
+function trixi_t8_get_difference(old_levels, new_levels, num_children)
+    old_nelems = length(old_levels)
+    new_nelems = length(new_levels)
+
+    changes = Vector{Int}(undef, old_nelems)
+
+    # Local element indices.
+    old_index = 1
+    new_index = 1
+
+    while old_index <= old_nelems && new_index <= new_nelems
+        if old_levels[old_index] < new_levels[new_index]
+            # Refined.
+
+            changes[old_index] = 1
+
+            old_index += 1
+            new_index += num_children
+
+        elseif old_levels[old_index] > new_levels[new_index]
+            # Coarsened.
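+            # Mark all former children of the coarsened family with -1.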
+ + for child_index in old_index:(old_index + num_children - 1) + changes[child_index] = -1 + end + + old_index += num_children + new_index += 1 + + else + # No changes. + + changes[old_index] = 0 + + old_index += 1 + new_index += 1 + end + end + + return changes +end + +# Coarsen or refine marked cells and rebalance forest. Return a difference between +# old and new mesh. +function trixi_t8_adapt!(mesh, indicators) + old_levels = trixi_t8_get_local_element_levels(mesh.forest) + + forest_cached = trixi_t8_adapt_new(mesh.forest, indicators) + + new_levels = trixi_t8_get_local_element_levels(forest_cached) + + differences = trixi_t8_get_difference(old_levels, new_levels, 2^ndims(mesh)) + + mesh.forest = forest_cached + + return differences +end diff --git a/src/callbacks_step/amr.jl b/src/callbacks_step/amr.jl index bef49b4c482..4d80e6e1139 100644 --- a/src/callbacks_step/amr.jl +++ b/src/callbacks_step/amr.jl @@ -471,6 +471,65 @@ function (amr_callback::AMRCallback)(u_ode::AbstractVector, mesh::P4estMesh, return has_changed end +function (amr_callback::AMRCallback)(u_ode::AbstractVector, mesh::SerialT8codeMesh, + equations, dg::DG, cache, semi, + t, iter; + only_refine = false, only_coarsen = false, + passive_args = ()) + has_changed = false + + @unpack controller, adaptor = amr_callback + + u = wrap_array(u_ode, mesh, equations, dg, cache) + indicators = @trixi_timeit timer() "indicator" controller(u, mesh, equations, dg, + cache, t = t, iter = iter) + + if only_coarsen + indicators[indicators .> 0] .= 0 + end + + if only_refine + indicators[indicators .< 0] .= 0 + end + + @boundscheck begin + @assert axes(indicators)==(Base.OneTo(ncells(mesh)),) ("Indicator array (axes = $(axes(indicators))) and mesh cells (axes = $(Base.OneTo(ncells(mesh)))) have different axes") + end + + @trixi_timeit timer() "adapt" begin + difference = @trixi_timeit timer() "mesh" trixi_t8_adapt!(mesh, indicators) + + @trixi_timeit timer() "solver" adapt!(u_ode, adaptor, mesh, equations, dg, + cache, difference) + end + + # Store whether there were any cells coarsened or refined and perform load balancing. + has_changed = any(difference .!= 0) + + # TODO: T8codeMesh for MPI not implemented yet. + # Check if mesh changed on other processes + # if mpi_isparallel() + # has_changed = MPI.Allreduce!(Ref(has_changed), |, mpi_comm())[] + # end + + if has_changed + # TODO: T8codeMesh for MPI not implemented yet. + # if mpi_isparallel() && amr_callback.dynamic_load_balancing + # @trixi_timeit timer() "dynamic load balancing" begin + # global_first_quadrant = unsafe_wrap(Array, mesh.p4est.global_first_quadrant, mpi_nranks() + 1) + # old_global_first_quadrant = copy(global_first_quadrant) + # partition!(mesh) + # rebalance_solver!(u_ode, mesh, equations, dg, cache, old_global_first_quadrant) + # end + # end + + reinitialize_boundaries!(semi.boundary_conditions, cache) + end + + # Return true if there were any cells coarsened or refined, otherwise false. + return has_changed +end + function reinitialize_boundaries!(boundary_conditions::UnstructuredSortedBoundaryTypes, cache) # Reinitialize boundary types container because boundaries may have changed. @@ -639,6 +698,10 @@ function current_element_levels(mesh::P4estMesh, solver, cache) return current_levels end +function current_element_levels(mesh::T8codeMesh, solver, cache) + return trixi_t8_get_local_element_levels(mesh.forest) +end + # TODO: Taal refactor, merge the two loops of ControllerThreeLevel and IndicatorLöhner etc.? 
# But that would remove the simplest possibility to write that stuff to a file... # We could of course implement some additional logic and workarounds, but is it worth the effort? diff --git a/src/callbacks_step/amr_dg2d.jl b/src/callbacks_step/amr_dg2d.jl index 400d16347d5..1d37dfce034 100644 --- a/src/callbacks_step/amr_dg2d.jl +++ b/src/callbacks_step/amr_dg2d.jl @@ -333,9 +333,79 @@ function coarsen_elements!(u::AbstractArray{<:Any, 4}, element_id, end end +# Coarsen and refine elements in the DG solver based on a difference list. +function adapt!(u_ode::AbstractVector, adaptor, mesh::T8codeMesh{2}, equations, + dg::DGSEM, cache, difference) + + # Return early if there is nothing to do. + if !any(difference .!= 0) + return nothing + end + + # Number of (local) cells/elements. + old_nelems = nelements(dg, cache) + new_nelems = ncells(mesh) + + # Local element indices. + old_index = 1 + new_index = 1 + + # Note: This is true for `quads` only. + T8_CHILDREN = 4 + + # Retain current solution data. + old_u_ode = copy(u_ode) + + GC.@preserve old_u_ode begin + old_u = wrap_array(old_u_ode, mesh, equations, dg, cache) + + reinitialize_containers!(mesh, equations, dg, cache) + + resize!(u_ode, + nvariables(equations) * nnodes(dg)^ndims(mesh) * nelements(dg, cache)) + u = wrap_array(u_ode, mesh, equations, dg, cache) + + while old_index <= old_nelems && new_index <= new_nelems + if difference[old_index] > 0 # Refine. + + # Refine element and store solution directly in new data structure. + refine_element!(u, new_index, old_u, old_index, adaptor, equations, dg) + + old_index += 1 + new_index += T8_CHILDREN + + elseif difference[old_index] < 0 # Coarsen. + + # If an element is to be removed, sanity check if the following elements + # are also marked - otherwise there would be an error in the way the + # cells/elements are sorted. + @assert all(difference[old_index:(old_index + T8_CHILDREN - 1)] .< 0) "bad cell/element order" + + # Coarsen elements and store solution directly in new data structure. + coarsen_elements!(u, new_index, old_u, old_index, adaptor, equations, + dg) + + old_index += T8_CHILDREN + new_index += 1 + + else # No changes. + + # Copy old element data to new element container. + @views u[:, .., new_index] .= old_u[:, .., old_index] + + old_index += 1 + new_index += 1 + end + end # while + end # GC.@preserve old_u_ode + + return nothing +end + # this method is called when an `ControllerThreeLevel` is constructed function create_cache(::Type{ControllerThreeLevel}, - mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations, dg::DG, cache) + mesh::Union{TreeMesh{2}, P4estMesh{2}, T8codeMesh{2}}, equations, + dg::DG, cache) controller_value = Vector{Int}(undef, nelements(dg, cache)) return (; controller_value) end diff --git a/src/callbacks_step/analysis.jl b/src/callbacks_step/analysis.jl index 7c453aab633..fad42b11098 100644 --- a/src/callbacks_step/analysis.jl +++ b/src/callbacks_step/analysis.jl @@ -534,6 +534,36 @@ function print_amr_information(callbacks, mesh::P4estMesh, solver, cache) return nothing end +# Print level information only if AMR is enabled +function print_amr_information(callbacks, mesh::T8codeMesh, solver, cache) + + # Return early if there is nothing to print + uses_amr(callbacks) || return nothing + + # TODO: Switch to global element levels array when MPI supported or find + # another solution. 
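+    # For now, collect only the levels of the process-local elements.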
+ levels = trixi_t8_get_local_element_levels(mesh.forest) + + min_level = minimum(levels) + max_level = maximum(levels) + + mpi_println(" minlevel = $min_level") + mpi_println(" maxlevel = $max_level") + + if min_level > 0 + elements_per_level = [count(==(l), levels) for l in 1:max_level] + + for level in max_level:-1:(min_level + 1) + mpi_println(" ├── level $level: " * + @sprintf("% 14d", elements_per_level[level])) + end + mpi_println(" └── level $min_level: " * + @sprintf("% 14d", elements_per_level[min_level])) + end + + return nothing +end + # Iterate over tuples of analysis integrals in a type-stable way using "lispy tuple programming". function analyze_integrals(analysis_integrals::NTuple{N, Any}, io, du, u, t, semi) where {N} diff --git a/src/callbacks_step/analysis_dg2d.jl b/src/callbacks_step/analysis_dg2d.jl index 6c74e172e46..4e456f79872 100644 --- a/src/callbacks_step/analysis_dg2d.jl +++ b/src/callbacks_step/analysis_dg2d.jl @@ -31,7 +31,7 @@ end function create_cache_analysis(analyzer, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, - P4estMesh{2}}, + P4estMesh{2}, T8codeMesh{2}}, equations, dg::DG, cache, RealT, uEltype) @@ -108,7 +108,7 @@ end function calc_error_norms(func, u, t, analyzer, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, - P4estMesh{2}}, equations, + P4estMesh{2}, T8codeMesh{2}}, equations, initial_condition, dg::DGSEM, cache, cache_analysis) @unpack vandermonde, weights = analyzer @unpack node_coordinates, inverse_jacobian = cache.elements @@ -176,7 +176,7 @@ end function integrate_via_indices(func::Func, u, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, - P4estMesh{2}}, equations, + P4estMesh{2}, T8codeMesh{2}}, equations, dg::DGSEM, cache, args...; normalize = true) where {Func} @unpack weights = dg.basis @@ -204,7 +204,7 @@ end function integrate(func::Func, u, mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, - P4estMesh{2}}, + P4estMesh{2}, T8codeMesh{2}}, equations, dg::DG, cache; normalize = true) where {Func} integrate_via_indices(u, mesh, equations, dg, cache; normalize = normalize) do u, i, j, element, equations, dg @@ -215,7 +215,7 @@ end function analyze(::typeof(entropy_timederivative), du, u, t, mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, - P4estMesh{2}}, + P4estMesh{2}, T8codeMesh{2}}, equations, dg::DG, cache) # Calculate ∫(∂S/∂u ⋅ ∂u/∂t)dΩ integrate_via_indices(u, mesh, equations, dg, cache, @@ -259,7 +259,8 @@ function analyze(::Val{:l2_divb}, du, u, t, end function analyze(::Val{:l2_divb}, du, u, t, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, + T8codeMesh{2}}, equations::IdealGlmMhdEquations2D, dg::DGSEM, cache) @unpack contravariant_vectors = cache.elements integrate_via_indices(u, mesh, equations, dg, cache, cache, @@ -326,7 +327,8 @@ function analyze(::Val{:linf_divb}, du, u, t, end function analyze(::Val{:linf_divb}, du, u, t, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, + T8codeMesh{2}}, equations::IdealGlmMhdEquations2D, dg::DGSEM, cache) @unpack derivative_matrix, weights = dg.basis @unpack contravariant_vectors = cache.elements diff --git a/src/callbacks_step/save_restart_dg.jl b/src/callbacks_step/save_restart_dg.jl index 5695eb8bede..8db6db2d2b8 100644 --- a/src/callbacks_step/save_restart_dg.jl +++ b/src/callbacks_step/save_restart_dg.jl @@ -7,7 +7,8 @@ function save_restart_file(u, time, dt, timestep, 
mesh::Union{SerialTreeMesh, StructuredMesh, - UnstructuredMesh2D, SerialP4estMesh}, + UnstructuredMesh2D, SerialP4estMesh, + SerialT8codeMesh}, equations, dg::DG, cache, restart_callback) @unpack output_directory = restart_callback diff --git a/src/callbacks_step/save_solution_dg.jl b/src/callbacks_step/save_solution_dg.jl index 6cd4a0ec9c1..6d5004ff65f 100644 --- a/src/callbacks_step/save_solution_dg.jl +++ b/src/callbacks_step/save_solution_dg.jl @@ -7,7 +7,8 @@ function save_solution_file(u, time, dt, timestep, mesh::Union{SerialTreeMesh, StructuredMesh, - UnstructuredMesh2D, SerialP4estMesh}, + UnstructuredMesh2D, SerialP4estMesh, + SerialT8codeMesh}, equations, dg::DG, cache, solution_callback, element_variables = Dict{Symbol, Any}(); system = "") diff --git a/src/callbacks_step/stepsize_dg2d.jl b/src/callbacks_step/stepsize_dg2d.jl index 89a2b2b8350..673c3ba6aa6 100644 --- a/src/callbacks_step/stepsize_dg2d.jl +++ b/src/callbacks_step/stepsize_dg2d.jl @@ -75,7 +75,9 @@ function max_dt(u, t, mesh::ParallelTreeMesh{2}, return dt end -function max_dt(u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, +function max_dt(u, t, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, + T8codeMesh{2}}, constant_speed::False, equations, dg::DG, cache) # to avoid a division by zero if the speed vanishes everywhere, # e.g. for steady-state linear advection @@ -109,7 +111,9 @@ function max_dt(u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMe return 2 / (nnodes(dg) * max_scaled_speed) end -function max_dt(u, t, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}}, +function max_dt(u, t, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, + T8codeMesh{2}}, constant_speed::True, equations, dg::DG, cache) @unpack contravariant_vectors, inverse_jacobian = cache.elements diff --git a/src/meshes/mesh_io.jl b/src/meshes/mesh_io.jl index da67fe23e0e..b9895e7d454 100644 --- a/src/meshes/mesh_io.jl +++ b/src/meshes/mesh_io.jl @@ -6,7 +6,7 @@ #! format: noindent # Save current mesh with some context information as an HDF5 file. -function save_mesh_file(mesh::Union{TreeMesh, P4estMesh}, output_directory, +function save_mesh_file(mesh::Union{TreeMesh, P4estMesh, T8codeMesh}, output_directory, timestep = 0) save_mesh_file(mesh, output_directory, timestep, mpi_parallel(mesh)) end @@ -220,6 +220,13 @@ function save_mesh_file(mesh::P4estMesh, output_directory, timestep, mpi_paralle return filename end +# TODO: Implement this function as soon as there is support for this in `t8code`. 
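+# Mesh serialization is not available in `t8code` yet, hence restarting from a
+# `T8codeMesh` simulation is currently not possible and an informative error is
+# raised instead.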
+function save_mesh_file(mesh::T8codeMesh, output_directory, timestep, mpi_parallel) + error("Mesh file output not supported yet for `T8codeMesh`.") + + return joinpath(output_directory, "dummy_mesh.h5") +end + """ load_mesh(restart_file::AbstractString; n_cells_max) diff --git a/src/meshes/meshes.jl b/src/meshes/meshes.jl index 2716aa2007b..ed2158b169a 100644 --- a/src/meshes/meshes.jl +++ b/src/meshes/meshes.jl @@ -12,6 +12,7 @@ include("unstructured_mesh.jl") include("face_interpolant.jl") include("transfinite_mappings_3d.jl") include("p4est_mesh.jl") +include("t8code_mesh.jl") include("mesh_io.jl") include("dgmulti_meshes.jl") end # @muladd diff --git a/src/meshes/t8code_mesh.jl b/src/meshes/t8code_mesh.jl new file mode 100644 index 00000000000..13edcc29711 --- /dev/null +++ b/src/meshes/t8code_mesh.jl @@ -0,0 +1,345 @@ +""" + T8codeMesh{NDIMS} <: AbstractMesh{NDIMS} + +An unstructured curved mesh based on trees that uses the C library +['t8code'](https://github.com/DLR-AMR/t8code) +to manage trees and mesh refinement. +""" +mutable struct T8codeMesh{NDIMS, RealT <: Real, IsParallel, NDIMSP2, NNODES} <: + AbstractMesh{NDIMS} + cmesh :: Ptr{t8_cmesh} # cpointer to coarse mesh + scheme :: Ptr{t8_eclass_scheme} # cpointer to element scheme + forest :: Ptr{t8_forest} # cpointer to forest + is_parallel :: IsParallel + + # This specifies the geometry interpolation for each tree. + tree_node_coordinates::Array{RealT, NDIMSP2} # [dimension, i, j, k, tree] + + # Stores the quadrature nodes. + nodes::SVector{NNODES, RealT} + + boundary_names :: Array{Symbol, 2} # [face direction, tree] + current_filename :: String + + ninterfaces :: Int + nmortars :: Int + nboundaries :: Int + + function T8codeMesh{NDIMS}(cmesh, scheme, forest, tree_node_coordinates, nodes, + boundary_names, + current_filename) where {NDIMS} + is_parallel = False() + + mesh = new{NDIMS, Float64, typeof(is_parallel), NDIMS + 2, length(nodes)}(cmesh, + scheme, + forest, + is_parallel) + + mesh.nodes = nodes + mesh.boundary_names = boundary_names + mesh.current_filename = current_filename + mesh.tree_node_coordinates = tree_node_coordinates + + finalizer(mesh) do mesh + # When finalizing `mesh.forest`, `mesh.scheme` and `mesh.cmesh` are + # also cleaned up from within `t8code`. The cleanup code for + # `cmesh` does some MPI calls for deallocating shared memory + # arrays. Due to garbage collection in Julia the order of shutdown + # is not deterministic. The following code might happen after MPI + # is already in finalized state. + # If the environment variable `TRIXI_T8CODE_SC_FINALIZE` is set the + # `finalize_hook` of the MPI module takes care of the cleanup. See + # further down. However, this might cause a pile-up of `mesh` + # objects during long-running sessions. + if !MPI.Finalized() + trixi_t8_unref_forest(mesh.forest) + end + end + + # This finalizer call is only recommended during development and not for + # production runs, especially long-running sessions since a reference to + # the `mesh` object will be kept throughout the lifetime of the session. + # See comments in `init_t8code()` in file `src/auxiliary/t8code.jl` for + # more information. 
+        if haskey(ENV, "TRIXI_T8CODE_SC_FINALIZE")
+            MPI.add_finalize_hook!() do
+                trixi_t8_unref_forest(mesh.forest)
+            end
+        end
+
+        return mesh
+    end
+end
+
+const SerialT8codeMesh{NDIMS} = T8codeMesh{NDIMS, <:Real, <:False}
+@inline mpi_parallel(mesh::SerialT8codeMesh) = False()
+
+@inline Base.ndims(::T8codeMesh{NDIMS}) where {NDIMS} = NDIMS
+@inline Base.real(::T8codeMesh{NDIMS, RealT}) where {NDIMS, RealT} = RealT
+
+@inline ntrees(mesh::T8codeMesh) = Int(t8_forest_get_num_local_trees(mesh.forest))
+@inline ncells(mesh::T8codeMesh) = Int(t8_forest_get_local_num_elements(mesh.forest))
+@inline ninterfaces(mesh::T8codeMesh) = mesh.ninterfaces
+@inline nmortars(mesh::T8codeMesh) = mesh.nmortars
+@inline nboundaries(mesh::T8codeMesh) = mesh.nboundaries
+
+function Base.show(io::IO, mesh::T8codeMesh)
+    print(io, "T8codeMesh{", ndims(mesh), ", ", real(mesh), "}")
+end
+
+function Base.show(io::IO, ::MIME"text/plain", mesh::T8codeMesh)
+    if get(io, :compact, false)
+        show(io, mesh)
+    else
+        setup = [
+            "#trees" => ntrees(mesh),
+            "current #cells" => ncells(mesh),
+            "polydeg" => length(mesh.nodes) - 1,
+        ]
+        summary_box(io,
+                    "T8codeMesh{" * string(ndims(mesh)) * ", " * string(real(mesh)) * "}",
+                    setup)
+    end
+end
+
+"""
+    T8codeMesh(trees_per_dimension; polydeg, mapping=identity,
+               RealT=Float64, initial_refinement_level=0, periodicity=true)
+
+Create a structured, potentially curved `T8codeMesh` of the specified size.
+
+Non-periodic boundaries will be called `:x_neg`, `:x_pos`, `:y_neg`, `:y_pos`, `:z_neg`, `:z_pos`.
+
+# Arguments
+- `trees_per_dimension::NTuple{NDIMS, Int}`: the number of trees in each dimension.
+- `polydeg::Integer`: polynomial degree used to store the geometry of the mesh.
+                      The mapping will be approximated by an interpolation polynomial
+                      of the specified degree for each tree.
+- `mapping`: a function of `NDIMS` variables to describe the mapping that transforms
+             the reference mesh (`[-1, 1]^n`) to the physical domain.
+- `RealT::Type`: the type that should be used for coordinates.
+- `initial_refinement_level::Integer`: refine the mesh uniformly to this level before the simulation starts.
+- `periodicity`: either a `Bool` deciding if all of the boundaries are periodic or an `NTuple{NDIMS, Bool}`
+                 deciding for each dimension if the boundaries in this dimension are periodic.
+"""
+function T8codeMesh(trees_per_dimension; polydeg,
+                    mapping = coordinates2mapping((-1.0, -1.0), (1.0, 1.0)),
+                    RealT = Float64, initial_refinement_level = 0, periodicity = true)
+    NDIMS = length(trees_per_dimension)
+
+    @assert NDIMS == 2 # Only support for NDIMS = 2 yet.
+
+    # Convert periodicity to a Tuple of a Bool for every dimension
+    if all(periodicity)
+        # Also catches case where periodicity = true
+        periodicity = ntuple(_ -> true, NDIMS)
+    elseif !any(periodicity)
+        # Also catches case where periodicity = false
+        periodicity = ntuple(_ -> false, NDIMS)
+    else
+        # Default case if periodicity is an iterable
+        periodicity = Tuple(periodicity)
+    end
+
+    conn = T8code.Libt8.p4est_connectivity_new_brick(trees_per_dimension..., periodicity...)
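+    # `t8code` builds its own coarse mesh (cmesh) from the brick connectivity;
+    # afterwards the `p4est` connectivity is no longer needed and is destroyed.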
+ do_partition = 0 + cmesh = t8_cmesh_new_from_p4est(conn, mpi_comm(), do_partition) + T8code.Libt8.p4est_connectivity_destroy(conn) + + scheme = t8_scheme_new_default_cxx() + forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, 0, mpi_comm()) + + basis = LobattoLegendreBasis(RealT, polydeg) + nodes = basis.nodes + + tree_node_coordinates = Array{RealT, NDIMS + 2}(undef, NDIMS, + ntuple(_ -> length(nodes), NDIMS)..., + prod(trees_per_dimension)) + + # Get cell length in reference mesh: Omega_ref = [-1,1]^2. + dx = 2 / trees_per_dimension[1] + dy = 2 / trees_per_dimension[2] + + num_local_trees = t8_cmesh_get_num_local_trees(cmesh) + + # Non-periodic boundaries. + boundary_names = fill(Symbol("---"), 2 * NDIMS, prod(trees_per_dimension)) + + for itree in 1:num_local_trees + veptr = t8_cmesh_get_tree_vertices(cmesh, itree - 1) + verts = unsafe_wrap(Array, veptr, (3, 1 << NDIMS)) + + # Calculate node coordinates of reference mesh. + cell_x_offset = (verts[1, 1] - 1 / 2 * (trees_per_dimension[1] - 1)) * dx + cell_y_offset = (verts[2, 1] - 1 / 2 * (trees_per_dimension[2] - 1)) * dy + + for j in eachindex(nodes), i in eachindex(nodes) + tree_node_coordinates[:, i, j, itree] .= mapping(cell_x_offset + + dx * nodes[i] / 2, + cell_y_offset + + dy * nodes[j] / 2) + end + + if !periodicity[1] + boundary_names[1, itree] = :x_neg + boundary_names[2, itree] = :x_pos + end + + if !periodicity[2] + boundary_names[3, itree] = :y_neg + boundary_names[4, itree] = :y_pos + end + end + + return T8codeMesh{NDIMS}(cmesh, scheme, forest, tree_node_coordinates, nodes, + boundary_names, "") +end + +""" + T8codeMesh{NDIMS}(cmesh::Ptr{t8_cmesh}, + mapping=nothing, polydeg=1, RealT=Float64, + initial_refinement_level=0) + +Main mesh constructor for the `T8codeMesh` that imports an unstructured, +conforming mesh from a `t8_cmesh` data structure. + +# Arguments +- `cmesh::Ptr{t8_cmesh}`: Pointer to a cmesh object. +- `mapping`: a function of `NDIMS` variables to describe the mapping that transforms + the imported mesh to the physical domain. Use `nothing` for the identity map. +- `polydeg::Integer`: polynomial degree used to store the geometry of the mesh. + The mapping will be approximated by an interpolation polynomial + of the specified degree for each tree. + The default of `1` creates an uncurved geometry. Use a higher value if the mapping + will curve the imported uncurved mesh. +- `RealT::Type`: the type that should be used for coordinates. +- `initial_refinement_level::Integer`: refine the mesh uniformly to this level before the simulation starts. +""" +function T8codeMesh{NDIMS}(cmesh::Ptr{t8_cmesh}; + mapping = nothing, polydeg = 1, RealT = Float64, + initial_refinement_level = 0) where {NDIMS} + @assert NDIMS == 2 # Only support for NDIMS = 2 yet. 
+
+    scheme = t8_scheme_new_default_cxx()
+    forest = t8_forest_new_uniform(cmesh, scheme, initial_refinement_level, 0, mpi_comm())
+
+    basis = LobattoLegendreBasis(RealT, polydeg)
+    nodes = basis.nodes
+
+    num_local_trees = t8_cmesh_get_num_local_trees(cmesh)
+
+    tree_node_coordinates = Array{RealT, NDIMS + 2}(undef, NDIMS,
+                                                    ntuple(_ -> length(nodes), NDIMS)...,
+                                                    num_local_trees)
+
+    nodes_in = [-1.0, 1.0]
+    matrix = polynomial_interpolation_matrix(nodes_in, nodes)
+    data_in = Array{RealT, 3}(undef, 2, 2, 2)
+    tmp1 = zeros(RealT, 2, length(nodes), length(nodes_in))
+
+    for itree in 0:(num_local_trees - 1)
+        veptr = t8_cmesh_get_tree_vertices(cmesh, itree)
+        verts = unsafe_wrap(Array, veptr, (3, 1 << NDIMS))
+
+        u = verts[:, 2] - verts[:, 1]
+        v = verts[:, 3] - verts[:, 1]
+        w = [0.0, 0.0, 1.0]
+
+        vol = dot(cross(u, v), w)
+
+        if vol < 0.0
+            @warn "Discovered negative volumes in `cmesh`: vol = $vol"
+        end
+
+        # Tree vertices are stored in z-order.
+        @views data_in[:, 1, 1] .= verts[1:2, 1]
+        @views data_in[:, 2, 1] .= verts[1:2, 2]
+        @views data_in[:, 1, 2] .= verts[1:2, 3]
+        @views data_in[:, 2, 2] .= verts[1:2, 4]
+
+        # Interpolate corner coordinates to specified nodes.
+        multiply_dimensionwise!(view(tree_node_coordinates, :, :, :, itree + 1),
+                                matrix, matrix,
+                                data_in,
+                                tmp1)
+    end
+
+    map_node_coordinates!(tree_node_coordinates, mapping)
+
+    # There's no simple and generic way to distinguish boundaries. Name all of them :all.
+    boundary_names = fill(:all, 2 * NDIMS, num_local_trees)
+
+    return T8codeMesh{NDIMS}(cmesh, scheme, forest, tree_node_coordinates, nodes,
+                             boundary_names, "")
+end
+
+"""
+    T8codeMesh{NDIMS}(conn::Ptr{p4est_connectivity};
+                      mapping=nothing, polydeg=1, RealT=Float64,
+                      initial_refinement_level=0)
+
+Main mesh constructor for the `T8codeMesh` that imports an unstructured,
+conforming mesh from a `p4est_connectivity` data structure.
+
+# Arguments
+- `conn::Ptr{p4est_connectivity}`: Pointer to a P4est connectivity object.
+- `mapping`: a function of `NDIMS` variables to describe the mapping that transforms
+             the imported mesh to the physical domain. Use `nothing` for the identity map.
+- `polydeg::Integer`: polynomial degree used to store the geometry of the mesh.
+                      The mapping will be approximated by an interpolation polynomial
+                      of the specified degree for each tree.
+                      The default of `1` creates an uncurved geometry. Use a higher value if the mapping
+                      will curve the imported uncurved mesh.
+- `RealT::Type`: the type that should be used for coordinates.
+- `initial_refinement_level::Integer`: refine the mesh uniformly to this level before the simulation starts.
+"""
+function T8codeMesh{NDIMS}(conn::Ptr{p4est_connectivity}; kwargs...) where {NDIMS}
+    @assert NDIMS == 2 # Only NDIMS = 2 is currently supported.
+
+    cmesh = t8_cmesh_new_from_p4est(conn, mpi_comm(), 0)
+
+    return T8codeMesh{NDIMS}(cmesh; kwargs...)
+end
+
+"""
+    T8codeMesh{NDIMS}(meshfile::String;
+                      mapping=nothing, polydeg=1, RealT=Float64,
+                      initial_refinement_level=0)
+
+Main mesh constructor for the `T8codeMesh` that imports an unstructured, conforming
+mesh from a Gmsh mesh file (`.msh`).
+
+# Arguments
+- `meshfile::String`: path to a Gmsh mesh file.
+- `mapping`: a function of `NDIMS` variables to describe the mapping that transforms
+             the imported mesh to the physical domain. Use `nothing` for the identity map.
+- `polydeg::Integer`: polynomial degree used to store the geometry of the mesh.
+                      The mapping will be approximated by an interpolation polynomial
+                      of the specified degree for each tree.
+                      The default of `1` creates an uncurved geometry. Use a higher value if the mapping
+                      will curve the imported uncurved mesh.
+- `RealT::Type`: the type that should be used for coordinates.
+- `initial_refinement_level::Integer`: refine the mesh uniformly to this level before the simulation starts.
+"""
+function T8codeMesh{NDIMS}(meshfile::String; kwargs...) where {NDIMS}
+    @assert NDIMS == 2 # Only NDIMS = 2 is currently supported.
+
+    # Prevent `t8code` from crashing Julia if the file doesn't exist.
+    @assert isfile(meshfile)
+
+    meshfile_prefix, meshfile_suffix = splitext(meshfile)
+
+    cmesh = t8_cmesh_from_msh_file(meshfile_prefix, 0, mpi_comm(), NDIMS, 0, 0)
+
+    return T8codeMesh{NDIMS}(cmesh; kwargs...)
+end
+
+# TODO: Just a placeholder. Will be implemented later when MPI is supported.
+function balance!(mesh::T8codeMesh, init_fn = C_NULL)
+    return nothing
+end
+
+# TODO: Just a placeholder. Will be implemented later when MPI is supported.
+function partition!(mesh::T8codeMesh; allow_coarsening = true, weight_fn = C_NULL)
+    return nothing
+end
diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl
index 2536cfe0bf2..495e0ffc4a4 100644
--- a/src/solvers/dg.jl
+++ b/src/solvers/dg.jl
@@ -363,7 +363,8 @@ function get_element_variables!(element_variables, u, mesh, equations, dg::DG, c
                            dg, cache)
 end
 
-const MeshesDGSEM = Union{TreeMesh, StructuredMesh, UnstructuredMesh2D, P4estMesh}
+const MeshesDGSEM = Union{TreeMesh, StructuredMesh, UnstructuredMesh2D, P4estMesh,
+                          T8codeMesh}
 
 @inline function ndofs(mesh::MeshesDGSEM, dg::DG, cache)
     nelements(cache.elements) * nnodes(dg)^ndims(mesh)
@@ -679,4 +680,5 @@ include("dgsem_tree/dg.jl")
 include("dgsem_structured/dg.jl")
 include("dgsem_unstructured/dg.jl")
 include("dgsem_p4est/dg.jl")
+include("dgsem_t8code/dg.jl")
 end # @muladd
diff --git a/src/solvers/dgsem_p4est/containers.jl b/src/solvers/dgsem_p4est/containers.jl
index 2b9c6987d24..0176f5c6346 100644
--- a/src/solvers/dgsem_p4est/containers.jl
+++ b/src/solvers/dgsem_p4est/containers.jl
@@ -81,7 +81,8 @@ function Base.resize!(elements::P4estElementContainer, capacity)
 end
 
 # Create element container and initialize element data
-function init_elements(mesh::P4estMesh{NDIMS, RealT}, equations,
+function init_elements(mesh::Union{P4estMesh{NDIMS, RealT}, T8codeMesh{NDIMS, RealT}},
+                       equations,
                        basis,
                        ::Type{uEltype}) where {NDIMS, RealT <: Real, uEltype <: Real}
     nelements = ncells(mesh)
@@ -165,7 +166,7 @@ function Base.resize!(interfaces::P4estInterfaceContainer, capacity)
 end
 
 # Create interface container and initialize interface data.
-function init_interfaces(mesh::P4estMesh, equations, basis, elements)
+function init_interfaces(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, elements)
     NDIMS = ndims(elements)
     uEltype = eltype(elements)
@@ -240,7 +241,7 @@ function Base.resize!(boundaries::P4estBoundaryContainer, capacity)
 end
 
 # Create interface container and initialize interface data in `elements`.
-function init_boundaries(mesh::P4estMesh, equations, basis, elements)
+function init_boundaries(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, elements)
     NDIMS = ndims(elements)
     uEltype = eltype(elements)
@@ -371,7 +372,7 @@ function Base.resize!(mortars::P4estMortarContainer, capacity)
 end
 
 # Create mortar container and initialize mortar data.
-function init_mortars(mesh::P4estMesh, equations, basis, elements) +function init_mortars(mesh::Union{P4estMesh, T8codeMesh}, equations, basis, elements) NDIMS = ndims(elements) uEltype = eltype(elements) diff --git a/src/solvers/dgsem_p4est/containers_2d.jl b/src/solvers/dgsem_p4est/containers_2d.jl index 11747f1f175..236d7d24c06 100644 --- a/src/solvers/dgsem_p4est/containers_2d.jl +++ b/src/solvers/dgsem_p4est/containers_2d.jl @@ -6,7 +6,8 @@ #! format: noindent # Initialize data structures in element container -function init_elements!(elements, mesh::P4estMesh{2}, basis::LobattoLegendreBasis) +function init_elements!(elements, mesh::Union{P4estMesh{2}, T8codeMesh{2}}, + basis::LobattoLegendreBasis) @unpack node_coordinates, jacobian_matrix, contravariant_vectors, inverse_jacobian = elements @@ -25,7 +26,7 @@ end # Interpolate tree_node_coordinates to each quadrant at the nodes of the specified basis function calc_node_coordinates!(node_coordinates, - mesh::P4estMesh{2}, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, basis::LobattoLegendreBasis) # Hanging nodes will cause holes in the mesh if its polydeg is higher # than the polydeg of the solver. diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl index bc7d9edb6ef..97b931fa325 100644 --- a/src/solvers/dgsem_p4est/dg_2d.jl +++ b/src/solvers/dgsem_p4est/dg_2d.jl @@ -7,8 +7,8 @@ # The methods below are specialized on the mortar type # and called from the basic `create_cache` method at the top. -function create_cache(mesh::P4estMesh{2}, equations, mortar_l2::LobattoLegendreMortarL2, - uEltype) +function create_cache(mesh::Union{P4estMesh{2}, T8codeMesh{2}}, equations, + mortar_l2::LobattoLegendreMortarL2, uEltype) # TODO: Taal performance using different types MA2d = MArray{Tuple{nvariables(equations), nnodes(mortar_l2)}, uEltype, 2, @@ -58,7 +58,7 @@ end # We pass the `surface_integral` argument solely for dispatch function prolong2interfaces!(cache, u, - mesh::P4estMesh{2}, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, equations, surface_integral, dg::DG) @unpack interfaces = cache index_range = eachnode(dg) @@ -114,7 +114,7 @@ function prolong2interfaces!(cache, u, end function calc_interface_flux!(surface_flux_values, - mesh::P4estMesh{2}, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, nonconservative_terms, equations, surface_integral, dg::DG, cache) @unpack neighbor_ids, node_indices = cache.interfaces @@ -182,7 +182,7 @@ end # Inlined version of the interface flux computation for conservation laws @inline function calc_interface_flux!(surface_flux_values, - mesh::P4estMesh{2}, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache, interface_index, normal_direction, @@ -206,7 +206,7 @@ end # Inlined version of the interface flux computation for equations with conservative and nonconservative terms @inline function calc_interface_flux!(surface_flux_values, - mesh::P4estMesh{2}, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, nonconservative_terms::True, equations, surface_integral, dg::DG, cache, interface_index, normal_direction, @@ -247,7 +247,7 @@ end end function prolong2boundaries!(cache, u, - mesh::P4estMesh{2}, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, equations, surface_integral, dg::DG) @unpack boundaries = cache index_range = eachnode(dg) @@ -276,7 +276,7 @@ function prolong2boundaries!(cache, u, end function calc_boundary_flux!(cache, t, boundary_condition, boundary_indexing, - mesh::P4estMesh{2}, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, 
equations, surface_integral, dg::DG) @unpack boundaries = cache @unpack surface_flux_values = cache.elements @@ -312,7 +312,7 @@ end # inlined version of the boundary flux calculation along a physical interface @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, - mesh::P4estMesh{2}, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache, i_index, j_index, @@ -343,7 +343,7 @@ end # inlined version of the boundary flux with nonconservative terms calculation along a physical interface @inline function calc_boundary_flux!(surface_flux_values, t, boundary_condition, - mesh::P4estMesh{2}, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, nonconservative_terms::True, equations, surface_integral, dg::DG, cache, i_index, j_index, @@ -385,7 +385,7 @@ end end function prolong2mortars!(cache, u, - mesh::P4estMesh{2}, equations, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DGSEM) @unpack neighbor_ids, node_indices = cache.mortars @@ -452,7 +452,7 @@ function prolong2mortars!(cache, u, end function calc_mortar_flux!(surface_flux_values, - mesh::P4estMesh{2}, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, nonconservative_terms, equations, mortar_l2::LobattoLegendreMortarL2, surface_integral, dg::DG, cache) @@ -511,7 +511,7 @@ end # Inlined version of the mortar flux computation on small elements for conservation laws @inline function calc_mortar_flux!(fstar, - mesh::P4estMesh{2}, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, nonconservative_terms::False, equations, surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, @@ -531,7 +531,7 @@ end # Inlined version of the mortar flux computation on small elements for equations with conservative and # nonconservative terms @inline function calc_mortar_flux!(fstar, - mesh::P4estMesh{2}, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, nonconservative_terms::True, equations, surface_integral, dg::DG, cache, mortar_index, position_index, normal_direction, @@ -559,7 +559,8 @@ end end @inline function mortar_fluxes_to_elements!(surface_flux_values, - mesh::P4estMesh{2}, equations, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, + equations, mortar_l2::LobattoLegendreMortarL2, dg::DGSEM, cache, mortar, fstar, u_buffer) @unpack neighbor_ids, node_indices = cache.mortars @@ -620,7 +621,7 @@ end end function calc_surface_integral!(du, u, - mesh::P4estMesh{2}, + mesh::Union{P4estMesh{2}, T8codeMesh{2}}, equations, surface_integral::SurfaceIntegralWeakForm, dg::DGSEM, cache) diff --git a/src/solvers/dgsem_structured/dg_2d.jl b/src/solvers/dgsem_structured/dg_2d.jl index c013bf62d98..3e8ce759b30 100644 --- a/src/solvers/dgsem_structured/dg_2d.jl +++ b/src/solvers/dgsem_structured/dg_2d.jl @@ -52,7 +52,7 @@ end @inline function weak_form_kernel!(du, u, element, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, - P4estMesh{2}}, + P4estMesh{2}, T8codeMesh{2}}, nonconservative_terms::False, equations, dg::DGSEM, cache, alpha = true) # true * [some floating point value] == [exactly the same floating point value] @@ -93,8 +93,8 @@ end @inline function flux_differencing_kernel!(du, u, element, mesh::Union{StructuredMesh{2}, - UnstructuredMesh2D, P4estMesh{2} - }, + UnstructuredMesh2D, P4estMesh{2}, + T8codeMesh{2}}, nonconservative_terms::False, equations, volume_flux, dg::DGSEM, cache, alpha = true) @unpack derivative_split = dg.basis @@ -150,8 +150,8 @@ end @inline function flux_differencing_kernel!(du, u, 
element, mesh::Union{StructuredMesh{2}, - UnstructuredMesh2D, P4estMesh{2} - }, + UnstructuredMesh2D, P4estMesh{2}, + T8codeMesh{2}}, nonconservative_terms::True, equations, volume_flux, dg::DGSEM, cache, alpha = true) @unpack derivative_split = dg.basis @@ -219,7 +219,7 @@ end # [arXiv: 2008.12044v2](https://arxiv.org/pdf/2008.12044) @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, - P4estMesh{2}}, + P4estMesh{2}, T8codeMesh{2}}, nonconservative_terms::False, equations, volume_flux_fv, dg::DGSEM, element, cache) @unpack contravariant_vectors = cache.elements @@ -289,7 +289,7 @@ end @inline function calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u::AbstractArray{<:Any, 4}, mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, - P4estMesh{2}}, + P4estMesh{2}, T8codeMesh{2}}, nonconservative_terms::True, equations, volume_flux_fv, dg::DGSEM, element, cache) @unpack contravariant_vectors = cache.elements @@ -609,9 +609,8 @@ function calc_boundary_flux!(cache, u, t, boundary_conditions::NamedTuple, end function apply_jacobian!(du, - mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2 - } - }, + mesh::Union{StructuredMesh{2}, UnstructuredMesh2D, + P4estMesh{2}, T8codeMesh{2}}, equations, dg::DG, cache) @unpack inverse_jacobian = cache.elements diff --git a/src/solvers/dgsem_t8code/containers.jl b/src/solvers/dgsem_t8code/containers.jl new file mode 100644 index 00000000000..093feb2985a --- /dev/null +++ b/src/solvers/dgsem_t8code/containers.jl @@ -0,0 +1,60 @@ +function reinitialize_containers!(mesh::T8codeMesh, equations, dg::DGSEM, cache) + # Re-initialize elements container. + @unpack elements = cache + resize!(elements, ncells(mesh)) + init_elements!(elements, mesh, dg.basis) + + count_required_surfaces!(mesh) + + # Resize interfaces container. + @unpack interfaces = cache + resize!(interfaces, mesh.ninterfaces) + + # Resize mortars container. + @unpack mortars = cache + resize!(mortars, mesh.nmortars) + + # Resize boundaries container. + @unpack boundaries = cache + resize!(boundaries, mesh.nboundaries) + + trixi_t8_fill_mesh_info(mesh.forest, elements, interfaces, mortars, boundaries, + mesh.boundary_names) + + return nothing +end + +function count_required_surfaces!(mesh::T8codeMesh) + counts = trixi_t8_count_interfaces(mesh.forest) + + mesh.nmortars = counts.mortars + mesh.ninterfaces = counts.interfaces + mesh.nboundaries = counts.boundaries + + return counts +end + +# Compatibility to `dgsem_p4est/containers.jl`. +function count_required_surfaces(mesh::T8codeMesh) + return (interfaces = mesh.ninterfaces, + mortars = mesh.nmortars, + boundaries = mesh.nboundaries) +end + +# Compatibility to `dgsem_p4est/containers.jl`. +function init_interfaces!(interfaces, mesh::T8codeMesh) + # Already computed. Do nothing. + return nothing +end + +# Compatibility to `dgsem_p4est/containers.jl`. +function init_mortars!(mortars, mesh::T8codeMesh) + # Already computed. Do nothing. + return nothing +end + +# Compatibility to `dgsem_p4est/containers.jl`. +function init_boundaries!(boundaries, mesh::T8codeMesh) + # Already computed. Do nothing. + return nothing +end diff --git a/src/solvers/dgsem_t8code/containers_2d.jl b/src/solvers/dgsem_t8code/containers_2d.jl new file mode 100644 index 00000000000..029e6674afb --- /dev/null +++ b/src/solvers/dgsem_t8code/containers_2d.jl @@ -0,0 +1,58 @@ +@muladd begin +#! format: noindent + +# Interpolate tree_node_coordinates to each quadrant at the specified nodes. 
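+# Sketch of the idea (added for clarity): a t8code element of refinement level `l` covers
+# a square of side length `2^-l` (in units of the tree root length) inside its tree. The
+# loop below rescales the solver nodes into that square, builds matching 1D interpolation
+# matrices, and evaluates the tree's polynomial geometry at the rescaled nodes.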
+function calc_node_coordinates!(node_coordinates, + mesh::T8codeMesh{2}, + nodes::AbstractVector) + # We use `StrideArray`s here since these buffers are used in performance-critical + # places and the additional information passed to the compiler makes them faster + # than native `Array`s. + tmp1 = StrideArray(undef, real(mesh), + StaticInt(2), static_length(nodes), static_length(mesh.nodes)) + matrix1 = StrideArray(undef, real(mesh), + static_length(nodes), static_length(mesh.nodes)) + matrix2 = similar(matrix1) + baryweights_in = barycentric_weights(mesh.nodes) + + num_local_trees = t8_forest_get_num_local_trees(mesh.forest) + + current_index = 0 + for itree in 0:(num_local_trees - 1) + tree_class = t8_forest_get_tree_class(mesh.forest, itree) + eclass_scheme = t8_forest_get_eclass_scheme(mesh.forest, tree_class) + num_elements_in_tree = t8_forest_get_tree_num_elements(mesh.forest, itree) + + for ielement in 0:(num_elements_in_tree - 1) + element = t8_forest_get_element_in_tree(mesh.forest, itree, ielement) + element_level = t8_element_level(eclass_scheme, element) + + element_length = t8_quad_len(element_level) / t8_quad_root_len + + element_coords = Array{Float64}(undef, 3) + t8_element_vertex_reference_coords(eclass_scheme, element, 0, + pointer(element_coords)) + + nodes_out_x = 2 * + (element_length * 1 / 2 * (nodes .+ 1) .+ element_coords[1]) .- + 1 + nodes_out_y = 2 * + (element_length * 1 / 2 * (nodes .+ 1) .+ element_coords[2]) .- + 1 + + polynomial_interpolation_matrix!(matrix1, mesh.nodes, nodes_out_x, + baryweights_in) + polynomial_interpolation_matrix!(matrix2, mesh.nodes, nodes_out_y, + baryweights_in) + + multiply_dimensionwise!(view(node_coordinates, :, :, :, current_index += 1), + matrix1, matrix2, + view(mesh.tree_node_coordinates, :, :, :, + itree + 1), + tmp1) + end + end + + return node_coordinates +end +end # @muladd diff --git a/src/solvers/dgsem_t8code/dg.jl b/src/solvers/dgsem_t8code/dg.jl new file mode 100644 index 00000000000..16a9d7d35b1 --- /dev/null +++ b/src/solvers/dgsem_t8code/dg.jl @@ -0,0 +1,31 @@ +@muladd begin +#! format: noindent + +# This method is called when a SemidiscretizationHyperbolic is constructed. +# It constructs the basic `cache` used throughout the simulation to compute +# the RHS etc. +function create_cache(mesh::T8codeMesh, equations::AbstractEquations, dg::DG, ::Any, + ::Type{uEltype}) where {uEltype <: Real} + count_required_surfaces!(mesh) + + elements = init_elements(mesh, equations, dg.basis, uEltype) + interfaces = init_interfaces(mesh, equations, dg.basis, elements) + boundaries = init_boundaries(mesh, equations, dg.basis, elements) + mortars = init_mortars(mesh, equations, dg.basis, elements) + + trixi_t8_fill_mesh_info(mesh.forest, elements, interfaces, mortars, boundaries, + mesh.boundary_names) + + cache = (; elements, interfaces, boundaries, mortars) + + # Add specialized parts of the cache required to compute the volume integral etc. + cache = (; cache..., + create_cache(mesh, equations, dg.volume_integral, dg, uEltype)...) + cache = (; cache..., create_cache(mesh, equations, dg.mortar, uEltype)...) 
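+    # Note added for clarity: `(; cache..., extras...)` merges NamedTuples; later keys
+    # overwrite earlier ones, so each specialized `create_cache` extends the basic cache
+    # with fresh names.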
+ + return cache +end + +include("containers.jl") +include("containers_2d.jl") +end # @muladd diff --git a/src/solvers/dgsem_tree/dg_2d.jl b/src/solvers/dgsem_tree/dg_2d.jl index 6c5e0cee0cf..c30d0a8e01a 100644 --- a/src/solvers/dgsem_tree/dg_2d.jl +++ b/src/solvers/dgsem_tree/dg_2d.jl @@ -37,14 +37,14 @@ end # The methods below are specialized on the volume integral type # and called from the basic `create_cache` method at the top. function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, - P4estMesh{2}}, + P4estMesh{2}, T8codeMesh{2}}, equations, volume_integral::VolumeIntegralFluxDifferencing, dg::DG, uEltype) NamedTuple() end function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, - P4estMesh{2}}, equations, + P4estMesh{2}, T8codeMesh{2}}, equations, volume_integral::VolumeIntegralShockCapturingHG, dg::DG, uEltype) element_ids_dg = Int[] element_ids_dgfv = Int[] @@ -70,7 +70,7 @@ function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMe end function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, - P4estMesh{2}}, equations, + P4estMesh{2}, T8codeMesh{2}}, equations, volume_integral::VolumeIntegralPureLGLFiniteVolume, dg::DG, uEltype) A3dp1_x = Array{uEltype, 3} @@ -92,7 +92,7 @@ end # The methods below are specialized on the mortar type # and called from the basic `create_cache` method at the top. function create_cache(mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, - P4estMesh{2}}, + P4estMesh{2}, T8codeMesh{2}}, equations, mortar_l2::LobattoLegendreMortarL2, uEltype) # TODO: Taal performance using different types MA2d = MArray{Tuple{nvariables(equations), nnodes(mortar_l2)}, uEltype, 2, @@ -110,7 +110,7 @@ end # TODO: Taal discuss/refactor timer, allowing users to pass a custom timer? 
 function rhs!(du, u, t,
-              mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations,
+              mesh::Union{TreeMesh{2}, P4estMesh{2}, T8codeMesh{2}}, equations,
               initial_condition, boundary_conditions, source_terms::Source,
               dg::DG, cache) where {Source}
     # Reset du
@@ -180,7 +180,8 @@ end
 
 function calc_volume_integral!(du, u,
                                mesh::Union{TreeMesh{2}, StructuredMesh{2},
-                                           UnstructuredMesh2D, P4estMesh{2}},
+                                           UnstructuredMesh2D, P4estMesh{2},
+                                           T8codeMesh{2}},
                                nonconservative_terms, equations,
                                volume_integral::VolumeIntegralWeakForm,
                                dg::DGSEM, cache)
@@ -226,7 +227,8 @@ end
 # from the evaluation of the physical fluxes in each Cartesian direction
 function calc_volume_integral!(du, u,
                                mesh::Union{TreeMesh{2}, StructuredMesh{2},
-                                           UnstructuredMesh2D, P4estMesh{2}},
+                                           UnstructuredMesh2D, P4estMesh{2},
+                                           T8codeMesh{2}},
                                nonconservative_terms, equations,
                                volume_integral::VolumeIntegralFluxDifferencing,
                                dg::DGSEM, cache)
@@ -322,7 +324,8 @@ end
 # TODO: Taal dimension agnostic
 function calc_volume_integral!(du, u,
                                mesh::Union{TreeMesh{2}, StructuredMesh{2},
-                                           UnstructuredMesh2D, P4estMesh{2}},
+                                           UnstructuredMesh2D, P4estMesh{2},
+                                           T8codeMesh{2}},
                                nonconservative_terms, equations,
                                volume_integral::VolumeIntegralShockCapturingHG,
                                dg::DGSEM, cache)
@@ -381,7 +384,8 @@ end
 
 @inline function fv_kernel!(du, u,
                             mesh::Union{TreeMesh{2}, StructuredMesh{2},
-                                        UnstructuredMesh2D, P4estMesh{2}},
+                                        UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}
+                                        },
                             nonconservative_terms, equations,
                             volume_flux_fv, dg::DGSEM, cache, element, alpha = true)
     @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded = cache
diff --git a/src/solvers/dgsem_tree/indicators_2d.jl b/src/solvers/dgsem_tree/indicators_2d.jl
index f7c78547174..2f34e0eb661 100644
--- a/src/solvers/dgsem_tree/indicators_2d.jl
+++ b/src/solvers/dgsem_tree/indicators_2d.jl
@@ -208,7 +208,8 @@ end
 end
 
 # Diffuse alpha values by setting each alpha to at least 50% of neighboring elements' alpha
-function apply_smoothing!(mesh::Union{TreeMesh{2}, P4estMesh{2}}, alpha, alpha_tmp, dg,
+function apply_smoothing!(mesh::Union{TreeMesh{2}, P4estMesh{2}, T8codeMesh{2}}, alpha,
+                          alpha_tmp, dg,
                           cache)
     # Copy alpha values such that smoothing is independent of the element access order
     alpha_tmp .= alpha
diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl b/src/solvers/dgsem_unstructured/dg_2d.jl
index 95dec027a82..7b8dafdddd2 100644
--- a/src/solvers/dgsem_unstructured/dg_2d.jl
+++ b/src/solvers/dgsem_unstructured/dg_2d.jl
@@ -307,14 +307,14 @@ end
 
 # TODO: Taal dimension agnostic
 function calc_boundary_flux!(cache, t, boundary_condition::BoundaryConditionPeriodic,
-                             mesh::Union{UnstructuredMesh2D, P4estMesh},
+                             mesh::Union{UnstructuredMesh2D, P4estMesh, T8codeMesh},
                              equations, surface_integral, dg::DG)
     @assert isempty(eachboundary(dg, cache))
 end
 
 # Function barrier for type stability
 function calc_boundary_flux!(cache, t, boundary_conditions,
-                             mesh::Union{UnstructuredMesh2D, P4estMesh},
+                             mesh::Union{UnstructuredMesh2D, P4estMesh, T8codeMesh},
                              equations, surface_integral, dg::DG)
     @unpack boundary_condition_types, boundary_indices = boundary_conditions
 
@@ -327,7 +327,8 @@ end
 # in a type-stable way using "lispy tuple programming".
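 # (Illustrative sketch added for clarity, not part of the diff itself.) The pattern
 # peels off the first boundary condition, handles it, and recurses on the rest:
 #     process(::Tuple{}) = nothing
 #     process(t::Tuple) = (handle(first(t)); process(Base.tail(t)))
 # Because the tuple type shrinks with every call, the compiler can fully specialize
 # each step, avoiding dynamic dispatch over the heterogeneous BC types.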
function calc_boundary_flux_by_type!(cache, t, BCs::NTuple{N, Any}, BC_indices::NTuple{N, Vector{Int}}, - mesh::Union{UnstructuredMesh2D, P4estMesh}, + mesh::Union{UnstructuredMesh2D, P4estMesh, + T8codeMesh}, equations, surface_integral, dg::DG) where {N} # Extract the boundary condition type and index vector boundary_condition = first(BCs) @@ -350,7 +351,8 @@ end # terminate the type-stable iteration over tuples function calc_boundary_flux_by_type!(cache, t, BCs::Tuple{}, BC_indices::Tuple{}, - mesh::Union{UnstructuredMesh2D, P4estMesh}, + mesh::Union{UnstructuredMesh2D, P4estMesh, + T8codeMesh}, equations, surface_integral, dg::DG) nothing end diff --git a/test/runtests.jl b/test/runtests.jl index f76811dddbf..1d7eefe1fcb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,113 +4,119 @@ using MPI: mpiexec # run tests on Travis CI in parallel const TRIXI_TEST = get(ENV, "TRIXI_TEST", "all") const TRIXI_MPI_NPROCS = clamp(Sys.CPU_THREADS, 2, 3) -const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) +const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) @time @testset "Trixi.jl tests" begin - # This is placed first since tests error out otherwise if `TRIXI_TEST == "all"`, - # at least on some systems. - @time if TRIXI_TEST == "all" || TRIXI_TEST == "mpi" - # Do a dummy `@test true`: - # If the process errors out the testset would error out as well, - # cf. https://github.com/JuliaParallel/MPI.jl/pull/391 - @test true - - # There are spurious test failures of Trixi.jl with MPI on Windows, see - # https://github.com/trixi-framework/Trixi.jl/issues/901 - # To reduce their impact, we do not test MPI with coverage on Windows. - # This reduces the chance to hit a spurious test failure by one half. - # In addition, it looks like the Linux GitHub runners run out of memory during the 3D tests - # with coverage, so we currently do not test MPI with coverage on Linux. For more details, - # see the discussion at https://github.com/trixi-framework/Trixi.jl/pull/1062#issuecomment-1035901020 - cmd = string(Base.julia_cmd()) - coverage = occursin("--code-coverage", cmd) && !occursin("--code-coverage=none", cmd) - if !(coverage && Sys.iswindows()) && !(coverage && Sys.islinux()) - # We provide a `--heap-size-hint` to avoid/reduce out-of-memory errors during CI testing - mpiexec() do cmd - run(`$cmd -n $TRIXI_MPI_NPROCS $(Base.julia_cmd()) --threads=1 --check-bounds=yes --heap-size-hint=1G $(abspath("test_mpi.jl"))`) - end - end - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "threaded" || TRIXI_TEST == "threaded_legacy" - # Do a dummy `@test true`: - # If the process errors out the testset would error out as well, - # cf. 
https://github.com/JuliaParallel/MPI.jl/pull/391 - @test true - - run(`$(Base.julia_cmd()) --threads=$TRIXI_NTHREADS --check-bounds=yes --code-coverage=none $(abspath("test_threaded.jl"))`) - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "tree_part1" - include("test_tree_1d.jl") - include("test_tree_2d_part1.jl") - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "tree_part2" - include("test_tree_2d_part2.jl") - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "tree_part3" - include("test_tree_2d_part3.jl") - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "tree_part4" - include("test_tree_3d_part1.jl") - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "tree_part5" - include("test_tree_3d_part2.jl") - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "tree_part6" - include("test_tree_3d_part3.jl") - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "structured" - include("test_structured_1d.jl") - include("test_structured_2d.jl") - include("test_structured_3d.jl") - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "p4est_part1" - include("test_p4est_2d.jl") - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "p4est_part2" - include("test_p4est_3d.jl") - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "unstructured_dgmulti" - include("test_unstructured_2d.jl") - include("test_dgmulti_1d.jl") - include("test_dgmulti_2d.jl") - include("test_dgmulti_3d.jl") - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "parabolic" - include("test_parabolic_1d.jl") - include("test_parabolic_2d.jl") - include("test_parabolic_3d.jl") - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "misc_part1" - include("test_unit.jl") - include("test_visualization.jl") - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "misc_part2" - include("test_special_elixirs.jl") - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "performance_specializations_part1" - include("test_performance_specializations_2d.jl") - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "performance_specializations_part2" - include("test_performance_specializations_3d.jl") - end - - @time if TRIXI_TEST == "all" || TRIXI_TEST == "paper_self_gravitating_gas_dynamics" - include("test_paper_self_gravitating_gas_dynamics.jl") - end + # This is placed first since tests error out otherwise if `TRIXI_TEST == "all"`, + # at least on some systems. + @time if TRIXI_TEST == "all" || TRIXI_TEST == "mpi" + # Do a dummy `@test true`: + # If the process errors out the testset would error out as well, + # cf. https://github.com/JuliaParallel/MPI.jl/pull/391 + @test true + + # There are spurious test failures of Trixi.jl with MPI on Windows, see + # https://github.com/trixi-framework/Trixi.jl/issues/901 + # To reduce their impact, we do not test MPI with coverage on Windows. + # This reduces the chance to hit a spurious test failure by one half. + # In addition, it looks like the Linux GitHub runners run out of memory during the 3D tests + # with coverage, so we currently do not test MPI with coverage on Linux. 
For more details, + # see the discussion at https://github.com/trixi-framework/Trixi.jl/pull/1062#issuecomment-1035901020 + cmd = string(Base.julia_cmd()) + coverage = occursin("--code-coverage", cmd) && + !occursin("--code-coverage=none", cmd) + if !(coverage && Sys.iswindows()) && !(coverage && Sys.islinux()) + # We provide a `--heap-size-hint` to avoid/reduce out-of-memory errors during CI testing + mpiexec() do cmd + run(`$cmd -n $TRIXI_MPI_NPROCS $(Base.julia_cmd()) --threads=1 --check-bounds=yes --heap-size-hint=1G $(abspath("test_mpi.jl"))`) + end + end + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "threaded" || + TRIXI_TEST == "threaded_legacy" + # Do a dummy `@test true`: + # If the process errors out the testset would error out as well, + # cf. https://github.com/JuliaParallel/MPI.jl/pull/391 + @test true + + run(`$(Base.julia_cmd()) --threads=$TRIXI_NTHREADS --check-bounds=yes --code-coverage=none $(abspath("test_threaded.jl"))`) + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "tree_part1" + include("test_tree_1d.jl") + include("test_tree_2d_part1.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "tree_part2" + include("test_tree_2d_part2.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "tree_part3" + include("test_tree_2d_part3.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "tree_part4" + include("test_tree_3d_part1.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "tree_part5" + include("test_tree_3d_part2.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "tree_part6" + include("test_tree_3d_part3.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "structured" + include("test_structured_1d.jl") + include("test_structured_2d.jl") + include("test_structured_3d.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "p4est_part1" + include("test_p4est_2d.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "p4est_part2" + include("test_p4est_3d.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "t8code_part1" + include("test_t8code_2d.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "unstructured_dgmulti" + include("test_unstructured_2d.jl") + include("test_dgmulti_1d.jl") + include("test_dgmulti_2d.jl") + include("test_dgmulti_3d.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "parabolic" + include("test_parabolic_1d.jl") + include("test_parabolic_2d.jl") + include("test_parabolic_3d.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "misc_part1" + include("test_unit.jl") + include("test_visualization.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "misc_part2" + include("test_special_elixirs.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "performance_specializations_part1" + include("test_performance_specializations_2d.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "performance_specializations_part2" + include("test_performance_specializations_3d.jl") + end + + @time if TRIXI_TEST == "all" || TRIXI_TEST == "paper_self_gravitating_gas_dynamics" + include("test_paper_self_gravitating_gas_dynamics.jl") + end end diff --git a/test/test_t8code_2d.jl b/test/test_t8code_2d.jl new file mode 100644 index 00000000000..a424c9df84b --- /dev/null +++ b/test/test_t8code_2d.jl @@ -0,0 +1,182 @@ +module TestExamplesT8codeMesh2D + +using Test +using Trixi + +include("test_trixi.jl") + +EXAMPLES_DIR = joinpath(examples_dir(), "t8code_2d_dgsem") + +# Start with a clean environment: remove Trixi.jl output directory 
if it exists +outdir = "out" +isdir(outdir) && rm(outdir, recursive = true) +mkdir(outdir) + +@testset "T8codeMesh2D" begin + + @trixi_testset "test save_mesh_file" begin + @test_throws Exception begin + # Save mesh file support will be added in the future. The following + # lines of code are here for satisfying code coverage. + + # Create dummy mesh. + mesh = T8codeMesh((1, 1), polydeg = 1, + mapping = Trixi.coordinates2mapping((-1.0, -1.0), ( 1.0, 1.0)), + initial_refinement_level = 1) + + # This call throws an error. + Trixi.save_mesh_file(mesh, "dummy") + end + end + + @trixi_testset "elixir_advection_basic.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_basic.jl"), + # Expected errors are exactly the same as with TreeMesh! + l2=[8.311947673061856e-6], + linf=[6.627000273229378e-5]) + end + + @trixi_testset "elixir_advection_nonconforming_flag.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, + "elixir_advection_nonconforming_flag.jl"), + l2=[3.198940059144588e-5], + linf=[0.00030636069494005547]) + end + + @trixi_testset "elixir_advection_unstructured_flag.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_unstructured_flag.jl"), + l2=[0.0005379687442422346], + linf=[0.007438525029884735]) + end + + @trixi_testset "elixir_advection_amr_unstructured_flag.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, + "elixir_advection_amr_unstructured_flag.jl"), + l2=[0.001993165013217687], + linf=[0.032891018571625796], + coverage_override=(maxiters = 6,)) + end + + @trixi_testset "elixir_advection_amr_solution_independent.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, + "elixir_advection_amr_solution_independent.jl"), + # Expected errors are exactly the same as with StructuredMesh! + l2=[4.949660644033807e-5], + linf=[0.0004867846262313763], + coverage_override=(maxiters = 6,)) + end + + @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_flag.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, + "elixir_euler_source_terms_nonconforming_unstructured_flag.jl"), + l2=[ + 0.0034516244508588046, + 0.0023420334036925493, + 0.0024261923964557187, + 0.004731710454271893, + ], + linf=[ + 0.04155789011775046, + 0.024772109862748914, + 0.03759938693042297, + 0.08039824959535657, + ]) + end + + @trixi_testset "elixir_euler_free_stream.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_free_stream.jl"), + l2=[ + 2.063350241405049e-15, + 1.8571016296925367e-14, + 3.1769447886391905e-14, + 1.4104095258528071e-14, + ], + linf=[1.9539925233402755e-14, 2e-12, 4.8e-12, 4e-12], + atol=2.0e-12,) + end + + @trixi_testset "elixir_euler_shockcapturing_ec.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_shockcapturing_ec.jl"), + l2=[ + 9.53984675e-02, + 1.05633455e-01, + 1.05636158e-01, + 3.50747237e-01, + ], + linf=[ + 2.94357464e-01, + 4.07893014e-01, + 3.97334516e-01, + 1.08142520e+00, + ], + tspan=(0.0, 1.0)) + end + + @trixi_testset "elixir_euler_sedov.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov.jl"), + l2=[ + 3.76149952e-01, + 2.46970327e-01, + 2.46970327e-01, + 1.28889042e+00, + ], + linf=[ + 1.22139001e+00, + 1.17742626e+00, + 1.17742626e+00, + 6.20638482e+00, + ], + tspan=(0.0, 0.3)) + end + + @trixi_testset "elixir_shallowwater_source_terms.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_source_terms.jl"), + l2=[ + 9.168126407325352e-5, + 0.0009795410115453788, + 0.002546408320320785, + 3.941189812642317e-6, + ], + linf=[ + 
                                0.0009903782521019089,
                                0.0059752684687262025,
                                0.010941106525454103,
                                1.2129488214718265e-5,
                            ],
                            tspan=(0.0, 0.1))
    end

    @trixi_testset "elixir_mhd_alfven_wave.jl" begin
        @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_alfven_wave.jl"),
                            l2=[1.0513414461545583e-5, 1.0517900957166411e-6,
                                1.0517900957304043e-6, 1.511816606372376e-6,
                                1.0443997728645063e-6, 7.879639064990798e-7,
                                7.879639065049896e-7, 1.0628631669056271e-6,
                                4.3382328912336153e-7],
                            linf=[4.255466285174592e-5, 1.0029706745823264e-5,
                                  1.0029706747467781e-5, 1.2122265939010224e-5,
                                  5.4791097160444835e-6, 5.18922042269665e-6,
                                  5.189220422141538e-6, 9.552667261422676e-6,
                                  1.4237578427628152e-6])
    end

    @trixi_testset "elixir_mhd_rotor.jl" begin
        @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_rotor.jl"),
                            l2=[0.44211360369891683, 0.8805178316216257, 0.8262710688468049,
                                0.0,
                                0.9616090460973586, 0.10386643568745411,
                                0.15403457366543802, 0.0,
                                2.8399715649715473e-5],
                            linf=[10.04369305341599, 17.995640564998403, 9.576041548174265,
                                  0.0,
                                  19.429658884314534, 1.3821395681242314, 1.818559351543182,
                                  0.0,
                                  0.002261930217575465],
                            tspan=(0.0, 0.02))
    end
end

# Clean up afterwards: delete Trixi.jl output directory
@test_nowarn rm(outdir, recursive = true)

end # module
diff --git a/test/test_threaded.jl b/test/test_threaded.jl
index 77fa16ad33e..9b30836d0ed 100644
--- a/test/test_threaded.jl
+++ b/test/test_threaded.jl
@@ -235,6 +235,22 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true)
   end


+  @testset "T8codeMesh" begin
+    @trixi_testset "elixir_euler_source_terms_nonconforming_unstructured_flag.jl" begin
+      @test_trixi_include(joinpath(examples_dir(), "t8code_2d_dgsem", "elixir_euler_source_terms_nonconforming_unstructured_flag.jl"),
+        l2 = [0.0034516244508588046, 0.0023420334036925493, 0.0024261923964557187, 0.004731710454271893],
+        linf = [0.04155789011775046, 0.024772109862748914, 0.03759938693042297, 0.08039824959535657])
+    end
+
+    @trixi_testset "elixir_eulergravity_convergence.jl" begin
+      @test_trixi_include(joinpath(examples_dir(), "t8code_2d_dgsem", "elixir_eulergravity_convergence.jl"),
+        l2 = [0.00024871265138964204, 0.0003370077102132591, 0.0003370077102131964, 0.0007231525513793697],
+        linf = [0.0015813032944647087, 0.0020494288423820173, 0.0020494288423824614, 0.004793821195083758],
+        tspan = (0.0, 0.1))
+    end
+  end
+
+
   @testset "DGMulti" begin
     @trixi_testset "elixir_euler_weakform.jl (SBP, EC)" begin
       @test_trixi_include(joinpath(examples_dir(), "dgmulti_2d", "elixir_euler_weakform.jl"),
From e1e680ca8574acd10daa2e5bc5e1f49e1ce008f9 Mon Sep 17 00:00:00 2001
From: Daniel Doehring
Date: Fri, 28 Jul 2023 04:35:33 +0200
Subject: [PATCH 104/163] Enstrophy for 2D Navier-Stokes (#1591)

* Doubly periodic shear layer

* test if Project toml shows up in git diff

* remove changes

* Enstrophy for 2D Navier-Stokes

---
 Project.toml                                  |  2 +-
 .../elixir_navierstokes_shear_layer.jl        | 71 +++++++++++++++++++
 src/callbacks_step/analysis_dg2d.jl           | 18 +++++
 .../compressible_navier_stokes_2d.jl          | 15 ++++
 test/test_parabolic_2d.jl                     |  4 ++
 5 files changed, 109 insertions(+), 1 deletion(-)
 create mode 100644 examples/tree_2d_dgsem/elixir_navierstokes_shear_layer.jl

diff --git a/Project.toml b/Project.toml
index db410317851..b3ca99be9ec 100644
--- a/Project.toml
+++ b/Project.toml
@@ -60,8 +60,8 @@ HDF5 = "0.14, 0.15, 0.16"
 IfElse = "0.1"
 LinearMaps = "2.7, 3.0"
 LoopVectorization = "0.12.118"
-Makie = "0.19"
 MPI = "0.20"
+Makie = "0.19"
 MuladdMacro = "0.2.2"
 Octavian = "0.3.5"
OffsetArrays = "1.3" diff --git a/examples/tree_2d_dgsem/elixir_navierstokes_shear_layer.jl b/examples/tree_2d_dgsem/elixir_navierstokes_shear_layer.jl new file mode 100644 index 00000000000..a7cb2fc89f1 --- /dev/null +++ b/examples/tree_2d_dgsem/elixir_navierstokes_shear_layer.jl @@ -0,0 +1,71 @@ + +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the compressible Navier-Stokes equations + +# TODO: parabolic; unify names of these accessor functions +prandtl_number() = 0.72 +mu() = 1.0/3.0 * 10^(-3) # equivalent to Re = 3000 + +equations = CompressibleEulerEquations2D(1.4) +equations_parabolic = CompressibleNavierStokesDiffusion2D(equations, mu=mu(), + Prandtl=prandtl_number()) + +function initial_condition_shear_layer(x, t, equations::CompressibleEulerEquations2D) + k = 80 + delta = 0.05 + u0 = 1.0 + Ms = 0.1 # maximum Mach number + + rho = 1.0 + v1 = x[2] <= 0.5 ? u0*tanh(k*(x[2]*0.5 - 0.25)) : tanh(k*(0.75 -x[2]*0.5)) + v2 = u0*delta * sin(2*pi*(x[1]*0.5 + 0.25)) + p = (u0 / Ms)^2 * rho / equations.gamma # scaling to get Ms + + return prim2cons(SVector(rho, v1, v2, p), equations) +end +initial_condition = initial_condition_shear_layer + +volume_flux = flux_ranocha +solver = DGSEM(polydeg=3, surface_flux=flux_hllc, + volume_integral=VolumeIntegralFluxDifferencing(volume_flux)) + +coordinates_min = (0.0, 0.0) +coordinates_max = (1.0, 1.0) +mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level=4, + n_cells_max=100_000) + + +semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), + initial_condition, solver) + +############################################################################### +# ODE solvers, callbacks etc. 
+ +tspan = (0.0, 2.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 50 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=true, + extra_analysis_integrals=(energy_kinetic, + energy_internal, + enstrophy)) + +alive_callback = AliveCallback(analysis_interval=analysis_interval,) + +callbacks = CallbackSet(summary_callback, + analysis_callback, + alive_callback) + +############################################################################### +# run the simulation + +time_int_tol = 1e-8 +sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, + ode_default_options()..., callback=callbacks) +summary_callback() # print the timer summary \ No newline at end of file diff --git a/src/callbacks_step/analysis_dg2d.jl b/src/callbacks_step/analysis_dg2d.jl index 4e456f79872..aecabf0e4b7 100644 --- a/src/callbacks_step/analysis_dg2d.jl +++ b/src/callbacks_step/analysis_dg2d.jl @@ -213,6 +213,24 @@ function integrate(func::Func, u, end end +function integrate(func::Func, u, + mesh::Union{TreeMesh{2}, P4estMesh{2}}, + equations, equations_parabolic, + dg::DGSEM, + cache, cache_parabolic; normalize = true) where {Func} + gradients_x, gradients_y = cache_parabolic.gradients + integrate_via_indices(u, mesh, equations, dg, cache; + normalize = normalize) do u, i, j, element, equations, dg + u_local = get_node_vars(u, equations, dg, i, j, element) + gradients_1_local = get_node_vars(gradients_x, equations_parabolic, dg, i, j, + element) + gradients_2_local = get_node_vars(gradients_y, equations_parabolic, dg, i, j, + element) + return func(u_local, (gradients_1_local, gradients_2_local), + equations_parabolic) + end +end + function analyze(::typeof(entropy_timederivative), du, u, t, mesh::Union{TreeMesh{2}, StructuredMesh{2}, UnstructuredMesh2D, P4estMesh{2}, T8codeMesh{2}}, diff --git a/src/equations/compressible_navier_stokes_2d.jl b/src/equations/compressible_navier_stokes_2d.jl index 9b06e0b5abf..a1f11717e69 100644 --- a/src/equations/compressible_navier_stokes_2d.jl +++ b/src/equations/compressible_navier_stokes_2d.jl @@ -300,6 +300,21 @@ end return T end +@inline function enstrophy(u, gradients, equations::CompressibleNavierStokesDiffusion2D) + # Enstrophy is 0.5 rho ω⋅ω where ω = ∇ × v + + omega = vorticity(u, gradients, equations) + return 0.5 * u[1] * omega^2 +end + +@inline function vorticity(u, gradients, equations::CompressibleNavierStokesDiffusion2D) + # Ensure that we have velocity `gradients` by way of the `convert_gradient_variables` function. + _, dv1dx, dv2dx, _ = convert_derivative_to_primitive(u, gradients[1], equations) + _, dv1dy, dv2dy, _ = convert_derivative_to_primitive(u, gradients[2], equations) + + return dv2dx - dv1dy +end + # TODO: can we generalize this to MHD? 
""" struct BoundaryConditionNavierStokesWall diff --git a/test/test_parabolic_2d.jl b/test/test_parabolic_2d.jl index 471b976e990..57f296b55fe 100644 --- a/test/test_parabolic_2d.jl +++ b/test/test_parabolic_2d.jl @@ -136,6 +136,10 @@ isdir(outdir) && rm(outdir, recursive=true) @trixi_testset "TreeMesh2D: elixir_navierstokes_convergence.jl" begin @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_navierstokes_convergence.jl"), initial_refinement_level = 2, tspan=(0.0, 0.1), + analysis_callback = AnalysisCallback(semi, interval=analysis_interval, + extra_analysis_integrals=(energy_kinetic, + energy_internal, + enstrophy)), l2 = [0.002111672530658797, 0.0034322351490857846, 0.0038742528195910416, 0.012469246082568561], linf = [0.012006418939223495, 0.035520871209746126, 0.024512747492231427, 0.11191122588756564] ) From 73e58dc59ad0e06616507b9338c8fe0bee5b99b4 Mon Sep 17 00:00:00 2001 From: Jesse Chan <1156048+jlchan@users.noreply.github.com> Date: Fri, 28 Jul 2023 23:28:53 -0500 Subject: [PATCH 105/163] remove CI functions that cause preocmpilation errors (#1593) --- src/solvers/dgsem_p4est/dg_3d_parabolic.jl | 54 ---------------------- 1 file changed, 54 deletions(-) diff --git a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl index 5370c927e05..6439cad69bb 100644 --- a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl +++ b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl @@ -563,60 +563,6 @@ function prolong2boundaries!(cache_parabolic, flux_viscous, return nothing end -# # Function barrier for type stability -# !!! TODO: Figure out why this cannot removed eventhough it exists in the dg_2d_parabolic.jl file -function calc_boundary_flux_gradients!(cache, t, boundary_conditions, mesh::P4estMesh, - equations, surface_integral, dg::DG) - (; boundary_condition_types, boundary_indices) = boundary_conditions - - calc_boundary_flux_by_type!(cache, t, boundary_condition_types, boundary_indices, - Gradient(), mesh, equations, surface_integral, dg) - return nothing -end - -function calc_boundary_flux_divergence!(cache, t, boundary_conditions, mesh::P4estMesh, - equations, surface_integral, dg::DG) - (; boundary_condition_types, boundary_indices) = boundary_conditions - - calc_boundary_flux_by_type!(cache, t, boundary_condition_types, boundary_indices, - Divergence(), mesh, equations, surface_integral, dg) - return nothing -end - -# Iterate over tuples of boundary condition types and associated indices -# in a type-stable way using "lispy tuple programming". 
-function calc_boundary_flux_by_type!(cache, t, BCs::NTuple{N, Any}, - BC_indices::NTuple{N, Vector{Int}}, - operator_type, - mesh::P4estMesh, - equations, surface_integral, dg::DG) where {N} - # Extract the boundary condition type and index vector - boundary_condition = first(BCs) - boundary_condition_indices = first(BC_indices) - # Extract the remaining types and indices to be processed later - remaining_boundary_conditions = Base.tail(BCs) - remaining_boundary_condition_indices = Base.tail(BC_indices) - - # process the first boundary condition type - calc_boundary_flux!(cache, t, boundary_condition, boundary_condition_indices, - operator_type, mesh, equations, surface_integral, dg) - - # recursively call this method with the unprocessed boundary types - calc_boundary_flux_by_type!(cache, t, remaining_boundary_conditions, - remaining_boundary_condition_indices, - operator_type, - mesh, equations, surface_integral, dg) - - return nothing -end - -# terminate the type-stable iteration over tuples -function calc_boundary_flux_by_type!(cache, t, BCs::Tuple{}, BC_indices::Tuple{}, - operator_type, mesh::P4estMesh, equations, - surface_integral, dg::DG) - nothing -end - function calc_boundary_flux!(cache, t, boundary_condition_parabolic, # works with Dict types boundary_condition_indices, From d05f9c5bfc329db3448a7af18bb1c24cfb75deb2 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Sun, 30 Jul 2023 08:19:37 +0200 Subject: [PATCH 106/163] run only threaded tests by default (#1592) --- test/runtests.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 1d7eefe1fcb..1b0c745dbfd 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,8 +1,11 @@ using Test using MPI: mpiexec -# run tests on Travis CI in parallel -const TRIXI_TEST = get(ENV, "TRIXI_TEST", "all") +# We run tests in parallel with CI jobs setting the `TRIXI_TEST` environment +# variable to determine the subset of tests to execute. +# By default, we just run the threaded tests since they are relatively cheap +# and test a good amount of different functionality. +const TRIXI_TEST = get(ENV, "TRIXI_TEST", "threaded") const TRIXI_MPI_NPROCS = clamp(Sys.CPU_THREADS, 2, 3) const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3) From d208cee2690fb5b1d63a0511ad5f73967d340205 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Sun, 30 Jul 2023 09:47:44 +0200 Subject: [PATCH 107/163] set version to v0.5.37 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index b3ca99be9ec..1d06317f53a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.37-pre" +version = "0.5.37" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From e76ea3932d875774220811ff0c14c8e966c312bf Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Sun, 30 Jul 2023 09:47:57 +0200 Subject: [PATCH 108/163] set development version to v0.5.38-pre --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 1d06317f53a..c22d4b90642 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. 
Winters ", "Jesse Chan "] -version = "0.5.37" +version = "0.5.38-pre" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From 6c97c48e53feb9fe372dc020cbd5f3e1e8fef458 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 8 Aug 2023 07:15:15 +0200 Subject: [PATCH 109/163] Bump crate-ci/typos from 1.16.1 to 1.16.2 (#1598) * Bump crate-ci/typos from 1.16.1 to 1.16.2 Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.16.1 to 1.16.2. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.16.1...v1.16.2) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] * f_sur -> f_surface --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Hendrik Ranocha --- .github/workflows/SpellCheck.yml | 2 +- docs/literate/src/files/DGSEM_FluxDiff.jl | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml index f72c3b0947b..a1a429cad97 100644 --- a/.github/workflows/SpellCheck.yml +++ b/.github/workflows/SpellCheck.yml @@ -10,4 +10,4 @@ jobs: - name: Checkout Actions Repository uses: actions/checkout@v3 - name: Check spelling - uses: crate-ci/typos@v1.16.1 + uses: crate-ci/typos@v1.16.2 diff --git a/docs/literate/src/files/DGSEM_FluxDiff.jl b/docs/literate/src/files/DGSEM_FluxDiff.jl index cf3b0a1dbd4..5ec156ebbe3 100644 --- a/docs/literate/src/files/DGSEM_FluxDiff.jl +++ b/docs/literate/src/files/DGSEM_FluxDiff.jl @@ -96,13 +96,13 @@ # \begin{align*} # J \underline{\dot{u}}(t) &= - M^{-1} B (\underline{f}^* - \underline{f}) - 2D \underline{f}_{vol}(u^-, u^+)\\[5pt] # &= - M^{-1} B (\underline{f}^* - \underline{f}_{vol}(\underline{u}, \underline{u})) - 2D \underline{f}_{vol}(u^-, u^+)\\[5pt] -# &= - M^{-1} B \underline{f}_{sur}^* - (2D - M^{-1} B) \underline{f}_{vol}\\[5pt] -# &= - M^{-1} B \underline{f}_{sur}^* - D_{split} \underline{f}_{vol} +# &= - M^{-1} B \underline{f}_{surface}^* - (2D - M^{-1} B) \underline{f}_{vol}\\[5pt] +# &= - M^{-1} B \underline{f}_{surface}^* - D_{split} \underline{f}_{vol} # \end{align*} # ``` # This formulation is in a weak form type formulation and can be implemented by using the derivative # split matrix $D_{split}=(2D-M^{-1}B)$ and two different fluxes. We divide between the surface -# flux $f=f_{sur}$ used for the numerical flux $f_{sur}^*$ and the already mentioned volume +# flux $f=f_{surface}$ used for the numerical flux $f_{surface}^*$ and the already mentioned volume # flux $f_{vol}$ especially for this formulation. 
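As a reading aid for the literate documentation touched by the patch above, here is a
minimal, self-contained Julia sketch (illustrative only, not part of any patch in this
series; the hard-coded polydeg-1 Lobatto-Legendre operators are stated assumptions) that
builds the split operator D_split = 2D - M^{-1}B and checks the summation-by-parts
property M D + (M D)^T = B:

    using LinearAlgebra

    # Lobatto-Legendre operators for polynomial degree 1 on [-1, 1] (nodes -1 and +1)
    D = [-0.5 0.5; -0.5 0.5]     # differentiation matrix (exact for linear functions)
    M = Diagonal([1.0, 1.0])     # mass matrix built from the quadrature weights
    B = Diagonal([-1.0, 1.0])    # boundary evaluation matrix

    D_split = 2 * D - inv(M) * B # the split-form operator from the formula above

    @assert M * D + transpose(M * D) ≈ B    # summation-by-parts property
    @assert D_split ≈ [0.0 1.0; -1.0 0.0]   # antisymmetric for this basis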
From ddf089271c65d82b466711e59b5f791c0bd21021 Mon Sep 17 00:00:00 2001
From: Daniel Doehring
Date: Tue, 8 Aug 2023 10:17:31 +0200
Subject: [PATCH 110/163] Avoid allocations in `boundary flux` for parabolic
 RHS (#1594)

* Remove doubled implementations

* keep main updated with true main

* Avoid allocations in parabolic boundary fluxes

* Correct shear layer IC

* Whitespaces

* Update examples/tree_2d_dgsem/elixir_navierstokes_convergence.jl

Co-authored-by: Hendrik Ranocha

* Update examples/tree_3d_dgsem/elixir_navierstokes_convergence.jl

Co-authored-by: Hendrik Ranocha

---------

Co-authored-by: Hendrik Ranocha
---
 examples/tree_2d_dgsem/elixir_navierstokes_convergence.jl | 5 ++++-
 examples/tree_2d_dgsem/elixir_navierstokes_shear_layer.jl | 6 ++++--
 examples/tree_3d_dgsem/elixir_navierstokes_convergence.jl | 5 ++++-
 src/solvers/dgsem_tree/containers_2d.jl                   | 6 +++---
 src/solvers/dgsem_tree/containers_3d.jl                   | 8 ++++----
 5 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/examples/tree_2d_dgsem/elixir_navierstokes_convergence.jl b/examples/tree_2d_dgsem/elixir_navierstokes_convergence.jl
index 36a9f52e39d..b68e9e6c97e 100644
--- a/examples/tree_2d_dgsem/elixir_navierstokes_convergence.jl
+++ b/examples/tree_2d_dgsem/elixir_navierstokes_convergence.jl
@@ -170,7 +170,10 @@ end
 initial_condition = initial_condition_navier_stokes_convergence_test
 
 # BC types
-velocity_bc_top_bottom = NoSlip((x, t, equations) -> initial_condition_navier_stokes_convergence_test(x, t, equations)[2:3])
+velocity_bc_top_bottom = NoSlip() do x, t, equations
+    u = initial_condition_navier_stokes_convergence_test(x, t, equations)
+    return SVector(u[2], u[3])
+end
 heat_bc_top_bottom = Adiabatic((x, t, equations) -> 0.0)
 boundary_condition_top_bottom = BoundaryConditionNavierStokesWall(velocity_bc_top_bottom, heat_bc_top_bottom)
 
diff --git a/examples/tree_2d_dgsem/elixir_navierstokes_shear_layer.jl b/examples/tree_2d_dgsem/elixir_navierstokes_shear_layer.jl
index a7cb2fc89f1..dd26fd8097b 100644
--- a/examples/tree_2d_dgsem/elixir_navierstokes_shear_layer.jl
+++ b/examples/tree_2d_dgsem/elixir_navierstokes_shear_layer.jl
@@ -14,14 +14,16 @@ equations_parabolic = CompressibleNavierStokesDiffusion2D(equations, mu=mu(),
                                                           Prandtl=prandtl_number())
 
 function initial_condition_shear_layer(x, t, equations::CompressibleEulerEquations2D)
+    # Shear layer parameters
     k = 80
     delta = 0.05
     u0 = 1.0
+
     Ms = 0.1 # maximum Mach number
 
     rho = 1.0
-    v1 = x[2] <= 0.5 ? u0*tanh(k*(x[2]*0.5 - 0.25)) : tanh(k*(0.75 -x[2]*0.5))
-    v2 = u0*delta * sin(2*pi*(x[1]*0.5 + 0.25))
+    v1 = x[2] <= 0.5 ?
u0 * tanh(k*(x[2]*0.5 - 0.25)) : u0 * tanh(k*(0.75 -x[2]*0.5)) + v2 = u0 * delta * sin(2*pi*(x[1]*0.5 + 0.25)) p = (u0 / Ms)^2 * rho / equations.gamma # scaling to get Ms return prim2cons(SVector(rho, v1, v2, p), equations) diff --git a/examples/tree_3d_dgsem/elixir_navierstokes_convergence.jl b/examples/tree_3d_dgsem/elixir_navierstokes_convergence.jl index b32355c48df..ebb0137a1bb 100644 --- a/examples/tree_3d_dgsem/elixir_navierstokes_convergence.jl +++ b/examples/tree_3d_dgsem/elixir_navierstokes_convergence.jl @@ -220,7 +220,10 @@ end initial_condition = initial_condition_navier_stokes_convergence_test # BC types -velocity_bc_top_bottom = NoSlip((x, t, equations) -> initial_condition_navier_stokes_convergence_test(x, t, equations)[2:4]) +velocity_bc_top_bottom = NoSlip() do x, t, equations + u = initial_condition_navier_stokes_convergence_test(x, t, equations) + return SVector(u[2], u[3], u[4]) +end heat_bc_top_bottom = Adiabatic((x, t, equations) -> 0.0) boundary_condition_top_bottom = BoundaryConditionNavierStokesWall(velocity_bc_top_bottom, heat_bc_top_bottom) diff --git a/src/solvers/dgsem_tree/containers_2d.jl b/src/solvers/dgsem_tree/containers_2d.jl index 5cf256d3499..d80522d42fd 100644 --- a/src/solvers/dgsem_tree/containers_2d.jl +++ b/src/solvers/dgsem_tree/containers_2d.jl @@ -764,10 +764,10 @@ end # Container data structure (structure-of-arrays style) for DG MPI interfaces mutable struct MPIInterfaceContainer2D{uEltype <: Real} <: AbstractContainer - u::Array{uEltype, 4} # [leftright, variables, i, interfaces] + u::Array{uEltype, 4} # [leftright, variables, i, interfaces] local_neighbor_ids::Vector{Int} # [interfaces] - orientations::Vector{Int} # [interfaces] - remote_sides::Vector{Int} # [interfaces] + orientations::Vector{Int} # [interfaces] + remote_sides::Vector{Int} # [interfaces] # internal `resize!`able storage _u::Vector{uEltype} end diff --git a/src/solvers/dgsem_tree/containers_3d.jl b/src/solvers/dgsem_tree/containers_3d.jl index 0318946e34d..5fc027ad001 100644 --- a/src/solvers/dgsem_tree/containers_3d.jl +++ b/src/solvers/dgsem_tree/containers_3d.jl @@ -520,14 +520,14 @@ end # Left and right are used *both* for the numbering of the mortar faces *and* for the position of the # elements with respect to the axis orthogonal to the mortar. 
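# (Sketch for orientation, not part of this diff: both the interface and the
# mortar containers pair each multidimensional array, e.g. `u` or
# `u_upper_left`, with an internal flat vector such as `_u` that owns the
# memory, so the container can grow without reallocating every array by hand.
# A stripped-down, hypothetical version of that pattern:)
mutable struct ExampleContainer{uEltype <: Real}
    u::Array{uEltype, 2}  # [variables, elements], wraps `_u`
    _u::Vector{uEltype}   # internal `resize!`able storage
end

function resize_container!(c::ExampleContainer, n_variables, n_elements)
    resize!(c._u, n_variables * n_elements)
    # Re-wrap the flat storage instead of allocating a fresh array
    c.u = unsafe_wrap(Array, pointer(c._u), (n_variables, n_elements))
    return c
end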
mutable struct L2MortarContainer3D{uEltype <: Real} <: AbstractContainer - u_upper_left::Array{uEltype, 5} # [leftright, variables, i, j, mortars] + u_upper_left::Array{uEltype, 5} # [leftright, variables, i, j, mortars] u_upper_right::Array{uEltype, 5} # [leftright, variables, i, j, mortars] - u_lower_left::Array{uEltype, 5} # [leftright, variables, i, j, mortars] + u_lower_left::Array{uEltype, 5} # [leftright, variables, i, j, mortars] u_lower_right::Array{uEltype, 5} # [leftright, variables, i, j, mortars] - neighbor_ids::Array{Int, 2} # [position, mortars] + neighbor_ids::Array{Int, 2} # [position, mortars] # Large sides: left -> 1, right -> 2 large_sides::Vector{Int} # [mortars] - orientations::Vector{Int} # [mortars] + orientations::Vector{Int} # [mortars] # internal `resize!`able storage _u_upper_left::Vector{uEltype} _u_upper_right::Vector{uEltype} From 7936e61b46b6a61ac0854b42a6082204700c7eca Mon Sep 17 00:00:00 2001 From: Daniel Doehring Date: Wed, 9 Aug 2023 13:33:59 +0200 Subject: [PATCH 111/163] Adapt `jacobian_ad_forward` for hyperbolic-parabolic semidiscretizations (#1589) * JacobianAD calls correct RHS for Hyperbolic-Parabolic * Nonlinear test * Format * Bring default _jacobian_ad_forward back * CI for 2D Taylor-Green * covered by standard version * implement rhs directly in jacobina_ad_forward * Update src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl Co-authored-by: Hendrik Ranocha * Add reference for 3D Taylor-Green Vortex * Update doc * Update tests 2D Taylor-Green Vortex * Fix copy-paste error * Viscous TGV comment --------- Co-authored-by: Hendrik Ranocha --- ...elixir_navierstokes_taylor_green_vortex.jl | 78 +++++++++++++++++++ ...elixir_navierstokes_taylor_green_vortex.jl | 6 +- ...semidiscretization_hyperbolic_parabolic.jl | 15 ++++ test/test_parabolic_2d.jl | 7 ++ test/test_special_elixirs.jl | 18 +++++ 5 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 examples/tree_2d_dgsem/elixir_navierstokes_taylor_green_vortex.jl diff --git a/examples/tree_2d_dgsem/elixir_navierstokes_taylor_green_vortex.jl b/examples/tree_2d_dgsem/elixir_navierstokes_taylor_green_vortex.jl new file mode 100644 index 00000000000..c3cbc858f7b --- /dev/null +++ b/examples/tree_2d_dgsem/elixir_navierstokes_taylor_green_vortex.jl @@ -0,0 +1,78 @@ + +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the compressible Navier-Stokes equations + +# TODO: parabolic; unify names of these accessor functions +prandtl_number() = 0.72 +mu() = 6.25e-4 # equivalent to Re = 1600 + +equations = CompressibleEulerEquations2D(1.4) +equations_parabolic = CompressibleNavierStokesDiffusion2D(equations, mu=mu(), + Prandtl=prandtl_number()) + +""" + initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations2D) + +The classical viscous Taylor-Green vortex in 2D. +This forms the basis behind the 3D case found for instance in + - Jonathan R. 
Bull and Antony Jameson + Simulation of the Compressible Taylor Green Vortex using High-Order Flux Reconstruction Schemes + [DOI: 10.2514/6.2014-3210](https://doi.org/10.2514/6.2014-3210) +""" +function initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations2D) + A = 1.0 # magnitude of speed + Ms = 0.1 # maximum Mach number + + rho = 1.0 + v1 = A * sin(x[1]) * cos(x[2]) + v2 = -A * cos(x[1]) * sin(x[2]) + p = (A / Ms)^2 * rho / equations.gamma # scaling to get Ms + p = p + 1.0/4.0 * A^2 * rho * (cos(2*x[1]) + cos(2*x[2])) + + return prim2cons(SVector(rho, v1, v2, p), equations) +end +initial_condition = initial_condition_taylor_green_vortex + +volume_flux = flux_ranocha +solver = DGSEM(polydeg=3, surface_flux=flux_hllc, + volume_integral=VolumeIntegralFluxDifferencing(volume_flux)) + +coordinates_min = (-1.0, -1.0) .* pi +coordinates_max = ( 1.0, 1.0) .* pi +mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level=4, + n_cells_max=100_000) + + +semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), + initial_condition, solver) + +############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 20.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=true, + extra_analysis_integrals=(energy_kinetic, + energy_internal)) + +alive_callback = AliveCallback(analysis_interval=analysis_interval,) + +callbacks = CallbackSet(summary_callback, + analysis_callback, + alive_callback) + +############################################################################### +# run the simulation + +time_int_tol = 1e-9 +sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, + ode_default_options()..., callback=callbacks) +summary_callback() # print the timer summary \ No newline at end of file diff --git a/examples/tree_3d_dgsem/elixir_navierstokes_taylor_green_vortex.jl b/examples/tree_3d_dgsem/elixir_navierstokes_taylor_green_vortex.jl index 9cb73a462b7..5556831a59d 100644 --- a/examples/tree_3d_dgsem/elixir_navierstokes_taylor_green_vortex.jl +++ b/examples/tree_3d_dgsem/elixir_navierstokes_taylor_green_vortex.jl @@ -16,7 +16,11 @@ equations_parabolic = CompressibleNavierStokesDiffusion3D(equations, mu=mu(), """ initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations3D) -The classical inviscid Taylor-Green vortex. +The classical viscous Taylor-Green vortex, as found for instance in + +- Jonathan R. 
Bull and Antony Jameson + Simulation of the Compressible Taylor Green Vortex using High-Order Flux Reconstruction Schemes + [DOI: 10.2514/6.2014-3210](https://doi.org/10.2514/6.2014-3210) """ function initial_condition_taylor_green_vortex(x, t, equations::CompressibleEulerEquations3D) A = 1.0 # magnitude of speed diff --git a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl index 8f1e38c891b..b12ecadb58b 100644 --- a/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl +++ b/src/semidiscretization/semidiscretization_hyperbolic_parabolic.jl @@ -330,4 +330,19 @@ function rhs_parabolic!(du_ode, u_ode, semi::SemidiscretizationHyperbolicParabol return nothing end + +function _jacobian_ad_forward(semi::SemidiscretizationHyperbolicParabolic, t0, u0_ode, + du_ode, config) + new_semi = remake(semi, uEltype = eltype(config)) + + du_ode_hyp = Vector{eltype(config)}(undef, length(du_ode)) + J = ForwardDiff.jacobian(du_ode, u0_ode, config) do du_ode, u_ode + # Implementation of split ODE problem in OrdinaryDiffEq + rhs!(du_ode_hyp, u_ode, new_semi, t0) + rhs_parabolic!(du_ode, u_ode, new_semi, t0) + du_ode .+= du_ode_hyp + end + + return J +end end # @muladd diff --git a/test/test_parabolic_2d.jl b/test/test_parabolic_2d.jl index 57f296b55fe..e3bb1ed9fb1 100644 --- a/test/test_parabolic_2d.jl +++ b/test/test_parabolic_2d.jl @@ -188,6 +188,13 @@ isdir(outdir) && rm(outdir, recursive=true) ) end + @trixi_testset "TreeMesh2D: elixir_navierstokes_taylor_green_vortex.jl" begin + @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_navierstokes_taylor_green_vortex.jl"), + l2 = [0.0009279657228109691, 0.012454661988687185, 0.012454661988689886, 0.030487112728612178], + linf = [0.002435582543096171, 0.024824039368199546, 0.024824039368212758, 0.06731583711777489] + ) + end + @trixi_testset "P4estMesh2D: elixir_advection_diffusion_periodic.jl" begin @test_trixi_include(joinpath(examples_dir(), "p4est_2d_dgsem", "elixir_advection_diffusion_periodic.jl"), trees_per_dimension = (1, 1), initial_refinement_level = 2, tspan=(0.0, 0.5), diff --git a/test/test_special_elixirs.jl b/test/test_special_elixirs.jl index 23017059eaa..c05dfbdfca1 100644 --- a/test/test_special_elixirs.jl +++ b/test/test_special_elixirs.jl @@ -107,6 +107,15 @@ coverage = occursin("--code-coverage", cmd) && !occursin("--code-coverage=none", @test maximum(real, λ) < 10 * sqrt(eps(real(semi))) end + @timed_testset "Linear advection-diffusion" begin + trixi_include(@__MODULE__, joinpath(EXAMPLES_DIR, "tree_2d_dgsem", "elixir_advection_diffusion.jl"), + tspan=(0.0, 0.0), initial_refinement_level=2) + + J = jacobian_ad_forward(semi) + λ = eigvals(J) + @test maximum(real, λ) < 10 * sqrt(eps(real(semi))) + end + @timed_testset "Compressible Euler equations" begin trixi_include(@__MODULE__, joinpath(EXAMPLES_DIR, "tree_2d_dgsem", "elixir_euler_density_wave.jl"), tspan=(0.0, 0.0), initial_refinement_level=1) @@ -165,6 +174,15 @@ coverage = occursin("--code-coverage", cmd) && !occursin("--code-coverage=none", end end + @timed_testset "Navier-Stokes" begin + trixi_include(@__MODULE__, joinpath(EXAMPLES_DIR, "tree_2d_dgsem", "elixir_navierstokes_taylor_green_vortex.jl"), + tspan=(0.0, 0.0), initial_refinement_level=2) + + J = jacobian_ad_forward(semi) + λ = eigvals(J) + @test maximum(real, λ) < 0.2 + end + @timed_testset "MHD" begin trixi_include(@__MODULE__, joinpath(EXAMPLES_DIR, "tree_2d_dgsem", "elixir_mhd_alfven_wave.jl"), tspan=(0.0, 
0.0), initial_refinement_level=0)

From ce81702ef7c092e6b8c783a405312e7c461dfc04 Mon Sep 17 00:00:00 2001
From: Daniel Doehring
Date: Wed, 9 Aug 2023 14:39:32 +0200
Subject: [PATCH 112/163] Fix typo (#1600)

* Fix typo

---------

Co-authored-by: Hendrik Ranocha
---
 src/semidiscretization/semidiscretization.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/semidiscretization/semidiscretization.jl b/src/semidiscretization/semidiscretization.jl
index fbdcd73e2a8..c784f716426 100644
--- a/src/semidiscretization/semidiscretization.jl
+++ b/src/semidiscretization/semidiscretization.jl
@@ -363,7 +363,7 @@ end
 #
 # In some sense, having plain multidimensional `Array`s not support `resize!`
 # isn't necessarily a bug (although it would be nice to add this possibility to
-# base Julia) but can turn out to be a feature for us, because it will aloow us
+# base Julia) but can turn out to be a feature for us, because it will allow us
 # more specializations.
 # Since we can use multiple dispatch, these kinds of specializations can be
 # tailored specifically to each combinations of mesh/solver etc.

From 3ca93afed4ab4efbd6022f65aa88b9a3a7608906 Mon Sep 17 00:00:00 2001
From: Daniel Doehring
Date: Thu, 10 Aug 2023 22:25:36 +0200
Subject: [PATCH 113/163] L2 Mortars for Parabolic Terms on TreeMeshes (#1571)

* First try mortars for parabolic terms

* Use correct interface values in calc_fstar!

* Format parabolic 2d dgsem

* Remove unused function parameters

* L2 Mortars for 3D DGSEM TreeMesh

* Format

* Back to original example

* Dispatch 2D DGSEM rhs_parabolic for p4est and classic tree

* Re-use standard prolong2mortars in gradient comp

* Back to original version

* Add tests for L2 mortars for hyp-para

* remove whitespaces

* Use original analysis callback

* Test Taylor-Green with different integrator

* Remove whitespace

* check coverage status

* Stick to CK2N54 for 3D test

* Add more explicit dispatch

* Less invasive treatment for mortars and p4est

* Revert "Add more explicit dispatch"

This reverts commit 491c923d09ba335c03524894d9f59acf1d6ee699.
* More explicit dispatch

* Remove additional end

* Remove doubled implementations

* keep main updated with true main

* Add comment

* comment parabolic 3d

* whitespace

* Avoid allocations in parabolic boundary fluxes

* Update src/solvers/dgsem_tree/dg_2d_parabolic.jl

Co-authored-by: Andrew Winters

* Update src/solvers/dgsem_tree/dg_3d_parabolic.jl

Co-authored-by: Andrew Winters

* Update src/solvers/dgsem_tree/dg_3d_parabolic.jl

Co-authored-by: Andrew Winters

* revert alloc BC (other PR)

* Revert alloc BC (other PR)

* Name & News

* Update NEWS.md

Co-authored-by: Andrew Winters

* Update src/solvers/dgsem_p4est/dg_2d_parabolic.jl

Co-authored-by: Hendrik Ranocha

* Update src/solvers/dgsem_p4est/dg_3d_parabolic.jl

Co-authored-by: Hendrik Ranocha

* Check allocations

---------

Co-authored-by: Hendrik Ranocha
Co-authored-by: Andrew Winters
---
 AUTHORS.md                                 |   1 +
 NEWS.md                                    |   1 +
 src/solvers/dgsem_p4est/dg_2d_parabolic.jl |  91 ++++++
 src/solvers/dgsem_p4est/dg_3d_parabolic.jl |  99 +++++++
 src/solvers/dgsem_tree/dg_2d_parabolic.jl  | 252 +++++++++++++++-
 src/solvers/dgsem_tree/dg_3d_parabolic.jl  | 323 ++++++++++++++++++++-
 test/test_parabolic_2d.jl                  |  54 ++++
 test/test_parabolic_3d.jl                  |  58 +++-
 8 files changed, 870 insertions(+), 9 deletions(-)

diff --git a/AUTHORS.md b/AUTHORS.md
index abaa3e7e037..74bfaa9c852 100644
--- a/AUTHORS.md
+++ b/AUTHORS.md
@@ -28,6 +28,7 @@ are listed in alphabetical order:
 * Jesse Chan
 * Lars Christmann
 * Christof Czernik
+* Daniel Doehring
 * Patrick Ersing
 * Erik Faulhaber
 * Gregor Gassner
diff --git a/NEWS.md b/NEWS.md
index 8e374d9ce99..10125c40d17 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -9,6 +9,7 @@ for human readability.
 #### Added
 - Experimental support for 3D parabolic diffusion terms has been added.
+- Non-uniform `TreeMesh` available for hyperbolic-parabolic equations.
 - Capability to set truly discontinuous initial conditions in 1D.
 - Wetting and drying feature and examples for 1D and 2D shallow water equations
diff --git a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl
index 7e90a83a9ca..a04523d2fb4 100644
--- a/src/solvers/dgsem_p4est/dg_2d_parabolic.jl
+++ b/src/solvers/dgsem_p4est/dg_2d_parabolic.jl
@@ -22,6 +22,97 @@ function create_cache_parabolic(mesh::P4estMesh{2}, equations_hyperbolic::Abstra
     return cache
 end

+# TODO: Remove in favor of the implementation for the TreeMesh
+# once the P4estMesh can handle mortars as well
+function rhs_parabolic!(du, u, t, mesh::P4estMesh{2},
+                        equations_parabolic::AbstractEquationsParabolic,
+                        initial_condition, boundary_conditions_parabolic, source_terms,
+                        dg::DG, parabolic_scheme, cache, cache_parabolic)
+    (; u_transformed, gradients, flux_viscous) = cache_parabolic
+
+    # Convert conservative variables to a form more suitable for viscous flux calculations
+    @trixi_timeit timer() "transform variables" begin
+        transform_variables!(u_transformed, u, mesh, equations_parabolic,
+                             dg, parabolic_scheme, cache, cache_parabolic)
+    end
+
+    # Compute the gradients of the transformed variables
+    @trixi_timeit timer() "calculate gradient" begin
+        calc_gradient!(gradients, u_transformed, t, mesh, equations_parabolic,
+                       boundary_conditions_parabolic, dg, cache, cache_parabolic)
+    end
+
+    # Compute and store the viscous fluxes
+    @trixi_timeit timer() "calculate viscous fluxes" begin
+        calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, mesh,
+                             equations_parabolic, dg, cache, cache_parabolic)
+    end
+
+    # The remainder of this function is essentially a regular rhs!
for parabolic + # equations (i.e., it computes the divergence of the viscous fluxes) + # + # OBS! In `calc_viscous_fluxes!`, the viscous flux values at the volume nodes of each element have + # been computed and stored in `fluxes_viscous`. In the following, we *reuse* (abuse) the + # `interfaces` and `boundaries` containers in `cache_parabolic` to interpolate and store the + # *fluxes* at the element surfaces, as opposed to interpolating and storing the *solution* (as it + # is done in the hyperbolic operator). That is, `interfaces.u`/`boundaries.u` store *viscous flux values* + # and *not the solution*. The advantage is that a) we do not need to allocate more storage, b) we + # do not need to recreate the existing data structure only with a different name, and c) we do not + # need to interpolate solutions *and* gradients to the surfaces. + + # TODO: parabolic; reconsider current data structure reuse strategy + + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, flux_viscous, mesh, equations_parabolic, dg, cache) + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache_parabolic.elements.surface_flux_values, mesh, + equations_parabolic, dg, cache_parabolic) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_divergence!(cache_parabolic, t, + boundary_conditions_parabolic, mesh, + equations_parabolic, + dg.surface_integral, dg) + end + + # TODO: parabolic; extend to mortars + @assert nmortars(dg, cache) == 0 + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations_parabolic, + dg.surface_integral, dg, cache_parabolic) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(du, mesh, equations_parabolic, dg, cache_parabolic) + end + + return nothing +end + function calc_gradient!(gradients, u_transformed, t, mesh::P4estMesh{2}, equations_parabolic, boundary_conditions_parabolic, dg::DG, diff --git a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl index 6439cad69bb..2d26c1aff50 100644 --- a/src/solvers/dgsem_p4est/dg_3d_parabolic.jl +++ b/src/solvers/dgsem_p4est/dg_3d_parabolic.jl @@ -22,6 +22,105 @@ function create_cache_parabolic(mesh::P4estMesh{3}, equations_hyperbolic::Abstra return cache end +# This file collects all methods that have been updated to work with parabolic systems of equations +# +# assumptions: parabolic terms are of the form div(f(u, grad(u))) and +# will be discretized first order form as follows: +# 1. compute grad(u) +# 2. compute f(u, grad(u)) +# 3. compute div(f(u, grad(u))) (i.e., the "regular" rhs! call) +# boundary conditions will be applied to both grad(u) and div(f(u, grad(u))). 
+# TODO: Remove in favor of the implementation for the TreeMesh +# once the P4estMesh can handle mortars as well +function rhs_parabolic!(du, u, t, mesh::P4estMesh{3}, + equations_parabolic::AbstractEquationsParabolic, + initial_condition, boundary_conditions_parabolic, source_terms, + dg::DG, parabolic_scheme, cache, cache_parabolic) + @unpack u_transformed, gradients, flux_viscous = cache_parabolic + + # Convert conservative variables to a form more suitable for viscous flux calculations + @trixi_timeit timer() "transform variables" begin + transform_variables!(u_transformed, u, mesh, equations_parabolic, + dg, parabolic_scheme, cache, cache_parabolic) + end + + # Compute the gradients of the transformed variables + @trixi_timeit timer() "calculate gradient" begin + calc_gradient!(gradients, u_transformed, t, mesh, equations_parabolic, + boundary_conditions_parabolic, dg, cache, cache_parabolic) + end + + # Compute and store the viscous fluxes + @trixi_timeit timer() "calculate viscous fluxes" begin + calc_viscous_fluxes!(flux_viscous, gradients, u_transformed, mesh, + equations_parabolic, dg, cache, cache_parabolic) + end + + # The remainder of this function is essentially a regular rhs! for parabolic + # equations (i.e., it computes the divergence of the viscous fluxes) + # + # OBS! In `calc_viscous_fluxes!`, the viscous flux values at the volume nodes of each element have + # been computed and stored in `fluxes_viscous`. In the following, we *reuse* (abuse) the + # `interfaces` and `boundaries` containers in `cache_parabolic` to interpolate and store the + # *fluxes* at the element surfaces, as opposed to interpolating and storing the *solution* (as it + # is done in the hyperbolic operator). That is, `interfaces.u`/`boundaries.u` store *viscous flux values* + # and *not the solution*. The advantage is that a) we do not need to allocate more storage, b) we + # do not need to recreate the existing data structure only with a different name, and c) we do not + # need to interpolate solutions *and* gradients to the surfaces. 
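# (Illustration, not part of this patch: with the reuse strategy described
# above, the interface flux of the divergence step reduces to a BR1-type
# central mean of the viscous flux traces from the two neighboring elements,
# as in the `calc_fstar!` implementations added later in this patch. A
# self-contained sketch of that kernel with hypothetical input data:)
using StaticArrays

br1_flux(f_ll, f_rr) = 0.5 * (f_ll + f_rr)

f_ll = SVector(1.0, 2.0, 3.0)  # viscous flux trace from the left element
f_rr = SVector(3.0, 2.0, 1.0)  # viscous flux trace from the right element
br1_flux(f_ll, f_rr)           # -> SVector(2.0, 2.0, 2.0)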
+ + # TODO: parabolic; reconsider current data structure reuse strategy + + # Reset du + @trixi_timeit timer() "reset ∂u/∂t" reset_du!(du, dg, cache) + + # Calculate volume integral + @trixi_timeit timer() "volume integral" begin + calc_volume_integral!(du, flux_viscous, mesh, equations_parabolic, dg, cache) + end + + # Prolong solution to interfaces + @trixi_timeit timer() "prolong2interfaces" begin + prolong2interfaces!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate interface fluxes + @trixi_timeit timer() "interface flux" begin + calc_interface_flux!(cache_parabolic.elements.surface_flux_values, mesh, + equations_parabolic, dg, cache_parabolic) + end + + # Prolong solution to boundaries + @trixi_timeit timer() "prolong2boundaries" begin + prolong2boundaries!(cache_parabolic, flux_viscous, mesh, equations_parabolic, + dg.surface_integral, dg, cache) + end + + # Calculate boundary fluxes + @trixi_timeit timer() "boundary flux" begin + calc_boundary_flux_divergence!(cache_parabolic, t, + boundary_conditions_parabolic, + mesh, equations_parabolic, + dg.surface_integral, dg) + end + + # TODO: parabolic; extend to mortars + @assert nmortars(dg, cache) == 0 + + # Calculate surface integrals + @trixi_timeit timer() "surface integral" begin + calc_surface_integral!(du, u, mesh, equations_parabolic, + dg.surface_integral, dg, cache_parabolic) + end + + # Apply Jacobian from mapping to reference element + @trixi_timeit timer() "Jacobian" begin + apply_jacobian_parabolic!(du, mesh, equations_parabolic, dg, cache_parabolic) + end + + return nothing +end + function calc_gradient!(gradients, u_transformed, t, mesh::P4estMesh{3}, equations_parabolic, boundary_conditions_parabolic, dg::DG, diff --git a/src/solvers/dgsem_tree/dg_2d_parabolic.jl b/src/solvers/dgsem_tree/dg_2d_parabolic.jl index c5862579992..0da25230380 100644 --- a/src/solvers/dgsem_tree/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_2d_parabolic.jl @@ -13,7 +13,7 @@ # 2. compute f(u, grad(u)) # 3. compute div(f(u, grad(u))) (i.e., the "regular" rhs! call) # boundary conditions will be applied to both grad(u) and div(f(u, grad(u))). 
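# (Illustration, not part of this diff: the three steps listed in the comment
# above can be seen in isolation for the scalar heat equation
# u_t = div(mu * grad(u)) with periodic second-order finite differences;
# this sketches the splitting only, not Trixi's DG implementation.)
function parabolic_rhs_sketch!(du, u, mu, dx)
    n = length(u)
    gradient = similar(u)
    for i in 1:n
        # 1. compute grad(u)
        gradient[i] = (u[mod1(i + 1, n)] - u[mod1(i - 1, n)]) / (2 * dx)
    end
    flux = mu .* gradient  # 2. compute f(u, grad(u))
    for i in 1:n
        # 3. compute div(f(u, grad(u))), the "regular" rhs! part
        du[i] = (flux[mod1(i + 1, n)] - flux[mod1(i - 1, n)]) / (2 * dx)
    end
    return du
end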
-function rhs_parabolic!(du, u, t, mesh::Union{TreeMesh{2}, P4estMesh{2}}, +function rhs_parabolic!(du, u, t, mesh::TreeMesh{2}, equations_parabolic::AbstractEquationsParabolic, initial_condition, boundary_conditions_parabolic, source_terms, dg::DG, parabolic_scheme, cache, cache_parabolic) @@ -85,8 +85,18 @@ function rhs_parabolic!(du, u, t, mesh::Union{TreeMesh{2}, P4estMesh{2}}, dg.surface_integral, dg) end - # TODO: parabolic; extend to mortars - @assert nmortars(dg, cache) == 0 + # Prolong solution to mortars + @trixi_timeit timer() "prolong2mortars" begin + prolong2mortars!(cache, flux_viscous, mesh, equations_parabolic, + dg.mortar, dg.surface_integral, dg) + end + + # Calculate mortar fluxes + @trixi_timeit timer() "mortar flux" begin + calc_mortar_flux!(cache_parabolic.elements.surface_flux_values, mesh, + equations_parabolic, + dg.mortar, dg.surface_integral, dg, cache) + end # Calculate surface integrals @trixi_timeit timer() "surface integral" begin @@ -500,6 +510,227 @@ function calc_boundary_flux_by_direction_divergence!(surface_flux_values::Abstra return nothing end +function prolong2mortars!(cache, flux_viscous::Tuple{AbstractArray, AbstractArray}, + mesh::TreeMesh{2}, + equations_parabolic::AbstractEquationsParabolic, + mortar_l2::LobattoLegendreMortarL2, surface_integral, + dg::DGSEM) + flux_viscous_x, flux_viscous_y = flux_viscous + @threaded for mortar in eachmortar(dg, cache) + large_element = cache.mortars.neighbor_ids[3, mortar] + upper_element = cache.mortars.neighbor_ids[2, mortar] + lower_element = cache.mortars.neighbor_ids[1, mortar] + + # Copy solution small to small + if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + for l in eachnode(dg) + for v in eachvariable(equations_parabolic) + cache.mortars.u_upper[2, v, l, mortar] = flux_viscous_x[v, 1, l, + upper_element] + cache.mortars.u_lower[2, v, l, mortar] = flux_viscous_x[v, 1, l, + lower_element] + end + end + else + # L2 mortars in y-direction + for l in eachnode(dg) + for v in eachvariable(equations_parabolic) + cache.mortars.u_upper[2, v, l, mortar] = flux_viscous_y[v, l, 1, + upper_element] + cache.mortars.u_lower[2, v, l, mortar] = flux_viscous_y[v, l, 1, + lower_element] + end + end + end + else # large_sides[mortar] == 2 -> small elements on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + for l in eachnode(dg) + for v in eachvariable(equations_parabolic) + cache.mortars.u_upper[1, v, l, mortar] = flux_viscous_x[v, + nnodes(dg), + l, + upper_element] + cache.mortars.u_lower[1, v, l, mortar] = flux_viscous_x[v, + nnodes(dg), + l, + lower_element] + end + end + else + # L2 mortars in y-direction + for l in eachnode(dg) + for v in eachvariable(equations_parabolic) + cache.mortars.u_upper[1, v, l, mortar] = flux_viscous_y[v, l, + nnodes(dg), + upper_element] + cache.mortars.u_lower[1, v, l, mortar] = flux_viscous_y[v, l, + nnodes(dg), + lower_element] + end + end + end + end + + # Interpolate large element face data to small interface locations + if cache.mortars.large_sides[mortar] == 1 # -> large element on left side + leftright = 1 + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(flux_viscous_x, :, nnodes(dg), :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + else + # L2 mortars in y-direction + u_large = view(flux_viscous_y, :, :, nnodes(dg), large_element) + 
element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + end + else # large_sides[mortar] == 2 -> large element on right side + leftright = 2 + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(flux_viscous_x, :, 1, :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + else + # L2 mortars in y-direction + u_large = view(flux_viscous_y, :, :, 1, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large) + end + end + end + + return nothing +end + +# NOTE: Use analogy to "calc_mortar_flux!" for hyperbolic eqs with no nonconservative terms. +# Reasoning: "calc_interface_flux!" for parabolic part is implemented as the version for +# hyperbolic terms with conserved terms only, i.e., no nonconservative terms. +function calc_mortar_flux!(surface_flux_values, + mesh::TreeMesh{2}, + equations_parabolic::AbstractEquationsParabolic, + mortar_l2::LobattoLegendreMortarL2, + surface_integral, dg::DG, cache) + @unpack surface_flux = surface_integral + @unpack u_lower, u_upper, orientations = cache.mortars + @unpack fstar_upper_threaded, fstar_lower_threaded = cache + + @threaded for mortar in eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar_upper = fstar_upper_threaded[Threads.threadid()] + fstar_lower = fstar_lower_threaded[Threads.threadid()] + + # Calculate fluxes + orientation = orientations[mortar] + calc_fstar!(fstar_upper, equations_parabolic, surface_flux, dg, u_upper, mortar, + orientation) + calc_fstar!(fstar_lower, equations_parabolic, surface_flux, dg, u_lower, mortar, + orientation) + + mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations_parabolic, mortar_l2, dg, cache, + mortar, fstar_upper, fstar_lower) + end + + return nothing +end + +@inline function calc_fstar!(destination::AbstractArray{<:Any, 2}, + equations_parabolic::AbstractEquationsParabolic, + surface_flux, dg::DGSEM, + u_interfaces, interface, orientation) + for i in eachnode(dg) + # Call pointwise two-point numerical flux function + u_ll, u_rr = get_surface_node_vars(u_interfaces, equations_parabolic, dg, i, + interface) + # TODO: parabolic; only BR1 at the moment + flux = 0.5 * (u_ll + u_rr) + + # Copy flux to left and right element storage + set_node_vars!(destination, flux, equations_parabolic, dg, i) + end + + return nothing +end + +@inline function mortar_fluxes_to_elements!(surface_flux_values, + mesh::TreeMesh{2}, + equations_parabolic::AbstractEquationsParabolic, + mortar_l2::LobattoLegendreMortarL2, + dg::DGSEM, cache, + mortar, fstar_upper, fstar_lower) + large_element = cache.mortars.neighbor_ids[3, mortar] + upper_element = cache.mortars.neighbor_ids[2, mortar] + lower_element = cache.mortars.neighbor_ids[1, mortar] + + # Copy flux small to small + if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + else + # L2 mortars in y-direction + direction = 3 + end + else # large_sides[mortar] == 2 -> small elements on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + else + # L2 mortars in y-direction + direction = 4 + end + end + surface_flux_values[:, :, direction, upper_element] .= fstar_upper + surface_flux_values[:, :, direction, lower_element] .= fstar_lower + + # Project small fluxes to large element + if cache.mortars.large_sides[mortar] 
== 1 # -> large element on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 2 + else + # L2 mortars in y-direction + direction = 4 + end + else # large_sides[mortar] == 2 -> large element on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + direction = 1 + else + # L2 mortars in y-direction + direction = 3 + end + end + + # TODO: Taal performance + # for v in eachvariable(equations) + # # The code below is semantically equivalent to + # # surface_flux_values[v, :, direction, large_element] .= + # # (mortar_l2.reverse_upper * fstar_upper[v, :] + mortar_l2.reverse_lower * fstar_lower[v, :]) + # # but faster and does not allocate. + # # Note that `true * some_float == some_float` in Julia, i.e. `true` acts as + # # a universal `one`. Hence, the second `mul!` means "add the matrix-vector + # # product to the current value of the destination". + # @views mul!(surface_flux_values[v, :, direction, large_element], + # mortar_l2.reverse_upper, fstar_upper[v, :]) + # @views mul!(surface_flux_values[v, :, direction, large_element], + # mortar_l2.reverse_lower, fstar_lower[v, :], true, true) + # end + # The code above could be replaced by the following code. However, the relative efficiency + # depends on the types of fstar_upper/fstar_lower and dg.l2mortar_reverse_upper. + # Using StaticArrays for both makes the code above faster for common test cases. + multiply_dimensionwise!(view(surface_flux_values, :, :, direction, large_element), + mortar_l2.reverse_upper, fstar_upper, + mortar_l2.reverse_lower, fstar_lower) + + return nothing +end + # Calculate the gradient of the transformed variables function calc_gradient!(gradients, u_transformed, t, mesh::TreeMesh{2}, equations_parabolic, @@ -589,7 +820,20 @@ function calc_gradient!(gradients, u_transformed, t, dg.surface_integral, dg) end - # TODO: parabolic; mortars + # Prolong solution to mortars + # NOTE: This re-uses the implementation for hyperbolic terms in "dg_2d.jl" + @trixi_timeit timer() "prolong2mortars" begin + prolong2mortars!(cache, u_transformed, mesh, equations_parabolic, + dg.mortar, dg.surface_integral, dg) + end + + # Calculate mortar fluxes + @trixi_timeit timer() "mortar flux" begin + calc_mortar_flux!(surface_flux_values, + mesh, + equations_parabolic, + dg.mortar, dg.surface_integral, dg, cache) + end # Calculate surface integrals @trixi_timeit timer() "surface integral" begin diff --git a/src/solvers/dgsem_tree/dg_3d_parabolic.jl b/src/solvers/dgsem_tree/dg_3d_parabolic.jl index 5b63b971cd8..2745d312b37 100644 --- a/src/solvers/dgsem_tree/dg_3d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_3d_parabolic.jl @@ -13,7 +13,7 @@ # 2. compute f(u, grad(u)) # 3. compute div(f(u, grad(u))) (i.e., the "regular" rhs! call) # boundary conditions will be applied to both grad(u) and div(f(u, grad(u))). 
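# (Illustration, not part of this diff: `mortar_fluxes_to_elements!`, added
# earlier in this patch for the 2D TreeMesh version, maps the two small-face
# fluxes back to the large element face with precomputed L2 projection
# operators. A one-variable, 1D sketch; the 2x2 matrices below are
# hypothetical stand-ins for the actual `LobattoLegendreMortarL2` operators
# `reverse_upper`/`reverse_lower`:)
reverse_upper = [0.75 0.25; 0.0 1.0]  # hypothetical projection weights
reverse_lower = [1.0 0.0; 0.25 0.75]  # hypothetical projection weights
fstar_upper = [1.0, 2.0]              # numerical flux on the upper small face
fstar_lower = [3.0, 4.0]              # numerical flux on the lower small face

# One-dimensional analogue of the `multiply_dimensionwise!` call above
f_large = reverse_upper * fstar_upper + reverse_lower * fstar_lower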
-function rhs_parabolic!(du, u, t, mesh::Union{TreeMesh{3}, P4estMesh{3}}, +function rhs_parabolic!(du, u, t, mesh::TreeMesh{3}, equations_parabolic::AbstractEquationsParabolic, initial_condition, boundary_conditions_parabolic, source_terms, dg::DG, parabolic_scheme, cache, cache_parabolic) @@ -85,8 +85,18 @@ function rhs_parabolic!(du, u, t, mesh::Union{TreeMesh{3}, P4estMesh{3}}, dg.surface_integral, dg) end - # TODO: parabolic; extend to mortars - @assert nmortars(dg, cache) == 0 + # Prolong solution to mortars + @trixi_timeit timer() "prolong2mortars" begin + prolong2mortars!(cache, flux_viscous, mesh, equations_parabolic, + dg.mortar, dg.surface_integral, dg) + end + + # Calculate mortar fluxes + @trixi_timeit timer() "mortar flux" begin + calc_mortar_flux!(cache_parabolic.elements.surface_flux_values, mesh, + equations_parabolic, + dg.mortar, dg.surface_integral, dg, cache) + end # Calculate surface integrals @trixi_timeit timer() "surface integral" begin @@ -583,6 +593,298 @@ function calc_boundary_flux_by_direction_divergence!(surface_flux_values::Abstra return nothing end +function prolong2mortars!(cache, + flux_viscous::Tuple{AbstractArray, AbstractArray, + AbstractArray}, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, + mortar_l2::LobattoLegendreMortarL2, + surface_integral, dg::DGSEM) + # temporary buffer for projections + @unpack fstar_tmp1_threaded = cache + + flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous + @threaded for mortar in eachmortar(dg, cache) + fstar_tmp1 = fstar_tmp1_threaded[Threads.threadid()] + + lower_left_element = cache.mortars.neighbor_ids[1, mortar] + lower_right_element = cache.mortars.neighbor_ids[2, mortar] + upper_left_element = cache.mortars.neighbor_ids[3, mortar] + upper_right_element = cache.mortars.neighbor_ids[4, mortar] + large_element = cache.mortars.neighbor_ids[5, mortar] + + # Copy solution small to small + if cache.mortars.large_sides[mortar] == 1 # -> small elements on right side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + for k in eachnode(dg), j in eachnode(dg) + for v in eachvariable(equations_parabolic) + cache.mortars.u_upper_left[2, v, j, k, mortar] = flux_viscous_x[v, + 1, + j, + k, + upper_left_element] + cache.mortars.u_upper_right[2, v, j, k, mortar] = flux_viscous_x[v, + 1, + j, + k, + upper_right_element] + cache.mortars.u_lower_left[2, v, j, k, mortar] = flux_viscous_x[v, + 1, + j, + k, + lower_left_element] + cache.mortars.u_lower_right[2, v, j, k, mortar] = flux_viscous_x[v, + 1, + j, + k, + lower_right_element] + end + end + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + for k in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations_parabolic) + cache.mortars.u_upper_left[2, v, i, k, mortar] = flux_viscous_y[v, + i, + 1, + k, + upper_left_element] + cache.mortars.u_upper_right[2, v, i, k, mortar] = flux_viscous_y[v, + i, + 1, + k, + upper_right_element] + cache.mortars.u_lower_left[2, v, i, k, mortar] = flux_viscous_y[v, + i, + 1, + k, + lower_left_element] + cache.mortars.u_lower_right[2, v, i, k, mortar] = flux_viscous_y[v, + i, + 1, + k, + lower_right_element] + end + end + else # orientations[mortar] == 3 + # L2 mortars in z-direction + for j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations_parabolic) + cache.mortars.u_upper_left[2, v, i, j, mortar] = flux_viscous_z[v, + i, + j, + 1, + upper_left_element] + cache.mortars.u_upper_right[2, v, i, j, mortar] = flux_viscous_z[v, + i, + j, + 
1, + upper_right_element] + cache.mortars.u_lower_left[2, v, i, j, mortar] = flux_viscous_z[v, + i, + j, + 1, + lower_left_element] + cache.mortars.u_lower_right[2, v, i, j, mortar] = flux_viscous_z[v, + i, + j, + 1, + lower_right_element] + end + end + end + else # large_sides[mortar] == 2 -> small elements on left side + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + for k in eachnode(dg), j in eachnode(dg) + for v in eachvariable(equations_parabolic) + cache.mortars.u_upper_left[1, v, j, k, mortar] = flux_viscous_x[v, + nnodes(dg), + j, + k, + upper_left_element] + cache.mortars.u_upper_right[1, v, j, k, mortar] = flux_viscous_x[v, + nnodes(dg), + j, + k, + upper_right_element] + cache.mortars.u_lower_left[1, v, j, k, mortar] = flux_viscous_x[v, + nnodes(dg), + j, + k, + lower_left_element] + cache.mortars.u_lower_right[1, v, j, k, mortar] = flux_viscous_x[v, + nnodes(dg), + j, + k, + lower_right_element] + end + end + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + for k in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations_parabolic) + cache.mortars.u_upper_left[1, v, i, k, mortar] = flux_viscous_y[v, + i, + nnodes(dg), + k, + upper_left_element] + cache.mortars.u_upper_right[1, v, i, k, mortar] = flux_viscous_y[v, + i, + nnodes(dg), + k, + upper_right_element] + cache.mortars.u_lower_left[1, v, i, k, mortar] = flux_viscous_y[v, + i, + nnodes(dg), + k, + lower_left_element] + cache.mortars.u_lower_right[1, v, i, k, mortar] = flux_viscous_y[v, + i, + nnodes(dg), + k, + lower_right_element] + end + end + else # if cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + for j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations_parabolic) + cache.mortars.u_upper_left[1, v, i, j, mortar] = flux_viscous_z[v, + i, + j, + nnodes(dg), + upper_left_element] + cache.mortars.u_upper_right[1, v, i, j, mortar] = flux_viscous_z[v, + i, + j, + nnodes(dg), + upper_right_element] + cache.mortars.u_lower_left[1, v, i, j, mortar] = flux_viscous_z[v, + i, + j, + nnodes(dg), + lower_left_element] + cache.mortars.u_lower_right[1, v, i, j, mortar] = flux_viscous_z[v, + i, + j, + nnodes(dg), + lower_right_element] + end + end + end + end + + # Interpolate large element face data to small interface locations + if cache.mortars.large_sides[mortar] == 1 # -> large element on left side + leftright = 1 + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(flux_viscous_x, :, nnodes(dg), :, :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in y-direction + u_large = view(flux_viscous_y, :, :, nnodes(dg), :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + else # cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + u_large = view(flux_viscous_z, :, :, :, nnodes(dg), large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + end + else # large_sides[mortar] == 2 -> large element on right side + leftright = 2 + if cache.mortars.orientations[mortar] == 1 + # L2 mortars in x-direction + u_large = view(flux_viscous_x, :, 1, :, :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + elseif cache.mortars.orientations[mortar] == 2 + # L2 mortars in 
y-direction + u_large = view(flux_viscous_y, :, :, 1, :, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + else # cache.mortars.orientations[mortar] == 3 + # L2 mortars in z-direction + u_large = view(flux_viscous_z, :, :, :, 1, large_element) + element_solutions_to_mortars!(cache.mortars, mortar_l2, leftright, + mortar, u_large, fstar_tmp1) + end + end + end + + return nothing +end + +# NOTE: Use analogy to "calc_mortar_flux!" for hyperbolic eqs with no nonconservative terms. +# Reasoning: "calc_interface_flux!" for parabolic part is implemented as the version for +# hyperbolic terms with conserved terms only, i.e., no nonconservative terms. +function calc_mortar_flux!(surface_flux_values, + mesh::TreeMesh{3}, + equations_parabolic::AbstractEquationsParabolic, + mortar_l2::LobattoLegendreMortarL2, + surface_integral, dg::DG, cache) + @unpack surface_flux = surface_integral + @unpack u_lower_left, u_lower_right, u_upper_left, u_upper_right, orientations = cache.mortars + @unpack (fstar_upper_left_threaded, fstar_upper_right_threaded, + fstar_lower_left_threaded, fstar_lower_right_threaded, + fstar_tmp1_threaded) = cache + + @threaded for mortar in eachmortar(dg, cache) + # Choose thread-specific pre-allocated container + fstar_upper_left = fstar_upper_left_threaded[Threads.threadid()] + fstar_upper_right = fstar_upper_right_threaded[Threads.threadid()] + fstar_lower_left = fstar_lower_left_threaded[Threads.threadid()] + fstar_lower_right = fstar_lower_right_threaded[Threads.threadid()] + fstar_tmp1 = fstar_tmp1_threaded[Threads.threadid()] + + # Calculate fluxes + orientation = orientations[mortar] + calc_fstar!(fstar_upper_left, equations_parabolic, surface_flux, dg, + u_upper_left, mortar, + orientation) + calc_fstar!(fstar_upper_right, equations_parabolic, surface_flux, dg, + u_upper_right, + mortar, orientation) + calc_fstar!(fstar_lower_left, equations_parabolic, surface_flux, dg, + u_lower_left, mortar, + orientation) + calc_fstar!(fstar_lower_right, equations_parabolic, surface_flux, dg, + u_lower_right, + mortar, orientation) + + mortar_fluxes_to_elements!(surface_flux_values, + mesh, equations_parabolic, mortar_l2, dg, cache, + mortar, + fstar_upper_left, fstar_upper_right, + fstar_lower_left, fstar_lower_right, + fstar_tmp1) + end + + return nothing +end + +@inline function calc_fstar!(destination::AbstractArray{<:Any, 3}, + equations_parabolic::AbstractEquationsParabolic, + surface_flux, dg::DGSEM, + u_interfaces, interface, orientation) + for j in eachnode(dg), i in eachnode(dg) + # Call pointwise two-point numerical flux function + u_ll, u_rr = get_surface_node_vars(u_interfaces, equations_parabolic, dg, i, j, + interface) + # TODO: parabolic; only BR1 at the moment + flux = 0.5 * (u_ll + u_rr) + + # Copy flux to left and right element storage + set_node_vars!(destination, flux, equations_parabolic, dg, i, j) + end + + return nothing +end + # Calculate the gradient of the transformed variables function calc_gradient!(gradients, u_transformed, t, mesh::TreeMesh{3}, equations_parabolic, @@ -679,7 +981,20 @@ function calc_gradient!(gradients, u_transformed, t, dg.surface_integral, dg) end - # TODO: parabolic; mortars + # Prolong solution to mortars + # NOTE: This re-uses the implementation for hyperbolic terms in "dg_3d.jl" + @trixi_timeit timer() "prolong2mortars" begin + prolong2mortars!(cache, u_transformed, mesh, equations_parabolic, + dg.mortar, dg.surface_integral, dg) + end + + # Calculate mortar fluxes + 
@trixi_timeit timer() "mortar flux" begin + calc_mortar_flux!(surface_flux_values, + mesh, + equations_parabolic, + dg.mortar, dg.surface_integral, dg, cache) + end # Calculate surface integrals @trixi_timeit timer() "surface integral" begin diff --git a/test/test_parabolic_2d.jl b/test/test_parabolic_2d.jl index e3bb1ed9fb1..1564a33dc41 100644 --- a/test/test_parabolic_2d.jl +++ b/test/test_parabolic_2d.jl @@ -125,6 +125,39 @@ isdir(outdir) && rm(outdir, recursive=true) ) end + @trixi_testset "TreeMesh2D: elixir_advection_diffusion.jl (Refined mesh)" begin + @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_diffusion.jl"), + tspan=(0.0, 0.0)) + LLID = Trixi.local_leaf_cells(mesh.tree) + num_leafs = length(LLID) + @assert num_leafs % 8 == 0 + Trixi.refine!(mesh.tree, LLID[1:Int(num_leafs/8)]) + tspan=(0.0, 1.5) + semi = SemidiscretizationHyperbolicParabolic(mesh, + (equations, equations_parabolic), + initial_condition, solver; + boundary_conditions=(boundary_conditions, + boundary_conditions_parabolic)) + ode = semidiscretize(semi, tspan) + analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + callbacks = CallbackSet(summary_callback, alive_callback, analysis_callback) + sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, + ode_default_options()..., callback=callbacks) + ac_sol = analysis_callback(sol) + @test ac_sol.l2[1] ≈ 1.67452550744728e-6 + @test ac_sol.linf[1] ≈ 7.905059166368744e-6 + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 100 + @test (@allocated Trixi.rhs_parabolic!(du_ode, u_ode, semi, t)) < 100 + end + end + @trixi_testset "TreeMesh2D: elixir_advection_diffusion_nonperiodic.jl" begin @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_diffusion_nonperiodic.jl"), initial_refinement_level = 2, tspan=(0.0, 0.1), @@ -180,6 +213,27 @@ isdir(outdir) && rm(outdir, recursive=true) ) end + @trixi_testset "TreeMesh2D: elixir_navierstokes_convergence.jl (Refined mesh)" begin + @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_navierstokes_convergence.jl"), + tspan=(0.0, 0.0), initial_refinement_level=3) + LLID = Trixi.local_leaf_cells(mesh.tree) + num_leafs = length(LLID) + @assert num_leafs % 4 == 0 + Trixi.refine!(mesh.tree, LLID[1:Int(num_leafs/4)]) + tspan=(0.0, 0.5) + semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), initial_condition, solver; + boundary_conditions=(boundary_conditions, boundary_conditions_parabolic), + source_terms=source_terms_navier_stokes_convergence_test) + ode = semidiscretize(semi, tspan) + analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + callbacks = CallbackSet(summary_callback, alive_callback, analysis_callback) + sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, dt = 1e-5, + ode_default_options()..., callback=callbacks) + ac_sol = analysis_callback(sol) + @test ac_sol.l2 ≈ [0.00024296959173852447; 0.0002093263158670915; 0.0005390572390977262; 0.00026753561392341537] + @test ac_sol.linf ≈ [0.0016210102053424436; 0.002593287648655501; 0.002953907343823712; 0.002077119120180271] + end + @trixi_testset "TreeMesh2D: elixir_navierstokes_lid_driven_cavity.jl" begin @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_navierstokes_lid_driven_cavity.jl"), 
initial_refinement_level = 2, tspan=(0.0, 0.5), diff --git a/test/test_parabolic_3d.jl b/test/test_parabolic_3d.jl index 67a27238969..d607962afa0 100644 --- a/test/test_parabolic_3d.jl +++ b/test/test_parabolic_3d.jl @@ -78,6 +78,27 @@ isdir(outdir) && rm(outdir, recursive=true) ) end + @trixi_testset "TreeMesh3D: elixir_navierstokes_convergence.jl (Refined mesh)" begin + @test_trixi_include(joinpath(examples_dir(), "tree_3d_dgsem", "elixir_navierstokes_convergence.jl"), + tspan=(0.0, 0.0)) + LLID = Trixi.local_leaf_cells(mesh.tree) + num_leafs = length(LLID) + @assert num_leafs % 16 == 0 + Trixi.refine!(mesh.tree, LLID[1:Int(num_leafs/16)]) + tspan=(0.0, 1.0) + semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), initial_condition, solver; + boundary_conditions=(boundary_conditions, boundary_conditions_parabolic), + source_terms=source_terms_navier_stokes_convergence_test) + ode = semidiscretize(semi, tspan) + analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + callbacks = CallbackSet(summary_callback, alive_callback, analysis_callback) + sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, dt = 1e-5, + ode_default_options()..., callback=callbacks) + ac_sol = analysis_callback(sol) + @test ac_sol.l2 ≈ [0.0003991794175622818; 0.0008853745163670504; 0.0010658655552066817; 0.0008785559918324284; 0.001403163458422815] + @test ac_sol.linf ≈ [0.0035306410538458177; 0.01505692306169911; 0.008862444161110705; 0.015065647972869856; 0.030402714743065218] + end + @trixi_testset "TreeMesh3D: elixir_navierstokes_taylor_green_vortex.jl" begin @test_trixi_include(joinpath(examples_dir(), "tree_3d_dgsem", "elixir_navierstokes_taylor_green_vortex.jl"), initial_refinement_level = 2, tspan=(0.0, 0.25), @@ -86,6 +107,41 @@ isdir(outdir) && rm(outdir, recursive=true) ) end + @trixi_testset "TreeMesh3D: elixir_navierstokes_taylor_green_vortex.jl (Refined mesh)" begin + @test_trixi_include(joinpath(examples_dir(), "tree_3d_dgsem", "elixir_navierstokes_taylor_green_vortex.jl"), + tspan=(0.0, 0.0)) + LLID = Trixi.local_leaf_cells(mesh.tree) + num_leafs = length(LLID) + @assert num_leafs % 32 == 0 + Trixi.refine!(mesh.tree, LLID[1:Int(num_leafs/32)]) + tspan=(0.0, 10.0) + semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), + initial_condition, solver) + ode = semidiscretize(semi, tspan) + analysis_callback = AnalysisCallback(semi, interval=analysis_interval, save_analysis=true, + extra_analysis_integrals=(energy_kinetic, + energy_internal, + enstrophy)) + callbacks = CallbackSet(summary_callback, alive_callback, analysis_callback) + # Use CarpenterKennedy2N54 since `RDPK3SpFSAL49` gives slightly different results on different machines + sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), + dt=5e-3, + save_everystep=false, callback=callbacks); + ac_sol = analysis_callback(sol) + @test ac_sol.l2 ≈ [0.0013666103707729502; 0.2313581629543744; 0.2308164306264533; 0.17460246787819503; 0.28121914446544005] + @test ac_sol.linf ≈ [0.006938093883741336; 1.028235074139312; 1.0345438209717241; 1.0821111605203542; 1.2669636522564645] + + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 100 + @test (@allocated Trixi.rhs_parabolic!(du_ode, u_ode, semi, t)) < 100 + end + end + @trixi_testset "P4estMesh3D: 
elixir_navierstokes_convergence.jl" begin
    @test_trixi_include(joinpath(examples_dir(), "p4est_3d_dgsem", "elixir_navierstokes_convergence.jl"),
      initial_refinement_level = 2, tspan=(0.0, 0.1),
@@ -101,8 +157,8 @@ isdir(outdir) && rm(outdir, recursive=true)
      linf = [0.0006696415247340326, 0.03442565722527785, 0.03442565722577423, 0.06295407168705314, 0.032857472756916195]
    )
  end
-
 end
+
 # Clean up afterwards: delete Trixi.jl output directory
 @test_nowarn isdir(outdir) && rm(outdir, recursive=true)

From d52a0419f0fe5dbee8dd58e3b12a23bc9fa67fc1 Mon Sep 17 00:00:00 2001
From: Daniel Doehring
Date: Fri, 11 Aug 2023 09:43:31 +0200
Subject: [PATCH 114/163] Navier-Stokes 1D (#1597)

* Remove doubled implementations

* keep main updated with true main

* Avoid allocations in parabolic boundary fluxes

* Correct shear layer IC

* Whitespaces

* Restore main

* restore main

* 1D Navier Stokes

* Conventional notation for heat flux

* remove multi-dim artefacts

* Move general part into own file

* Slip Wall BC for 1D Compressible Euler

* Correct arguments for 1D BCs

* format

* Add convergence test with walls

* Test gradient with entropy variables

* Test isothermal BC, test gradient in entropy vars

* Correct test data

---------

Co-authored-by: Hendrik Ranocha
---
 ...lixir_navierstokes_convergence_periodic.jl | 136 ++++++
 .../elixir_navierstokes_convergence_walls.jl  | 160 +++++++
 src/Trixi.jl                                  |   3 +-
 src/equations/compressible_euler_1d.jl        |  51 +++
 src/equations/compressible_navier_stokes.jl   |  70 +++
 .../compressible_navier_stokes_1d.jl          | 403 ++++++++++++++++++
 .../compressible_navier_stokes_2d.jl          |  77 +---
 .../compressible_navier_stokes_3d.jl          |   6 +-
 src/equations/equations_parabolic.jl          |   2 +
 test/test_parabolic_1d.jl                     |  35 +-
 10 files changed, 864 insertions(+), 79 deletions(-)
 create mode 100644 examples/tree_1d_dgsem/elixir_navierstokes_convergence_periodic.jl
 create mode 100644 examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls.jl
 create mode 100644 src/equations/compressible_navier_stokes.jl
 create mode 100644 src/equations/compressible_navier_stokes_1d.jl

diff --git a/examples/tree_1d_dgsem/elixir_navierstokes_convergence_periodic.jl b/examples/tree_1d_dgsem/elixir_navierstokes_convergence_periodic.jl
new file mode 100644
index 00000000000..3f72d319b0b
--- /dev/null
+++ b/examples/tree_1d_dgsem/elixir_navierstokes_convergence_periodic.jl
@@ -0,0 +1,136 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the compressible Navier-Stokes equations
+
+# TODO: parabolic; unify names of these accessor functions
+prandtl_number() = 0.72
+mu() = 6.25e-4 # equivalent to Re = 1600
+
+equations = CompressibleEulerEquations1D(1.4)
+equations_parabolic = CompressibleNavierStokesDiffusion1D(equations, mu=mu(),
+                                                          Prandtl=prandtl_number())
+
+# This convergence test setup was originally derived by Andrew Winters (@andrewwinters5000)
+# (Simplified version of the 2D)
+function initial_condition_navier_stokes_convergence_test(x, t, equations)
+  # Amplitude and shift
+  A = 0.5
+  c = 2.0
+
+  # convenience values for trig.
functions + pi_x = pi * x[1] + pi_t = pi * t + + rho = c + A * sin(pi_x) * cos(pi_t) + v1 = sin(pi_x) * cos(pi_t) + p = rho^2 + + return prim2cons(SVector(rho, v1, p), equations) +end +initial_condition = initial_condition_navier_stokes_convergence_test + +@inline function source_terms_navier_stokes_convergence_test(u, x, t, equations) + # we currently need to hardcode these parameters until we fix the "combined equation" issue + # see also https://github.com/trixi-framework/Trixi.jl/pull/1160 + inv_gamma_minus_one = inv(equations.gamma - 1) + Pr = prandtl_number() + mu_ = mu() + + # Same settings as in `initial_condition` + # Amplitude and shift + A = 0.5 + c = 2.0 + + # convenience values for trig. functions + pi_x = pi * x[1] + pi_t = pi * t + + # compute the manufactured solution and all necessary derivatives + rho = c + A * sin(pi_x) * cos(pi_t) + rho_t = -pi * A * sin(pi_x) * sin(pi_t) + rho_x = pi * A * cos(pi_x) * cos(pi_t) + rho_xx = -pi * pi * A * sin(pi_x) * cos(pi_t) + + v1 = sin(pi_x) * cos(pi_t) + v1_t = -pi * sin(pi_x) * sin(pi_t) + v1_x = pi * cos(pi_x) * cos(pi_t) + v1_xx = -pi * pi * sin(pi_x) * cos(pi_t) + + p = rho * rho + p_t = 2.0 * rho * rho_t + p_x = 2.0 * rho * rho_x + p_xx = 2.0 * rho * rho_xx + 2.0 * rho_x * rho_x + + E = p * inv_gamma_minus_one + 0.5 * rho * v1^2 + E_t = p_t * inv_gamma_minus_one + 0.5 * rho_t * v1^2 + rho * v1 * v1_t + E_x = p_x * inv_gamma_minus_one + 0.5 * rho_x * v1^2 + rho * v1 * v1_x + + # Some convenience constants + T_const = equations.gamma * inv_gamma_minus_one / Pr + inv_rho_cubed = 1.0 / (rho^3) + + # compute the source terms + # density equation + du1 = rho_t + rho_x * v1 + rho * v1_x + + # x-momentum equation + du2 = ( rho_t * v1 + rho * v1_t + + p_x + rho_x * v1^2 + 2.0 * rho * v1 * v1_x + # stress tensor from x-direction + - v1_xx * mu_) + + # total energy equation + du3 = ( E_t + v1_x * (E + p) + v1 * (E_x + p_x) + # stress tensor and temperature gradient terms from x-direction + - v1_xx * v1 * mu_ + - v1_x * v1_x * mu_ + - T_const * inv_rho_cubed * ( p_xx * rho * rho + - 2.0 * p_x * rho * rho_x + + 2.0 * p * rho_x * rho_x + - p * rho * rho_xx ) * mu_) + + return SVector(du1, du2, du3) +end + +volume_flux = flux_ranocha +solver = DGSEM(polydeg=3, surface_flux=flux_hllc, + volume_integral=VolumeIntegralFluxDifferencing(volume_flux)) + +coordinates_min = -1.0 +coordinates_max = 1.0 +mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level=4, + n_cells_max=100_000) + + +semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), + initial_condition, solver, + source_terms = source_terms_navier_stokes_convergence_test) + +############################################################################### +# ODE solvers, callbacks etc. 
+
+tspan = (0.0, 10.0)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 1000
+analysis_callback = AnalysisCallback(semi, interval=analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval=analysis_interval,)
+
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback,
+                        alive_callback)
+
+###############################################################################
+# run the simulation
+
+time_int_tol = 1e-9
+sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol,
+            ode_default_options()..., callback=callbacks)
+summary_callback() # print the timer summary
\ No newline at end of file
diff --git a/examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls.jl b/examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls.jl
new file mode 100644
index 00000000000..181a2cb209f
--- /dev/null
+++ b/examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls.jl
@@ -0,0 +1,160 @@
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the ideal compressible Navier-Stokes equations
+
+prandtl_number() = 0.72
+mu() = 0.01
+
+equations = CompressibleEulerEquations1D(1.4)
+equations_parabolic = CompressibleNavierStokesDiffusion1D(equations, mu=mu(), Prandtl=prandtl_number(),
+                                                          gradient_variables=GradientVariablesPrimitive())
+
+# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux
+solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs,
+               volume_integral=VolumeIntegralWeakForm())
+
+coordinates_min = -1.0
+coordinates_max = 1.0
+
+# Create a uniformly refined mesh; the boundaries are non-periodic since wall
+# boundary conditions are imposed below
+mesh = TreeMesh(coordinates_min, coordinates_max,
+                initial_refinement_level=3,
+                periodicity=false,
+                n_cells_max=30_000) # set maximum capacity of tree data structure
+
+# Note: the initial condition cannot be specialized to `CompressibleNavierStokesDiffusion1D`
+# since it is called by both the parabolic solver (which passes in `CompressibleNavierStokesDiffusion1D`)
+# and by the hyperbolic solver (which passes in `CompressibleEulerEquations1D`).
+# This convergence test setup was originally derived by Andrew Winters (@andrewwinters5000)
+function initial_condition_navier_stokes_convergence_test(x, t, equations)
+    # Amplitude and shift
+    A = 0.5
+    c = 2.0
+
+    # convenience values for trig. functions
+    pi_x = pi * x[1]
+    pi_t = pi * t
+
+    rho = c + A * cos(pi_x) * cos(pi_t)
+    v1 = log(x[1] + 2.0) * (1.0 - exp(-A * (x[1] - 1.0)) ) * cos(pi_t)
+    p = rho^2
+
+    return prim2cons(SVector(rho, v1, p), equations)
+end
+
+@inline function source_terms_navier_stokes_convergence_test(u, x, t, equations)
+    x = x[1]
+
+    # TODO: parabolic
+    # we currently need to hardcode these parameters until we fix the "combined equation" issue
+    # see also https://github.com/trixi-framework/Trixi.jl/pull/1160
+    inv_gamma_minus_one = inv(equations.gamma - 1)
+    Pr = prandtl_number()
+    mu_ = mu()
+
+    # Same settings as in `initial_condition`
+    # Amplitude and shift
+    A = 0.5
+    c = 2.0
+
+    # convenience values for trig.
functions
+    pi_x = pi * x
+    pi_t = pi * t
+
+    # compute the manufactured solution and all necessary derivatives
+    rho = c + A * cos(pi_x) * cos(pi_t)
+    rho_t = -pi * A * cos(pi_x) * sin(pi_t)
+    rho_x = -pi * A * sin(pi_x) * cos(pi_t)
+    rho_xx = -pi * pi * A * cos(pi_x) * cos(pi_t)
+
+    v1 = log(x + 2.0) * (1.0 - exp(-A * (x - 1.0))) * cos(pi_t)
+    v1_t = -pi * log(x + 2.0) * (1.0 - exp(-A * (x - 1.0))) * sin(pi_t)
+    v1_x = (A * log(x + 2.0) * exp(-A * (x - 1.0)) + (1.0 - exp(-A * (x - 1.0))) / (x + 2.0)) * cos(pi_t)
+    v1_xx = (( 2.0 * A * exp(-A * (x - 1.0)) / (x + 2.0)
+               - A * A * log(x + 2.0) * exp(-A * (x - 1.0))
+               - (1.0 - exp(-A * (x - 1.0))) / ((x + 2.0) * (x + 2.0))) * cos(pi_t))
+
+    p = rho * rho
+    p_t = 2.0 * rho * rho_t
+    p_x = 2.0 * rho * rho_x
+    p_xx = 2.0 * rho * rho_xx + 2.0 * rho_x * rho_x
+
+    # Note this simplifies slightly compared to multiple space dimensions since there is only one velocity component
+    E = p * inv_gamma_minus_one + 0.5 * rho * v1^2
+    E_t = p_t * inv_gamma_minus_one + 0.5 * rho_t * v1^2 + rho * v1 * v1_t
+    E_x = p_x * inv_gamma_minus_one + 0.5 * rho_x * v1^2 + rho * v1 * v1_x
+
+    # Some convenience constants
+    T_const = equations.gamma * inv_gamma_minus_one / Pr
+    inv_rho_cubed = 1.0 / (rho^3)
+
+    # compute the source terms
+    # density equation
+    du1 = rho_t + rho_x * v1 + rho * v1_x
+
+    # x-momentum equation
+    du2 = ( rho_t * v1 + rho * v1_t
+             + p_x + rho_x * v1^2 + 2.0 * rho * v1 * v1_x
+           # stress tensor from x-direction
+           - v1_xx * mu_)
+
+    # total energy equation
+    du3 = ( E_t + v1_x * (E + p) + v1 * (E_x + p_x)
+           # stress tensor and temperature gradient terms from x-direction
+                                          - v1_xx * v1 * mu_
+                                          - v1_x * v1_x * mu_
+                                          - T_const * inv_rho_cubed * ( p_xx * rho * rho
+                                                                       - 2.0 * p_x * rho * rho_x
+                                                                       + 2.0 * p * rho_x * rho_x
+                                                                       - p * rho * rho_xx ) * mu_ )
+
+    return SVector(du1, du2, du3)
+end
+
+initial_condition = initial_condition_navier_stokes_convergence_test
+
+# BC types
+velocity_bc_left_right = NoSlip((x, t, equations) -> initial_condition_navier_stokes_convergence_test(x, t, equations)[2])
+
+heat_bc_left = Isothermal((x, t, equations) ->
+    Trixi.temperature(initial_condition_navier_stokes_convergence_test(x, t, equations),
+                      equations_parabolic))
+heat_bc_right = Adiabatic((x, t, equations) -> 0.0)
+
+boundary_condition_left = BoundaryConditionNavierStokesWall(velocity_bc_left_right, heat_bc_left)
+boundary_condition_right = BoundaryConditionNavierStokesWall(velocity_bc_left_right, heat_bc_right)
+
+# define inviscid boundary conditions
+boundary_conditions = (; x_neg = boundary_condition_slip_wall,
+                         x_pos = boundary_condition_slip_wall)
+
+# define viscous boundary conditions
+boundary_conditions_parabolic = (; x_neg = boundary_condition_left,
+                                   x_pos = boundary_condition_right)
+
+semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), initial_condition, solver;
+                                             boundary_conditions=(boundary_conditions, boundary_conditions_parabolic),
+                                             source_terms=source_terms_navier_stokes_convergence_test)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+ +# Create ODE problem with time span `tspan` +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() +alive_callback = AliveCallback(alive_interval=10) +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) +callbacks = CallbackSet(summary_callback, alive_callback, analysis_callback) + +############################################################################### +# run the simulation + +time_int_tol = 1e-8 +sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, dt = 1e-5, + ode_default_options()..., callback=callbacks) +summary_callback() # print the timer summary \ No newline at end of file diff --git a/src/Trixi.jl b/src/Trixi.jl index 990c33f3c94..78ddaa3ca7f 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -152,7 +152,8 @@ export AcousticPerturbationEquations2D, LinearizedEulerEquations2D export LaplaceDiffusion1D, LaplaceDiffusion2D, - CompressibleNavierStokesDiffusion2D, CompressibleNavierStokesDiffusion3D + CompressibleNavierStokesDiffusion1D, CompressibleNavierStokesDiffusion2D, + CompressibleNavierStokesDiffusion3D export GradientVariablesPrimitive, GradientVariablesEntropy diff --git a/src/equations/compressible_euler_1d.jl b/src/equations/compressible_euler_1d.jl index e4fd0997eae..9204989e8be 100644 --- a/src/equations/compressible_euler_1d.jl +++ b/src/equations/compressible_euler_1d.jl @@ -198,6 +198,57 @@ function initial_condition_eoc_test_coupled_euler_gravity(x, t, return prim2cons(SVector(rho, v1, p), equations) end +""" + boundary_condition_slip_wall(u_inner, orientation, direction, x, t, + surface_flux_function, equations::CompressibleEulerEquations1D) +Determine the boundary numerical surface flux for a slip wall condition. +Imposes a zero normal velocity at the wall. +Density is taken from the internal solution state and pressure is computed as an +exact solution of a 1D Riemann problem. Further details about this boundary state +are available in the paper: +- J. J. W. van der Vegt and H. van der Ven (2002) + Slip flow boundary conditions in discontinuous Galerkin discretizations of + the Euler equations of gas dynamics + [PDF](https://reports.nlr.nl/bitstream/handle/10921/692/TP-2002-300.pdf?sequence=1) + + Should be used together with [`TreeMesh`](@ref). +""" +@inline function boundary_condition_slip_wall(u_inner, orientation, + direction, x, t, + surface_flux_function, + equations::CompressibleEulerEquations1D) + # compute the primitive variables + rho_local, v_normal, p_local = cons2prim(u_inner, equations) + + if isodd(direction) # flip sign of normal to make it outward pointing + v_normal *= -1 + end + + # Get the solution of the pressure Riemann problem + # See Section 6.3.3 of + # Eleuterio F. 
Toro (2009)
+    # Riemann Solvers and Numerical Methods for Fluid Dynamics: A Practical Introduction
+    # [DOI: 10.1007/b79761](https://doi.org/10.1007/b79761)
+    if v_normal <= 0.0
+        sound_speed = sqrt(equations.gamma * p_local / rho_local) # local sound speed
+        p_star = p_local *
+                 (1 + 0.5 * (equations.gamma - 1) * v_normal / sound_speed)^(2 *
+                                                                             equations.gamma *
+                                                                             equations.inv_gamma_minus_one)
+    else # v_normal > 0.0
+        A = 2 / ((equations.gamma + 1) * rho_local)
+        B = p_local * (equations.gamma - 1) / (equations.gamma + 1)
+        p_star = p_local +
+                 0.5 * v_normal / A *
+                 (v_normal + sqrt(v_normal^2 + 4 * A * (p_local + B)))
+    end
+
+    # For the slip wall we directly set the flux as the normal velocity is zero
+    return SVector(zero(eltype(u_inner)),
+                   p_star,
+                   zero(eltype(u_inner)))
+end
+
 # Calculate 1D flux for a single point
 @inline function flux(u, orientation::Integer, equations::CompressibleEulerEquations1D)
     rho, rho_v1, rho_e = u
diff --git a/src/equations/compressible_navier_stokes.jl b/src/equations/compressible_navier_stokes.jl
new file mode 100644
index 00000000000..af7897d4586
--- /dev/null
+++ b/src/equations/compressible_navier_stokes.jl
@@ -0,0 +1,70 @@
+# TODO: can we generalize this to MHD?
+"""
+    struct BoundaryConditionNavierStokesWall
+
+Creates a wall-type boundary condition for the compressible Navier-Stokes equations.
+The fields `boundary_condition_velocity` and `boundary_condition_heat_flux` are intended
+to be boundary condition types such as the `NoSlip` velocity boundary condition and the
+`Adiabatic` or `Isothermal` heat boundary condition.
+
+!!! warning "Experimental feature"
+    This is an experimental feature and may change in future releases.
+"""
+struct BoundaryConditionNavierStokesWall{V, H}
+    boundary_condition_velocity::V
+    boundary_condition_heat_flux::H
+end
+
+"""
+    struct NoSlip
+
+Used to create a no-slip boundary condition with `BoundaryConditionNavierStokesWall`. The field `boundary_value_function`
+should be a function with signature `boundary_value_function(x, t, equations)`
+and should return a `SVector{NDIMS}` whose entries are the velocity vector at a
+point `x` and time `t`.
+"""
+struct NoSlip{F}
+    boundary_value_function::F # value of the velocity vector on the boundary
+end
+
+"""
+    struct Isothermal
+
+Used to create a no-slip boundary condition with [`BoundaryConditionNavierStokesWall`](@ref).
+The field `boundary_value_function` should be a function with signature
+`boundary_value_function(x, t, equations)` and return a scalar value for the
+temperature at point `x` and time `t`.
+"""
+struct Isothermal{F}
+    boundary_value_function::F # value of the temperature on the boundary
+end
+
+"""
+    struct Adiabatic
+
+Used to create a no-slip boundary condition with [`BoundaryConditionNavierStokesWall`](@ref).
+The field `boundary_value_normal_flux_function` should be a function with signature
+`boundary_value_normal_flux_function(x, t, equations)` and return a scalar value for the
+normal heat flux at point `x` and time `t`.
+"""
+struct Adiabatic{F}
+    boundary_value_normal_flux_function::F # scaled heat flux 1/T * kappa * dT/dn
+end
+
+"""
+!!! warning "Experimental code"
+    This code is experimental and may be changed or removed in any future release.
+
+`GradientVariablesPrimitive` and `GradientVariablesEntropy` are gradient variable type parameters
+for `CompressibleNavierStokesDiffusion1D`. By default, the gradient variables are set to be
+`GradientVariablesPrimitive`.
Specifying `GradientVariablesEntropy` instead uses the entropy variable
+formulation from
+- Hughes, Mallet, Franca (1986)
+  A new finite element formulation for computational fluid dynamics: I. Symmetric forms of the
+  compressible Euler and Navier-Stokes equations and the second law of thermodynamics.
+  [https://doi.org/10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1)
+
+Under `GradientVariablesEntropy`, the Navier-Stokes discretization is provably entropy stable.
+"""
+struct GradientVariablesPrimitive end
+struct GradientVariablesEntropy end
diff --git a/src/equations/compressible_navier_stokes_1d.jl b/src/equations/compressible_navier_stokes_1d.jl
new file mode 100644
index 00000000000..dca846cac1e
--- /dev/null
+++ b/src/equations/compressible_navier_stokes_1d.jl
@@ -0,0 +1,403 @@
+@doc raw"""
+    CompressibleNavierStokesDiffusion1D(equations; mu, Pr,
+                                        gradient_variables=GradientVariablesPrimitive())
+
+Contains the diffusion (i.e. parabolic) terms applied
+to mass, momenta, and total energy together with the advective terms from
+the [`CompressibleEulerEquations1D`](@ref).
+
+- `equations`: instance of the [`CompressibleEulerEquations1D`](@ref)
+- `mu`: dynamic viscosity,
+- `Pr`: Prandtl number,
+- `gradient_variables`: which variables the gradients are taken with respect to.
+  Defaults to `GradientVariablesPrimitive()`.
+
+Fluid properties such as the dynamic viscosity ``\mu`` can be provided in any consistent unit system, e.g.,
+[``\mu``] = kg m⁻¹ s⁻¹.
+
+The particular form of the compressible Navier-Stokes implemented is
+```math
+\frac{\partial}{\partial t}
+\begin{pmatrix}
+\rho \\ \rho v \\ \rho e
+\end{pmatrix}
++
+\frac{\partial}{\partial x}
+\begin{pmatrix}
+ \rho v \\ \rho v^2 + p \\ (\rho e + p) v
+\end{pmatrix}
+=
+\frac{\partial}{\partial x}
+\begin{pmatrix}
+0 \\ \tau \\ \tau v - q
+\end{pmatrix}
+```
+where the system is closed with the ideal gas assumption giving
+```math
+p = (\gamma - 1) \left( \rho e - \frac{1}{2} \rho v^2 \right)
+```
+as the pressure. The value of the adiabatic constant `gamma` is taken from the [`CompressibleEulerEquations1D`](@ref).
+The terms on the right hand side of the system above
+are built from the viscous stress
+```math
+\tau = \mu \frac{\partial}{\partial x} v
+```
+and the heat flux
+```math
+q = -\kappa \frac{\partial}{\partial x} \left(T\right),\quad T = \frac{p}{R\rho}
+```
+where ``T`` is the temperature and ``\kappa`` is the thermal conductivity for Fick's law.
+Under the assumption that the gas has a constant Prandtl number,
+the thermal conductivity is
+```math
+\kappa = \frac{\gamma \mu R}{(\gamma - 1)\textrm{Pr}}.
+```
+From this combination of temperature ``T`` and thermal conductivity ``\kappa`` we see
+that the gas constant `R` cancels and the heat flux becomes
+```math
+q = -\kappa \frac{\partial}{\partial x} \left(T\right) = -\frac{\gamma \mu}{(\gamma - 1)\textrm{Pr}} \frac{\partial}{\partial x} \left(\frac{p}{\rho}\right)
+```
+which is the form implemented below in the [`flux`](@ref) function.
+
+In one spatial dimension we require gradients for two quantities, e.g.,
+primitive quantities
+```math
+\frac{\partial}{\partial x} v,\, \frac{\partial}{\partial x} T
+```
+or the entropy variables
+```math
+\frac{\partial}{\partial x} w_2,\, \frac{\partial}{\partial x} w_3
+```
+where
+```math
+w_2 = \frac{\rho v_1}{p},\, w_3 = -\frac{\rho}{p}
+```
+
+!!! warning "Experimental code"
+    This code is experimental and may be changed or removed in any future release.
+""" +struct CompressibleNavierStokesDiffusion1D{GradientVariables, RealT <: Real, + E <: AbstractCompressibleEulerEquations{1}} <: + AbstractCompressibleNavierStokesDiffusion{1, 3} + # TODO: parabolic + # 1) For now save gamma and inv(gamma-1) again, but could potentially reuse them from the Euler equations + # 2) Add NGRADS as a type parameter here and in AbstractEquationsParabolic, add `ngradients(...)` accessor function + gamma::RealT # ratio of specific heats + inv_gamma_minus_one::RealT # = inv(gamma - 1); can be used to write slow divisions as fast multiplications + + mu::RealT # viscosity + Pr::RealT # Prandtl number + kappa::RealT # thermal diffusivity for Fick's law + + equations_hyperbolic::E # CompressibleEulerEquations1D + gradient_variables::GradientVariables # GradientVariablesPrimitive or GradientVariablesEntropy +end + +# default to primitive gradient variables +function CompressibleNavierStokesDiffusion1D(equations::CompressibleEulerEquations1D; + mu, Prandtl, + gradient_variables = GradientVariablesPrimitive()) + gamma = equations.gamma + inv_gamma_minus_one = equations.inv_gamma_minus_one + μ, Pr = promote(mu, Prandtl) + + # Under the assumption of constant Prandtl number the thermal conductivity + # constant is kappa = gamma μ / ((gamma-1) Pr). + # Important note! Factor of μ is accounted for later in `flux`. + kappa = gamma * inv_gamma_minus_one / Pr + + CompressibleNavierStokesDiffusion1D{typeof(gradient_variables), typeof(gamma), + typeof(equations)}(gamma, inv_gamma_minus_one, + μ, Pr, kappa, + equations, gradient_variables) +end + +# TODO: parabolic +# This is the flexibility a user should have to select the different gradient variable types +# varnames(::typeof(cons2prim) , ::CompressibleNavierStokesDiffusion1D) = ("v1", "v2", "T") +# varnames(::typeof(cons2entropy), ::CompressibleNavierStokesDiffusion1D) = ("w2", "w3", "w4") + +function varnames(variable_mapping, + equations_parabolic::CompressibleNavierStokesDiffusion1D) + varnames(variable_mapping, equations_parabolic.equations_hyperbolic) +end + +# we specialize this function to compute gradients of primitive variables instead of +# conservative variables. +function gradient_variable_transformation(::CompressibleNavierStokesDiffusion1D{ + GradientVariablesPrimitive + }) + cons2prim +end +function gradient_variable_transformation(::CompressibleNavierStokesDiffusion1D{ + GradientVariablesEntropy + }) + cons2entropy +end + +# Explicit formulas for the diffusive Navier-Stokes fluxes are available, e.g., in Section 2 +# of the paper by Rueda-Ramírez, Hennemann, Hindenlang, Winters, and Gassner +# "An Entropy Stable Nodal Discontinuous Galerkin Method for the resistive +# MHD Equations. Part II: Subcell Finite Volume Shock Capturing" +# where one sets the magnetic field components equal to 0. +function flux(u, gradients, orientation::Integer, + equations::CompressibleNavierStokesDiffusion1D) + # Here, `u` is assumed to be the "transformed" variables specified by `gradient_variable_transformation`. + rho, v1, _ = convert_transformed_to_primitive(u, equations) + # Here `gradients` is assumed to contain the gradients of the primitive variables (rho, v1, v2, T) + # either computed directly or reverse engineered from the gradient of the entropy variables + # by way of the `convert_gradient_variables` function. 
+    _, dv1dx, dTdx = convert_derivative_to_primitive(u, gradients, equations)
+
+    # Viscous stress (tensor)
+    tau_11 = dv1dx
+
+    # Fick's law q = -kappa * grad(T) = -kappa * grad(p / (R rho))
+    # with thermal diffusivity constant kappa = gamma μ R / ((gamma-1) Pr)
+    # Note, the gas constant cancels under this formulation, so it is not present
+    # in the implementation
+    q1 = equations.kappa * dTdx
+
+    # Constant dynamic viscosity is copied to a variable for readability.
+    # Offers flexibility for dynamic viscosity via Sutherland's law where it depends
+    # on temperature and reference values, Ts and Tref, such that the viscosity
+    # becomes a temperature-dependent function mu(T)
+    mu = equations.mu
+
+    # viscous flux components in the x-direction
+    f1 = zero(rho)
+    f2 = tau_11 * mu
+    f3 = (v1 * tau_11 + q1) * mu
+
+    return SVector(f1, f2, f3)
+end
+
+# Convert conservative variables to primitive
+@inline function cons2prim(u, equations::CompressibleNavierStokesDiffusion1D)
+    rho, rho_v1, _ = u
+
+    v1 = rho_v1 / rho
+    T = temperature(u, equations)
+
+    return SVector(rho, v1, T)
+end
+
+# Convert conservative variables to entropy
+# TODO: parabolic. We can improve efficiency by not computing w_1, which involves logarithms
+# This can be done by specializing `cons2entropy` and `entropy2cons` to `CompressibleNavierStokesDiffusion1D`,
+# but this may be confusing to new users.
+function cons2entropy(u, equations::CompressibleNavierStokesDiffusion1D)
+    cons2entropy(u, equations.equations_hyperbolic)
+end
+function entropy2cons(w, equations::CompressibleNavierStokesDiffusion1D)
+    entropy2cons(w, equations.equations_hyperbolic)
+end
+
+# the `flux` function takes in transformed variables `u` which depend on the type of the gradient variables.
+# For CNS, it is simplest to formulate the viscous terms in primitive variables, so we transform the transformed
+# variables into primitive variables.
+@inline function convert_transformed_to_primitive(u_transformed,
+                                                  equations::CompressibleNavierStokesDiffusion1D{
+                                                                                                 GradientVariablesPrimitive
+                                                                                                 })
+    return u_transformed
+end
+
+# TODO: parabolic. Make this more efficient!
+@inline function convert_transformed_to_primitive(u_transformed,
+                                                  equations::CompressibleNavierStokesDiffusion1D{
+                                                                                                 GradientVariablesEntropy
+                                                                                                 })
+    # note: this uses CompressibleNavierStokesDiffusion1D versions of cons2prim and entropy2cons
+    return cons2prim(entropy2cons(u_transformed, equations), equations)
+end
+
+# Takes the solution values `u` and gradient of the entropy variables (w_2, w_3) and
+# reverse engineers the gradients in terms of the primitive variables (v1, T).
+# Helpful because then the diffusive fluxes have the same form as on paper.
+# Note, the first component of `gradient_entropy_vars` contains gradient(rho) which is unused.
+# TODO: parabolic; entropy stable viscous terms
+@inline function convert_derivative_to_primitive(u, gradient,
+                                                 ::CompressibleNavierStokesDiffusion1D{
+                                                                                       GradientVariablesPrimitive
+                                                                                       })
+    return gradient
+end
+
+# the first argument is always the "transformed" variables.
+@inline function convert_derivative_to_primitive(w, gradient_entropy_vars,
+                                                 equations::CompressibleNavierStokesDiffusion1D{
+                                                                                                GradientVariablesEntropy
+                                                                                                })
+
+    # TODO: parabolic. This is inefficient to pass in transformed variables but then transform them back.
+    # We can fix this if we directly compute v1 and T from the entropy variables
+    u = entropy2cons(w, equations) # calls a "modified" entropy2cons defined for CompressibleNavierStokesDiffusion1D
+    rho, rho_v1, _ = u
+
+    v1 = rho_v1 / rho
+    T = temperature(u, equations)
+
+    return SVector(gradient_entropy_vars[1],
+                   T * (gradient_entropy_vars[2] + v1 * gradient_entropy_vars[3]), # grad(u) = T*(grad(w_2)+v1*grad(w_3))
+                   T * T * gradient_entropy_vars[3])
+end
+
+# This routine is required because `prim2cons` is called in `initial_condition`, which
+# is called with `equations::CompressibleEulerEquations1D`. This means it is inconsistent
+# with `cons2prim(..., ::CompressibleNavierStokesDiffusion1D)` as defined above.
+# TODO: parabolic. Is there a way to clean this up?
+@inline function prim2cons(u, equations::CompressibleNavierStokesDiffusion1D)
+    prim2cons(u, equations.equations_hyperbolic)
+end
+
+@inline function temperature(u, equations::CompressibleNavierStokesDiffusion1D)
+    rho, rho_v1, rho_e = u
+
+    p = (equations.gamma - 1) * (rho_e - 0.5 * rho_v1^2 / rho)
+    T = p / rho
+    return T
+end
+
+@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip,
+                                                                        <:Adiabatic})(flux_inner,
+                                                                                      u_inner,
+                                                                                      orientation::Integer,
+                                                                                      direction,
+                                                                                      x, t,
+                                                                                      operator_type::Gradient,
+                                                                                      equations::CompressibleNavierStokesDiffusion1D{
+                                                                                                                                     GradientVariablesPrimitive
+                                                                                                                                     })
+    v1 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t,
+                                                                                equations)
+    return SVector(u_inner[1], v1, u_inner[3])
+end
+
+@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip,
+                                                                        <:Adiabatic})(flux_inner,
+                                                                                      u_inner,
+                                                                                      orientation::Integer,
+                                                                                      direction,
+                                                                                      x, t,
+                                                                                      operator_type::Divergence,
+                                                                                      equations::CompressibleNavierStokesDiffusion1D{
+                                                                                                                                     GradientVariablesPrimitive
+                                                                                                                                     })
+    # rho, v1, _ = u_inner
+    normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x,
+                                                                                                           t,
+                                                                                                           equations)
+    v1 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t,
+                                                                                equations)
+    _, tau_1n, _ = flux_inner # extract fluxes for 2nd equation
+    normal_energy_flux = v1 * tau_1n + normal_heat_flux
+    return SVector(flux_inner[1], flux_inner[2], normal_energy_flux)
+end
+
+@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip,
+                                                                        <:Isothermal})(flux_inner,
+                                                                                       u_inner,
+                                                                                       orientation::Integer,
+                                                                                       direction,
+                                                                                       x, t,
+                                                                                       operator_type::Gradient,
+                                                                                       equations::CompressibleNavierStokesDiffusion1D{
+                                                                                                                                      GradientVariablesPrimitive
+                                                                                                                                      })
+    v1 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t,
+                                                                                equations)
+    T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t,
+                                                                                equations)
+    return SVector(u_inner[1], v1, T)
+end
+
+@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip,
+                                                                        <:Isothermal})(flux_inner,
+                                                                                       u_inner,
+                                                                                       orientation::Integer,
+                                                                                       direction,
+                                                                                       x, t,
+                                                                                       operator_type::Divergence,
+                                                                                       equations::CompressibleNavierStokesDiffusion1D{
+                                                                                                                                      GradientVariablesPrimitive
+                                                                                                                                      })
+    return flux_inner
+end
+
+# specialized BC impositions for GradientVariablesEntropy.
+
+# This should return a SVector containing the boundary values of entropy variables.
+# Here, `w_inner` are the transformed variables (e.g., entropy variables).
+#
+# Taken from "Entropy stable modal discontinuous Galerkin schemes and wall boundary conditions
+# for the compressible Navier-Stokes equations" by Chan, Lin, Warburton 2022.
+# DOI: 10.1016/j.jcp.2021.110723 +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + w_inner, + orientation::Integer, + direction, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion1D{ + GradientVariablesEntropy + }) + v1 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, + equations) + negative_rho_inv_p = w_inner[3] # w_3 = -rho / p + return SVector(w_inner[1], -v1 * negative_rho_inv_p, negative_rho_inv_p) +end + +# this is actually identical to the specialization for GradientVariablesPrimitive, but included for completeness. +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Adiabatic})(flux_inner, + w_inner, + orientation::Integer, + direction, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion1D{ + GradientVariablesEntropy + }) + normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x, + t, + equations) + v1 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, + equations) + _, tau_1n, _ = flux_inner # extract fluxes for 2nd equation + normal_energy_flux = v1 * tau_1n + normal_heat_flux + return SVector(flux_inner[1], flux_inner[2], normal_energy_flux) +end + +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + w_inner, + orientation::Integer, + direction, + x, t, + operator_type::Gradient, + equations::CompressibleNavierStokesDiffusion1D{ + GradientVariablesEntropy + }) + v1 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t, + equations) + T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t, + equations) + + # the entropy variables w2 = rho * v1 / p = v1 / T = -v1 * w3. + w3 = -1 / T + return SVector(w_inner[1], -v1 * w3, w3) +end + +@inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, + <:Isothermal})(flux_inner, + w_inner, + orientation::Integer, + direction, + x, t, + operator_type::Divergence, + equations::CompressibleNavierStokesDiffusion1D{ + GradientVariablesEntropy + }) + return SVector(flux_inner[1], flux_inner[2], flux_inner[3]) +end diff --git a/src/equations/compressible_navier_stokes_2d.jl b/src/equations/compressible_navier_stokes_2d.jl index a1f11717e69..f762fe5d5ee 100644 --- a/src/equations/compressible_navier_stokes_2d.jl +++ b/src/equations/compressible_navier_stokes_2d.jl @@ -29,7 +29,7 @@ The particular form of the compressible Navier-Stokes implemented is = \nabla \cdot \begin{pmatrix} -0 \\ \underline{\tau} \\ \underline{\tau}\mathbf{v} - \nabla q +0 \\ \underline{\tau} \\ \underline{\tau}\mathbf{v} - \mathbf{q} \end{pmatrix} ``` where the system is closed with the ideal gas assumption giving @@ -44,7 +44,7 @@ are built from the viscous stress tensor ``` where ``\underline{I}`` is the ``2\times 2`` identity matrix and the heat flux is ```math -\nabla q = -\kappa\nabla\left(T\right),\quad T = \frac{p}{R\rho} +\mathbf{q} = -\kappa\nabla\left(T\right),\quad T = \frac{p}{R\rho} ``` where ``T`` is the temperature and ``\kappa`` is the thermal conductivity for Fick's law. 
Under the assumption that the gas has a constant Prandtl number, @@ -55,7 +55,7 @@ the thermal conductivity is From this combination of temperature ``T`` and thermal conductivity ``\kappa`` we see that the gas constant `R` cancels and the heat flux becomes ```math -\nabla q = -\kappa\nabla\left(T\right) = -\frac{\gamma \mu}{(\gamma - 1)\textrm{Pr}}\nabla\left(\frac{p}{\rho}\right) +\mathbf{q} = -\kappa\nabla\left(T\right) = -\frac{\gamma \mu}{(\gamma - 1)\textrm{Pr}}\nabla\left(\frac{p}{\rho}\right) ``` which is the form implemented below in the [`flux`](@ref) function. @@ -93,24 +93,6 @@ struct CompressibleNavierStokesDiffusion2D{GradientVariables, RealT <: Real, gradient_variables::GradientVariables # GradientVariablesPrimitive or GradientVariablesEntropy end -""" -!!! warning "Experimental code" - This code is experimental and may be changed or removed in any future release. - -`GradientVariablesPrimitive` and `GradientVariablesEntropy` are gradient variable type parameters -for `CompressibleNavierStokesDiffusion2D`. By default, the gradient variables are set to be -`GradientVariablesPrimitive`. Specifying `GradientVariablesEntropy` instead uses the entropy variable -formulation from -- Hughes, Mallet, Franca (1986) - A new finite element formulation for computational fluid dynamics: I. Symmetric forms of the - compressible Euler and Navier-Stokes equations and the second law of thermodynamics. - [https://doi.org/10.1016/0045-7825(86)90127-1](https://doi.org/10.1016/0045-7825(86)90127-1) - -Under `GradientVariablesEntropy`, the Navier-Stokes discretization is provably entropy stable. -""" -struct GradientVariablesPrimitive end -struct GradientVariablesEntropy end - # default to primitive gradient variables function CompressibleNavierStokesDiffusion2D(equations::CompressibleEulerEquations2D; mu, Prandtl, @@ -315,59 +297,6 @@ end return dv2dx - dv1dy end -# TODO: can we generalize this to MHD? -""" - struct BoundaryConditionNavierStokesWall - -Creates a wall-type boundary conditions for the compressible Navier-Stokes equations. -The fields `boundary_condition_velocity` and `boundary_condition_heat_flux` are intended -to be boundary condition types such as the `NoSlip` velocity boundary condition and the -`Adiabatic` or `Isothermal` heat boundary condition. - -!!! warning "Experimental feature" - This is an experimental feature and may change in future releases. -""" -struct BoundaryConditionNavierStokesWall{V, H} - boundary_condition_velocity::V - boundary_condition_heat_flux::H -end - -""" - struct NoSlip - -Use to create a no-slip boundary condition with `BoundaryConditionNavierStokesWall`. The field `boundary_value_function` -should be a function with signature `boundary_value_function(x, t, equations)` -and should return a `SVector{NDIMS}` whose entries are the velocity vector at a -point `x` and time `t`. -""" -struct NoSlip{F} - boundary_value_function::F # value of the velocity vector on the boundary -end - -""" - struct Isothermal - -Used to create a no-slip boundary condition with [`BoundaryConditionNavierStokesWall`](@ref). -The field `boundary_value_function` should be a function with signature -`boundary_value_function(x, t, equations)` and return a scalar value for the -temperature at point `x` and time `t`. -""" -struct Isothermal{F} - boundary_value_function::F # value of the temperature on the boundary -end - -""" - struct Adiabatic - -Used to create a no-slip boundary condition with [`BoundaryConditionNavierStokesWall`](@ref). 
-The field `boundary_value_normal_flux_function` should be a function with signature -`boundary_value_normal_flux_function(x, t, equations)` and return a scalar value for the -normal heat flux at point `x` and time `t`. -""" -struct Adiabatic{F} - boundary_value_normal_flux_function::F # scaled heat flux 1/T * kappa * dT/dn -end - @inline function (boundary_condition::BoundaryConditionNavierStokesWall{<:NoSlip, <:Adiabatic})(flux_inner, u_inner, diff --git a/src/equations/compressible_navier_stokes_3d.jl b/src/equations/compressible_navier_stokes_3d.jl index 0b770dff1ca..166b53bf615 100644 --- a/src/equations/compressible_navier_stokes_3d.jl +++ b/src/equations/compressible_navier_stokes_3d.jl @@ -29,7 +29,7 @@ The particular form of the compressible Navier-Stokes implemented is = \nabla \cdot \begin{pmatrix} -0 \\ \underline{\tau} \\ \underline{\tau}\mathbf{v} - \nabla q +0 \\ \underline{\tau} \\ \underline{\tau}\mathbf{v} - \mathbf{q} \end{pmatrix} ``` where the system is closed with the ideal gas assumption giving @@ -44,7 +44,7 @@ are built from the viscous stress tensor ``` where ``\underline{I}`` is the ``3\times 3`` identity matrix and the heat flux is ```math -\nabla q = -\kappa\nabla\left(T\right),\quad T = \frac{p}{R\rho} +\mathbf{q} = -\kappa\nabla\left(T\right),\quad T = \frac{p}{R\rho} ``` where ``T`` is the temperature and ``\kappa`` is the thermal conductivity for Fick's law. Under the assumption that the gas has a constant Prandtl number, @@ -55,7 +55,7 @@ the thermal conductivity is From this combination of temperature ``T`` and thermal conductivity ``\kappa`` we see that the gas constant `R` cancels and the heat flux becomes ```math -\nabla q = -\kappa\nabla\left(T\right) = -\frac{\gamma \mu}{(\gamma - 1)\textrm{Pr}}\nabla\left(\frac{p}{\rho}\right) +\mathbf{q} = -\kappa\nabla\left(T\right) = -\frac{\gamma \mu}{(\gamma - 1)\textrm{Pr}}\nabla\left(\frac{p}{\rho}\right) ``` which is the form implemented below in the [`flux`](@ref) function. 
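Since the 2D and 3D docstrings above were just corrected to write the heat flux as the vector quantity ``\mathbf{q}`` rather than the gradient ``\nabla q``, it may help to spell out once why the gas constant `R` drops out of the implemented heat flux. A minimal standalone sketch in plain Julia (hypothetical helper and parameter names, not part of this patch or of the Trixi API):

# Heat flux q = -kappa * dT/dx with T = p / (R * rho) and
# kappa = gamma * mu * R / ((gamma - 1) * Pr); R cancels, leaving only
# the spatial derivative of p / rho.
function heat_flux_1d(p, rho, dpdx, drhodx; gamma = 1.4, mu = 1.0e-3, Pr = 0.72)
    d_p_over_rho_dx = (dpdx * rho - p * drhodx) / rho^2  # chain rule on p / rho
    return -gamma * mu / ((gamma - 1) * Pr) * d_p_over_rho_dx
end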
diff --git a/src/equations/equations_parabolic.jl b/src/equations/equations_parabolic.jl index 6c0be43798a..66214025044 100644 --- a/src/equations/equations_parabolic.jl +++ b/src/equations/equations_parabolic.jl @@ -11,5 +11,7 @@ include("laplace_diffusion_2d.jl") # Compressible Navier-Stokes equations abstract type AbstractCompressibleNavierStokesDiffusion{NDIMS, NVARS} <: AbstractEquationsParabolic{NDIMS, NVARS} end +include("compressible_navier_stokes.jl") +include("compressible_navier_stokes_1d.jl") include("compressible_navier_stokes_2d.jl") include("compressible_navier_stokes_3d.jl") diff --git a/test/test_parabolic_1d.jl b/test/test_parabolic_1d.jl index 1aaf23d576a..06a55100d62 100644 --- a/test/test_parabolic_1d.jl +++ b/test/test_parabolic_1d.jl @@ -19,7 +19,40 @@ isdir(outdir) && rm(outdir, recursive=true) linf = [2.847421658558336e-05] ) end - + + @trixi_testset "TreeMesh1D: elixir_navierstokes_convergence_periodic.jl" begin + @test_trixi_include(joinpath(examples_dir(), "tree_1d_dgsem", "elixir_navierstokes_convergence_periodic.jl"), + l2 = [0.0001133835907077494, 6.226282245610444e-5, 0.0002820171699999139], + linf = [0.0006255102377159538, 0.00036195501456059986, 0.0016147729485886941] + ) + end + + @trixi_testset "TreeMesh1D: elixir_navierstokes_convergence_periodic.jl: GradientVariablesEntropy" begin + @test_trixi_include(joinpath(examples_dir(), "tree_1d_dgsem", "elixir_navierstokes_convergence_periodic.jl"), + equations_parabolic = CompressibleNavierStokesDiffusion1D(equations, mu=mu(), + Prandtl=prandtl_number(), + gradient_variables = GradientVariablesEntropy()), + l2 = [0.00011310615871043463, 6.216495207074201e-5, 0.00028195843110817814], + linf = [0.0006240837363233886, 0.0003616694320713876, 0.0016147339542413874] + ) + end + + @trixi_testset "TreeMesh1D: elixir_navierstokes_convergence_walls.jl" begin + @test_trixi_include(joinpath(examples_dir(), "tree_1d_dgsem", "elixir_navierstokes_convergence_walls.jl"), + l2 = [0.00047023310868269237, 0.00032181736027057234, 0.0014966266486095025], + linf = [0.002996375101363302, 0.002863904256059634, 0.012691132946258676] + ) + end + + @trixi_testset "TreeMesh1D: elixir_navierstokes_convergence_walls.jl: GradientVariablesEntropy" begin + @test_trixi_include(joinpath(examples_dir(), "tree_1d_dgsem", "elixir_navierstokes_convergence_walls.jl"), + equations_parabolic = CompressibleNavierStokesDiffusion1D(equations, mu=mu(), + Prandtl=prandtl_number(), + gradient_variables = GradientVariablesEntropy()), + l2 = [0.0004608500483647771, 0.00032431091222851285, 0.0015159733360626845], + linf = [0.002754803146635787, 0.0028567714697580906, 0.012941794048176192] + ) + end end # Clean up afterwards: delete Trixi output directory From 68df09d5a21bd8f7393df90dab915247f9498505 Mon Sep 17 00:00:00 2001 From: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com> Date: Fri, 11 Aug 2023 09:44:19 +0200 Subject: [PATCH 115/163] fix typo in Davis wave speed estimate for 1d swe (#1601) Co-authored-by: Andrew Winters Co-authored-by: Hendrik Ranocha --- src/equations/shallow_water_1d.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/equations/shallow_water_1d.jl b/src/equations/shallow_water_1d.jl index 57bcb1212e1..32782d5478c 100644 --- a/src/equations/shallow_water_1d.jl +++ b/src/equations/shallow_water_1d.jl @@ -653,7 +653,7 @@ end c_rr = sqrt(equations.gravity * h_rr) λ_min = min(v_ll - c_ll, v_rr - c_rr) - λ_max = max(v_rr + c_rr, v_rr + c_rr) + λ_max = max(v_ll + c_ll, v_rr + c_rr) return λ_min, λ_max end 
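The one-character fix above is easiest to see in isolation: with the typo, `λ_max` never took the left state into account. A minimal standalone sketch of the corrected Davis wave speed estimate for the 1D shallow water equations (hypothetical helper, not the actual Trixi function signature):

function min_max_speed_davis_swe(h_ll, v_ll, h_rr, v_rr, gravity)
    c_ll = sqrt(gravity * h_ll)  # gravity wave celerity of the left state
    c_rr = sqrt(gravity * h_rr)  # gravity wave celerity of the right state
    λ_min = min(v_ll - c_ll, v_rr - c_rr)
    λ_max = max(v_ll + c_ll, v_rr + c_rr)  # corrected: both states contribute
    return λ_min, λ_max
end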
From 34c123c285d44af6725b68ae176029736c357542 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Sun, 13 Aug 2023 05:50:12 +0200 Subject: [PATCH 116/163] set version to v0.5.38 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index c22d4b90642..6419be4d8fc 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.38-pre" +version = "0.5.38" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From 0b8405a0950944b0883818eb5756ad9b7cd4094e Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Sun, 13 Aug 2023 05:50:27 +0200 Subject: [PATCH 117/163] set development version to v0.5.39-pre --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 6419be4d8fc..dd937ed213b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.38" +version = "0.5.39-pre" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From add2542c0076dc6526d969f78cec2f732430bc15 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Aug 2023 19:05:34 +0200 Subject: [PATCH 118/163] Bump crate-ci/typos from 1.16.2 to 1.16.5 (#1606) Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.16.2 to 1.16.5. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.16.2...v1.16.5) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/SpellCheck.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml
index a1a429cad97..6ebb288ea30 100644
--- a/.github/workflows/SpellCheck.yml
+++ b/.github/workflows/SpellCheck.yml
@@ -10,4 +10,4 @@ jobs:
       - name: Checkout Actions Repository
         uses: actions/checkout@v3
       - name: Check spelling
-        uses: crate-ci/typos@v1.16.2
+        uses: crate-ci/typos@v1.16.5

From 7f83a1a938eecd9b841efe215a6e482e67cfdcc1 Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Tue, 15 Aug 2023 11:58:32 +0200
Subject: [PATCH 119/163] Enable MPI coverage with Linux and reduce heap size
 hint (#1603)

* Enable MPI coverage with Linux and reduce heap size hint * Update runtests.jl * no MPI coverage CI on macOS * Update runtests.jl * Update runtests.jl

---
 test/runtests.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/runtests.jl b/test/runtests.jl
index 1b0c745dbfd..f1adbaaf1df 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -28,10 +28,10 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3)
     cmd = string(Base.julia_cmd())
     coverage = occursin("--code-coverage", cmd) &&
                !occursin("--code-coverage=none", cmd)
-    if !(coverage && Sys.iswindows()) && !(coverage && Sys.islinux())
+    if !(coverage && Sys.iswindows()) && !(coverage && Sys.isapple())
       # We provide a `--heap-size-hint` to avoid/reduce out-of-memory errors during CI testing
       mpiexec() do cmd
-        run(`$cmd -n $TRIXI_MPI_NPROCS $(Base.julia_cmd()) --threads=1 --check-bounds=yes --heap-size-hint=1G $(abspath("test_mpi.jl"))`)
+        run(`$cmd -n $TRIXI_MPI_NPROCS $(Base.julia_cmd()) --threads=1 --check-bounds=yes --heap-size-hint=0.5G $(abspath("test_mpi.jl"))`)
       end
     end
   end

From a4283e1e8253f7ddf2cabf22e2c7b39ce29a644f Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Tue, 15 Aug 2023 17:20:52 +0200
Subject: [PATCH 120/163] Update dependabot.yml (#1608)

---
 .github/dependabot.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 700707ced32..d60f0707fc2 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -4,4 +4,4 @@ updates:
   - package-ecosystem: "github-actions"
     directory: "/" # Location of package manifests
     schedule:
-      interval: "weekly"
+      interval: "monthly"

From 4da5c53776c1d617a2b9bb656da02640f1d6a211 Mon Sep 17 00:00:00 2001
From: Benjamin Bolm <74359358+bennibolm@users.noreply.github.com>
Date: Fri, 18 Aug 2023 12:37:06 +0200
Subject: [PATCH 121/163] Subcell positivity IDP limiting for conservative
 variables (#1476)

* Add IDP positivity limiting for conservative variables * Add elixir with modified blast wave * Add documentation * Fix parameter type * Adjust output of summary callback * Merge changes from `subcell-limiting` and `main` * Fix test with right time stepping * Implement first suggestions * Implement suggestions * Fix elixir * Relocate `perform_idp_correction!` * Rename variable in `snake_case` * Implement other suggestions * Rename container variables using `snake_case` * Delete timer * Merge `subcell-limiting` (Adapt docstrings) * Merge `subcell-limiting` * Merge `subcell-limiting` (Renaming and dispatch) * Fix documentation * Implement positivity limiter with numbers of cons vars * Merge suggestions already implemented in `subcell-limiting` * Fix elixir * Update docstring and output * Restructure parameter for positivity
limiting * Add test for "show" routine * Rename Limiters and Containers * Rename antidiffusive stage callback * Relocate subcell limiter code * Move create_cache routine to specific file * Implement suggestions * Implement suggestions --------- Co-authored-by: Hendrik Ranocha Co-authored-by: Michael Schlottke-Lakemper --- NEWS.md | 1 + .../elixir_euler_shockcapturing_subcell.jl | 92 +++++++ ...ubble_shockcapturing_subcell_positivity.jl | 140 ++++++++++ src/Trixi.jl | 5 +- src/callbacks_stage/callbacks_stage.jl | 1 + .../subcell_limiter_idp_correction.jl | 69 +++++ .../subcell_limiter_idp_correction_2d.jl | 44 ++++ src/solvers/dg.jl | 40 +++ src/solvers/dgsem_tree/containers_2d.jl | 136 +++++++++- src/solvers/dgsem_tree/dg.jl | 5 + .../dgsem_tree/dg_2d_subcell_limiters.jl | 193 ++++++++++++++ src/solvers/dgsem_tree/subcell_limiters.jl | 103 ++++++++ src/solvers/dgsem_tree/subcell_limiters_2d.jl | 114 +++++++++ src/time_integration/methods_SSP.jl | 241 ++++++++++++++++++ src/time_integration/time_integration.jl | 1 + test/test_tree_2d_euler.jl | 6 + test/test_tree_2d_eulermulti.jl | 8 + test/test_unit.jl | 39 +-- 18 files changed, 1218 insertions(+), 20 deletions(-) create mode 100644 examples/tree_2d_dgsem/elixir_euler_shockcapturing_subcell.jl create mode 100644 examples/tree_2d_dgsem/elixir_eulermulti_shock_bubble_shockcapturing_subcell_positivity.jl create mode 100644 src/callbacks_stage/subcell_limiter_idp_correction.jl create mode 100644 src/callbacks_stage/subcell_limiter_idp_correction_2d.jl create mode 100644 src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl create mode 100644 src/solvers/dgsem_tree/subcell_limiters.jl create mode 100644 src/solvers/dgsem_tree/subcell_limiters_2d.jl create mode 100644 src/time_integration/methods_SSP.jl diff --git a/NEWS.md b/NEWS.md index 10125c40d17..4b96e1e2834 100644 --- a/NEWS.md +++ b/NEWS.md @@ -12,6 +12,7 @@ for human readability. - Non-uniform `TreeMesh` available for hyperbolic-parabolic equations. - Capability to set truly discontinuous initial conditions in 1D. - Wetting and drying feature and examples for 1D and 2D shallow water equations +- Subcell positivity limiting support for conservative variables in 2D for `TreeMesh` #### Changed diff --git a/examples/tree_2d_dgsem/elixir_euler_shockcapturing_subcell.jl b/examples/tree_2d_dgsem/elixir_euler_shockcapturing_subcell.jl new file mode 100644 index 00000000000..6b69e4db563 --- /dev/null +++ b/examples/tree_2d_dgsem/elixir_euler_shockcapturing_subcell.jl @@ -0,0 +1,92 @@ + +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the compressible Euler equations + +equations = CompressibleEulerEquations2D(1.4) + +""" + initial_condition_blast_wave(x, t, equations::CompressibleEulerEquations2D) + +A medium blast wave (modified to lower density and higher pressure) taken from +- Sebastian Hennemann, Gregor J. Gassner (2020) + A provably entropy stable subcell shock capturing approach for high order split form DG + [arXiv: 2008.12044](https://arxiv.org/abs/2008.12044) +""" +function initial_condition_blast_wave(x, t, equations::CompressibleEulerEquations2D) + # Modified From Hennemann & Gassner JCP paper 2020 (Sec. 
6.3) -> modified to lower density, higher pressure + # Set up polar coordinates + inicenter = SVector(0.0, 0.0) + x_norm = x[1] - inicenter[1] + y_norm = x[2] - inicenter[2] + r = sqrt(x_norm^2 + y_norm^2) + phi = atan(y_norm, x_norm) + sin_phi, cos_phi = sincos(phi) + + # Calculate primitive variables "normal" medium blast wave + rho = r > 0.5 ? 0.1 : 0.2691 # rho = r > 0.5 ? 1 : 1.1691 + v1 = r > 0.5 ? 0.0 : 0.1882 * cos_phi + v2 = r > 0.5 ? 0.0 : 0.1882 * sin_phi + p = r > 0.5 ? 1.0E-1 : 1.245 # p = r > 0.5 ? 1.0E-3 : 1.245 + + return prim2cons(SVector(rho, v1, v2, p), equations) +end +initial_condition = initial_condition_blast_wave + +surface_flux = flux_lax_friedrichs +volume_flux = flux_ranocha +basis = LobattoLegendreBasis(3) +limiter_idp = SubcellLimiterIDP(equations, basis; + positivity_variables_cons=[1], + positivity_correction_factor=0.5) +volume_integral = VolumeIntegralSubcellLimiting(limiter_idp; + volume_flux_dg=volume_flux, + volume_flux_fv=surface_flux) +solver = DGSEM(basis, surface_flux, volume_integral) + +coordinates_min = (-2.0, -2.0) +coordinates_max = ( 2.0, 2.0) +mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level=5, + n_cells_max=100_000) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) + + +############################################################################### +# ODE solvers, callbacks etc. + +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +save_solution = SaveSolutionCallback(interval=100, + save_initial_solution=true, + save_final_solution=true, + solution_variables=cons2prim) + +stepsize_callback = StepsizeCallback(cfl=0.6) + +callbacks = CallbackSet(summary_callback, + analysis_callback, alive_callback, + save_solution, + stepsize_callback) + + +############################################################################### +# run the simulation + +stage_callbacks = (SubcellLimiterIDPCorrection(),) + +sol = Trixi.solve(ode, Trixi.SimpleSSPRK33(stage_callbacks=stage_callbacks); + dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); +summary_callback() # print the timer summary diff --git a/examples/tree_2d_dgsem/elixir_eulermulti_shock_bubble_shockcapturing_subcell_positivity.jl b/examples/tree_2d_dgsem/elixir_eulermulti_shock_bubble_shockcapturing_subcell_positivity.jl new file mode 100644 index 00000000000..a67eaeb5b2b --- /dev/null +++ b/examples/tree_2d_dgsem/elixir_eulermulti_shock_bubble_shockcapturing_subcell_positivity.jl @@ -0,0 +1,140 @@ +using OrdinaryDiffEq +using Trixi + +############################################################################### +# semidiscretization of the compressible Euler multicomponent equations + +# 1) Dry Air 2) Helium + 28% Air +equations = CompressibleEulerMulticomponentEquations2D(gammas = (1.4, 1.648), + gas_constants = (0.287, 1.578)) + +""" + initial_condition_shock_bubble(x, t, equations::CompressibleEulerMulticomponentEquations2D{5, 2}) + +A shock-bubble testcase for multicomponent Euler equations +- Ayoub Gouasmi, Karthik Duraisamy, Scott Murman + Formulation of Entropy-Stable schemes for the multicomponent compressible Euler equations + [arXiv: 1904.00972](https://arxiv.org/abs/1904.00972) +""" +function initial_condition_shock_bubble(x, t, 
equations::CompressibleEulerMulticomponentEquations2D{5, 2}) + # bubble test case, see Gouasmi et al. https://arxiv.org/pdf/1904.00972 + # other reference: https://www.researchgate.net/profile/Pep_Mulet/publication/222675930_A_flux-split_algorithm_applied_to_conservative_models_for_multicomponent_compressible_flows/links/568da54508aeaa1481ae7af0.pdf + # typical domain is rectangular, we change it to a square, as Trixi can only do squares + @unpack gas_constants = equations + + # Positivity Preserving Parameter, can be set to zero if scheme is positivity preserving + delta = 0.03 + + # Region I + rho1_1 = delta + rho2_1 = 1.225 * gas_constants[1]/gas_constants[2] - delta + v1_1 = zero(delta) + v2_1 = zero(delta) + p_1 = 101325 + + # Region II + rho1_2 = 1.225-delta + rho2_2 = delta + v1_2 = zero(delta) + v2_2 = zero(delta) + p_2 = 101325 + + # Region III + rho1_3 = 1.6861 - delta + rho2_3 = delta + v1_3 = -113.5243 + v2_3 = zero(delta) + p_3 = 159060 + + # Set up Region I & II: + inicenter = SVector(zero(delta), zero(delta)) + x_norm = x[1] - inicenter[1] + y_norm = x[2] - inicenter[2] + r = sqrt(x_norm^2 + y_norm^2) + + if (x[1] > 0.50) + # Set up Region III + rho1 = rho1_3 + rho2 = rho2_3 + v1 = v1_3 + v2 = v2_3 + p = p_3 + elseif (r < 0.25) + # Set up Region I + rho1 = rho1_1 + rho2 = rho2_1 + v1 = v1_1 + v2 = v2_1 + p = p_1 + else + # Set up Region II + rho1 = rho1_2 + rho2 = rho2_2 + v1 = v1_2 + v2 = v2_2 + p = p_2 + end + + return prim2cons(SVector(v1, v2, p, rho1, rho2), equations) +end +initial_condition = initial_condition_shock_bubble + +surface_flux = flux_lax_friedrichs +volume_flux = flux_ranocha +basis = LobattoLegendreBasis(3) + +limiter_idp = SubcellLimiterIDP(equations, basis; + positivity_variables_cons=[(i+3 for i in eachcomponent(equations))...]) + +volume_integral = VolumeIntegralSubcellLimiting(limiter_idp; + volume_flux_dg=volume_flux, + volume_flux_fv=surface_flux) + +solver = DGSEM(basis, surface_flux, volume_integral) + +coordinates_min = (-2.25, -2.225) +coordinates_max = ( 2.20, 2.225) +mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level=3, + n_cells_max=1_000_000) + +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) + + +############################################################################### +# ODE solvers, callbacks etc. 
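+# NOTE (illustrative comment, based on the documented behavior of
+# `SubcellLimiterIDP`): the limiter itself only computes the blending
+# coefficients; the actual antidiffusive correction is applied by the
+# `SubcellLimiterIDPCorrection` stage callback inside Trixi's SSP time
+# integrator, see the `Trixi.solve` call at the end of this elixir.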
+ +tspan = (0.0, 0.01) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() + +analysis_interval = 300 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval, + extra_analysis_integrals=(Trixi.density,)) + +alive_callback = AliveCallback(analysis_interval=analysis_interval) + +save_solution = SaveSolutionCallback(interval=300, + save_initial_solution=true, + save_final_solution=true, + solution_variables=cons2prim) + +stepsize_callback = StepsizeCallback(cfl=0.9) + +callbacks = CallbackSet(summary_callback, + analysis_callback, + alive_callback, + save_solution, + stepsize_callback) + + +############################################################################### +# run the simulation + +stage_callbacks = (SubcellLimiterIDPCorrection(),) + +sol = Trixi.solve(ode, Trixi.SimpleSSPRK33(stage_callbacks=stage_callbacks); + dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep=false, callback=callbacks); +summary_callback() # print the timer summary \ No newline at end of file diff --git a/src/Trixi.jl b/src/Trixi.jl index 78ddaa3ca7f..ec4d20558e5 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -121,10 +121,10 @@ include("semidiscretization/semidiscretization_hyperbolic.jl") include("semidiscretization/semidiscretization_hyperbolic_parabolic.jl") include("semidiscretization/semidiscretization_euler_acoustics.jl") include("semidiscretization/semidiscretization_coupled.jl") +include("time_integration/time_integration.jl") include("callbacks_step/callbacks_step.jl") include("callbacks_stage/callbacks_stage.jl") include("semidiscretization/semidiscretization_euler_gravity.jl") -include("time_integration/time_integration.jl") # `trixi_include` and special elixirs such as `convergence_test` include("auxiliary/special_elixirs.jl") @@ -229,6 +229,9 @@ export DG, SurfaceIntegralUpwind, MortarL2 +export VolumeIntegralSubcellLimiting, + SubcellLimiterIDP, SubcellLimiterIDPCorrection + export nelements, nnodes, nvariables, eachelement, eachnode, eachvariable diff --git a/src/callbacks_stage/callbacks_stage.jl b/src/callbacks_stage/callbacks_stage.jl index ab0f34efb78..976af327e6f 100644 --- a/src/callbacks_stage/callbacks_stage.jl +++ b/src/callbacks_stage/callbacks_stage.jl @@ -6,6 +6,7 @@ #! format: noindent include("positivity_zhang_shu.jl") +include("subcell_limiter_idp_correction.jl") # TODO: TrixiShallowWater: move specific limiter file include("positivity_shallow_water.jl") end # @muladd diff --git a/src/callbacks_stage/subcell_limiter_idp_correction.jl b/src/callbacks_stage/subcell_limiter_idp_correction.jl new file mode 100644 index 00000000000..69125ebecd9 --- /dev/null +++ b/src/callbacks_stage/subcell_limiter_idp_correction.jl @@ -0,0 +1,69 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +""" + SubcellLimiterIDPCorrection() + +Perform antidiffusive correction stage for the a posteriori IDP limiter [`SubcellLimiterIDP`](@ref) +called with [`VolumeIntegralSubcellLimiting`](@ref). + +!!! note + This callback and the actual limiter [`SubcellLimiterIDP`](@ref) only work together. + This is not a replacement but a necessary addition. 
+ +## References + +- Rueda-Ramírez, Pazner, Gassner (2022) + Subcell Limiting Strategies for Discontinuous Galerkin Spectral Element Methods + [DOI: 10.1016/j.compfluid.2022.105627](https://doi.org/10.1016/j.compfluid.2022.105627) +- Pazner (2020) + Sparse invariant domain preserving discontinuous Galerkin methods with subcell convex limiting + [DOI: 10.1016/j.cma.2021.113876](https://doi.org/10.1016/j.cma.2021.113876) + +!!! warning "Experimental implementation" + This is an experimental feature and may change in future releases. +""" +struct SubcellLimiterIDPCorrection end + +function (limiter!::SubcellLimiterIDPCorrection)(u_ode, + integrator::Trixi.SimpleIntegratorSSP, + stage) + semi = integrator.p + limiter!(u_ode, semi, integrator.t, integrator.dt, + semi.solver.volume_integral) +end + +function (limiter!::SubcellLimiterIDPCorrection)(u_ode, semi, t, dt, + volume_integral::VolumeIntegralSubcellLimiting) + @trixi_timeit timer() "a posteriori limiter" limiter!(u_ode, semi, t, dt, + volume_integral.limiter) +end + +function (limiter!::SubcellLimiterIDPCorrection)(u_ode, semi, t, dt, + limiter::SubcellLimiterIDP) + mesh, equations, solver, cache = mesh_equations_solver_cache(semi) + + u = wrap_array(u_ode, mesh, equations, solver, cache) + + # Calculate blending factor alpha in [0,1] + # f_ij = alpha_ij * f^(FV)_ij + (1 - alpha_ij) * f^(DG)_ij + # = f^(FV)_ij + (1 - alpha_ij) * f^(antidiffusive)_ij + @trixi_timeit timer() "blending factors" solver.volume_integral.limiter(u, semi, + solver, t, + dt) + + perform_idp_correction!(u, dt, mesh, equations, solver, cache) + + return nothing +end + +init_callback(limiter!::SubcellLimiterIDPCorrection, semi) = nothing + +finalize_callback(limiter!::SubcellLimiterIDPCorrection, semi) = nothing + +include("subcell_limiter_idp_correction_2d.jl") +end # @muladd diff --git a/src/callbacks_stage/subcell_limiter_idp_correction_2d.jl b/src/callbacks_stage/subcell_limiter_idp_correction_2d.jl new file mode 100644 index 00000000000..f6b91444578 --- /dev/null +++ b/src/callbacks_stage/subcell_limiter_idp_correction_2d.jl @@ -0,0 +1,44 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +function perform_idp_correction!(u, dt, mesh::TreeMesh2D, equations, dg, cache) + @unpack inverse_weights = dg.basis + @unpack antidiffusive_flux1, antidiffusive_flux2 = cache.antidiffusive_fluxes + @unpack alpha1, alpha2 = dg.volume_integral.limiter.cache.subcell_limiter_coefficients + + @threaded for element in eachelement(dg, cache) + # Sign switch as in apply_jacobian! 
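+            # (the semidiscretization's right-hand side is the negative flux
+            # divergence, so the positive inverse Jacobian is applied with a
+            # leading minus sign, consistent with `apply_jacobian!`)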
+
+        inverse_jacobian = -cache.elements.inverse_jacobian[element]
+
+        for j in eachnode(dg), i in eachnode(dg)
+            # Note: antidiffusive_flux1[v, i, xi, element] = antidiffusive_flux2[v, xi, i, element] = 0 for all i in 1:nnodes and xi in {1, nnodes+1}
+            alpha_flux1 = (1 - alpha1[i, j, element]) *
+                          get_node_vars(antidiffusive_flux1, equations, dg, i, j,
+                                        element)
+            alpha_flux1_ip1 = (1 - alpha1[i + 1, j, element]) *
+                              get_node_vars(antidiffusive_flux1, equations, dg, i + 1,
+                                            j, element)
+            alpha_flux2 = (1 - alpha2[i, j, element]) *
+                          get_node_vars(antidiffusive_flux2, equations, dg, i, j,
+                                        element)
+            alpha_flux2_jp1 = (1 - alpha2[i, j + 1, element]) *
+                              get_node_vars(antidiffusive_flux2, equations, dg, i,
+                                            j + 1, element)
+
+            for v in eachvariable(equations)
+                u[v, i, j, element] += dt * inverse_jacobian *
+                                       (inverse_weights[i] *
+                                        (alpha_flux1_ip1[v] - alpha_flux1[v]) +
+                                        inverse_weights[j] *
+                                        (alpha_flux2_jp1[v] - alpha_flux2[v]))
+            end
+        end
+    end
+
+    return nothing
+end
+end # @muladd
diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl
index 495e0ffc4a4..36bbc6de361 100644
--- a/src/solvers/dg.jl
+++ b/src/solvers/dg.jl
@@ -174,6 +174,46 @@ function Base.show(io::IO, ::MIME"text/plain",
     end
 end
 
+"""
+    VolumeIntegralSubcellLimiting(limiter;
+                                  volume_flux_dg, volume_flux_fv)
+
+A subcell limiting volume integral type for DG methods based on subcell blending approaches
+with a low-order FV method. Used with limiter [`SubcellLimiterIDP`](@ref).
+
+!!! warning "Experimental implementation"
+    This is an experimental feature and may change in future releases.
+"""
+struct VolumeIntegralSubcellLimiting{VolumeFluxDG, VolumeFluxFV, Limiter} <:
+       AbstractVolumeIntegral
+    volume_flux_dg::VolumeFluxDG
+    volume_flux_fv::VolumeFluxFV
+    limiter::Limiter
+end
+
+function VolumeIntegralSubcellLimiting(limiter; volume_flux_dg,
+                                       volume_flux_fv)
+    VolumeIntegralSubcellLimiting{typeof(volume_flux_dg), typeof(volume_flux_fv),
+                                  typeof(limiter)}(volume_flux_dg, volume_flux_fv,
+                                                   limiter)
+end
+
+function Base.show(io::IO, mime::MIME"text/plain",
+                   integral::VolumeIntegralSubcellLimiting)
+    @nospecialize integral # reduce precompilation time
+
+    if get(io, :compact, false)
+        show(io, integral)
+    else
+        summary_header(io, "VolumeIntegralSubcellLimiting")
+        summary_line(io, "volume flux DG", integral.volume_flux_dg)
+        summary_line(io, "volume flux FV", integral.volume_flux_fv)
+        summary_line(io, "limiter", integral.limiter |> typeof |> nameof)
+        show(increment_indent(io), mime, integral.limiter)
+        summary_footer(io)
+    end
+end
+
 # TODO: FD. Should this definition live in a different file because it is
 # not strictly a DG method?
 """
diff --git a/src/solvers/dgsem_tree/containers_2d.jl b/src/solvers/dgsem_tree/containers_2d.jl
index d80522d42fd..9148b936312 100644
--- a/src/solvers/dgsem_tree/containers_2d.jl
+++ b/src/solvers/dgsem_tree/containers_2d.jl
@@ -77,7 +77,7 @@ end
    eachelement(elements::ElementContainer2D)
 
 Return an iterator over the indices that specify the location in relevant data structures
-for the elements in `elements`.
+for the elements in `elements`. In particular, the elements themselves are not returned.
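+For example, `for element in eachelement(elements)` iterates over integer indices that
+can be used to address element data, not over the element data itself.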
""" @inline eachelement(elements::ElementContainer2D) = Base.OneTo(nelements(elements)) @@ -1254,4 +1254,138 @@ function init_mpi_mortars!(mpi_mortars, elements, mesh::TreeMesh2D) return nothing end + +# Container data structure (structure-of-arrays style) for FCT-type antidiffusive fluxes +# (i, j+1) +# | +# flux2(i, j+1) +# | +# (i-1, j) ---flux1(i, j)--- (i, j) ---flux1(i+1, j)--- (i+1, j) +# | +# flux2(i, j) +# | +# (i, j-1) +mutable struct ContainerAntidiffusiveFlux2D{uEltype <: Real} + antidiffusive_flux1::Array{uEltype, 4} # [variables, i, j, elements] + antidiffusive_flux2::Array{uEltype, 4} # [variables, i, j, elements] + # internal `resize!`able storage + _antidiffusive_flux1::Vector{uEltype} + _antidiffusive_flux2::Vector{uEltype} +end + +function ContainerAntidiffusiveFlux2D{uEltype}(capacity::Integer, n_variables, + n_nodes) where {uEltype <: Real} + nan_uEltype = convert(uEltype, NaN) + + # Initialize fields with defaults + _antidiffusive_flux1 = fill(nan_uEltype, + n_variables * (n_nodes + 1) * n_nodes * capacity) + antidiffusive_flux1 = unsafe_wrap(Array, pointer(_antidiffusive_flux1), + (n_variables, n_nodes + 1, n_nodes, capacity)) + + _antidiffusive_flux2 = fill(nan_uEltype, + n_variables * n_nodes * (n_nodes + 1) * capacity) + antidiffusive_flux2 = unsafe_wrap(Array, pointer(_antidiffusive_flux2), + (n_variables, n_nodes, n_nodes + 1, capacity)) + + return ContainerAntidiffusiveFlux2D{uEltype}(antidiffusive_flux1, + antidiffusive_flux2, + _antidiffusive_flux1, + _antidiffusive_flux2) +end + +nvariables(fluxes::ContainerAntidiffusiveFlux2D) = size(fluxes.antidiffusive_flux1, 1) +nnodes(fluxes::ContainerAntidiffusiveFlux2D) = size(fluxes.antidiffusive_flux1, 3) + +# Only one-dimensional `Array`s are `resize!`able in Julia. +# Hence, we use `Vector`s as internal storage and `resize!` +# them whenever needed. Then, we reuse the same memory by +# `unsafe_wrap`ping multi-dimensional `Array`s around the +# internal storage. 
+function Base.resize!(fluxes::ContainerAntidiffusiveFlux2D, capacity) + n_nodes = nnodes(fluxes) + n_variables = nvariables(fluxes) + + @unpack _antidiffusive_flux1, _antidiffusive_flux2 = fluxes + + resize!(_antidiffusive_flux1, n_variables * (n_nodes + 1) * n_nodes * capacity) + fluxes.antidiffusive_flux1 = unsafe_wrap(Array, pointer(_antidiffusive_flux1), + (n_variables, n_nodes + 1, n_nodes, + capacity)) + resize!(_antidiffusive_flux2, n_variables * n_nodes * (n_nodes + 1) * capacity) + fluxes.antidiffusive_flux2 = unsafe_wrap(Array, pointer(_antidiffusive_flux2), + (n_variables, n_nodes, n_nodes + 1, + capacity)) + + return nothing +end + +# Container data structure (structure-of-arrays style) for variables used for IDP limiting +mutable struct ContainerSubcellLimiterIDP2D{uEltype <: Real} + alpha::Array{uEltype, 3} # [i, j, element] + alpha1::Array{uEltype, 3} + alpha2::Array{uEltype, 3} + variable_bounds::Vector{Array{uEltype, 3}} + # internal `resize!`able storage + _alpha::Vector{uEltype} + _alpha1::Vector{uEltype} + _alpha2::Vector{uEltype} + _variable_bounds::Vector{Vector{uEltype}} +end + +function ContainerSubcellLimiterIDP2D{uEltype}(capacity::Integer, n_nodes, + length) where {uEltype <: Real} + nan_uEltype = convert(uEltype, NaN) + + # Initialize fields with defaults + _alpha = fill(nan_uEltype, n_nodes * n_nodes * capacity) + alpha = unsafe_wrap(Array, pointer(_alpha), (n_nodes, n_nodes, capacity)) + _alpha1 = fill(nan_uEltype, (n_nodes + 1) * n_nodes * capacity) + alpha1 = unsafe_wrap(Array, pointer(_alpha1), (n_nodes + 1, n_nodes, capacity)) + _alpha2 = fill(nan_uEltype, n_nodes * (n_nodes + 1) * capacity) + alpha2 = unsafe_wrap(Array, pointer(_alpha2), (n_nodes, n_nodes + 1, capacity)) + + _variable_bounds = Vector{Vector{uEltype}}(undef, length) + variable_bounds = Vector{Array{uEltype, 3}}(undef, length) + for i in 1:length + _variable_bounds[i] = fill(nan_uEltype, n_nodes * n_nodes * capacity) + variable_bounds[i] = unsafe_wrap(Array, pointer(_variable_bounds[i]), + (n_nodes, n_nodes, capacity)) + end + + return ContainerSubcellLimiterIDP2D{uEltype}(alpha, alpha1, alpha2, + variable_bounds, + _alpha, _alpha1, _alpha2, + _variable_bounds) +end + +nnodes(container::ContainerSubcellLimiterIDP2D) = size(container.alpha, 1) + +# Only one-dimensional `Array`s are `resize!`able in Julia. +# Hence, we use `Vector`s as internal storage and `resize!` +# them whenever needed. Then, we reuse the same memory by +# `unsafe_wrap`ping multi-dimensional `Array`s around the +# internal storage. 
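+#
+# Note that the number of bounds arrays in `variable_bounds` (one per limiting
+# criterion, i.e., here one per positivity variable) is fixed at construction time;
+# `resize!` only adjusts the element capacity of each array.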
+function Base.resize!(container::ContainerSubcellLimiterIDP2D, capacity) + n_nodes = nnodes(container) + + @unpack _alpha, _alpha1, _alpha2 = container + resize!(_alpha, n_nodes * n_nodes * capacity) + container.alpha = unsafe_wrap(Array, pointer(_alpha), (n_nodes, n_nodes, capacity)) + resize!(_alpha1, (n_nodes + 1) * n_nodes * capacity) + container.alpha1 = unsafe_wrap(Array, pointer(_alpha1), + (n_nodes + 1, n_nodes, capacity)) + resize!(_alpha2, n_nodes * (n_nodes + 1) * capacity) + container.alpha2 = unsafe_wrap(Array, pointer(_alpha2), + (n_nodes, n_nodes + 1, capacity)) + + @unpack _variable_bounds = container + for i in 1:length(_variable_bounds) + resize!(_variable_bounds[i], n_nodes * n_nodes * capacity) + container.variable_bounds[i] = unsafe_wrap(Array, pointer(_variable_bounds[i]), + (n_nodes, n_nodes, capacity)) + end + + return nothing +end end # @muladd diff --git a/src/solvers/dgsem_tree/dg.jl b/src/solvers/dgsem_tree/dg.jl index cb28dad968c..6e02bc1d94a 100644 --- a/src/solvers/dgsem_tree/dg.jl +++ b/src/solvers/dgsem_tree/dg.jl @@ -71,4 +71,9 @@ include("dg_3d_parabolic.jl") # as well as specialized implementations used to improve performance include("dg_2d_compressible_euler.jl") include("dg_3d_compressible_euler.jl") + +# Subcell limiters +include("subcell_limiters.jl") +include("subcell_limiters_2d.jl") +include("dg_2d_subcell_limiters.jl") end # @muladd diff --git a/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl new file mode 100644 index 00000000000..70ff346740d --- /dev/null +++ b/src/solvers/dgsem_tree/dg_2d_subcell_limiters.jl @@ -0,0 +1,193 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! 
format: noindent + +function create_cache(mesh::TreeMesh{2}, equations, + volume_integral::VolumeIntegralSubcellLimiting, dg::DG, uEltype) + cache = create_cache(mesh, equations, + VolumeIntegralPureLGLFiniteVolume(volume_integral.volume_flux_fv), + dg, uEltype) + + A3dp1_x = Array{uEltype, 3} + A3dp1_y = Array{uEltype, 3} + A3d = Array{uEltype, 3} + + fhat1_threaded = A3dp1_x[A3dp1_x(undef, nvariables(equations), nnodes(dg) + 1, + nnodes(dg)) for _ in 1:Threads.nthreads()] + fhat2_threaded = A3dp1_y[A3dp1_y(undef, nvariables(equations), nnodes(dg), + nnodes(dg) + 1) for _ in 1:Threads.nthreads()] + flux_temp_threaded = A3d[A3d(undef, nvariables(equations), nnodes(dg), nnodes(dg)) + for _ in 1:Threads.nthreads()] + + antidiffusive_fluxes = Trixi.ContainerAntidiffusiveFlux2D{uEltype}(0, + nvariables(equations), + nnodes(dg)) + + return (; cache..., antidiffusive_fluxes, fhat1_threaded, fhat2_threaded, + flux_temp_threaded) +end + +function calc_volume_integral!(du, u, + mesh::TreeMesh{2}, + nonconservative_terms, equations, + volume_integral::VolumeIntegralSubcellLimiting, + dg::DGSEM, cache) + @unpack limiter = volume_integral + + @threaded for element in eachelement(dg, cache) + subcell_limiting_kernel!(du, u, element, mesh, + nonconservative_terms, equations, + volume_integral, limiter, + dg, cache) + end +end + +@inline function subcell_limiting_kernel!(du, u, + element, mesh::TreeMesh{2}, + nonconservative_terms::False, equations, + volume_integral, limiter::SubcellLimiterIDP, + dg::DGSEM, cache) + @unpack inverse_weights = dg.basis + @unpack volume_flux_dg, volume_flux_fv = volume_integral + + # high-order DG fluxes + @unpack fhat1_threaded, fhat2_threaded = cache + + fhat1 = fhat1_threaded[Threads.threadid()] + fhat2 = fhat2_threaded[Threads.threadid()] + calcflux_fhat!(fhat1, fhat2, u, mesh, + nonconservative_terms, equations, volume_flux_dg, dg, element, cache) + + # low-order FV fluxes + @unpack fstar1_L_threaded, fstar1_R_threaded, fstar2_L_threaded, fstar2_R_threaded = cache + + fstar1_L = fstar1_L_threaded[Threads.threadid()] + fstar2_L = fstar2_L_threaded[Threads.threadid()] + fstar1_R = fstar1_R_threaded[Threads.threadid()] + fstar2_R = fstar2_R_threaded[Threads.threadid()] + calcflux_fv!(fstar1_L, fstar1_R, fstar2_L, fstar2_R, u, mesh, + nonconservative_terms, equations, volume_flux_fv, dg, element, cache) + + # antidiffusive flux + calcflux_antidiffusive!(fhat1, fhat2, fstar1_L, fstar2_L, u, mesh, + nonconservative_terms, equations, limiter, dg, element, + cache) + + # Calculate volume integral contribution of low-order FV flux + for j in eachnode(dg), i in eachnode(dg) + for v in eachvariable(equations) + du[v, i, j, element] += inverse_weights[i] * + (fstar1_L[v, i + 1, j] - fstar1_R[v, i, j]) + + inverse_weights[j] * + (fstar2_L[v, i, j + 1] - fstar2_R[v, i, j]) + end + end + + return nothing +end + +# Calculate the DG staggered volume fluxes `fhat` in subcell FV-form inside the element +# (**without non-conservative terms**). +# +# See also `flux_differencing_kernel!`. 
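+#
+# Because `fhat` is built by the telescoping recursion below, the interface differences
+# fhat_(j,j+1) - fhat_(j-1,j) reproduce the split-form volume integral. The high-order
+# update is thus available in the same subcell FV form as the low-order method, which
+# is what makes the subtraction in `calcflux_antidiffusive!` meaningful.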
+
+@inline function calcflux_fhat!(fhat1, fhat2, u,
+                                mesh::TreeMesh{2}, nonconservative_terms::False,
+                                equations,
+                                volume_flux, dg::DGSEM, element, cache)
+    @unpack weights, derivative_split = dg.basis
+    @unpack flux_temp_threaded = cache
+
+    flux_temp = flux_temp_threaded[Threads.threadid()]
+
+    # The FV-form fluxes are calculated in a recursive manner, i.e.:
+    # fhat_(0,1) = w_0 * FVol_0,
+    # fhat_(j,j+1) = fhat_(j-1,j) + w_j * FVol_j, for j=1,...,N-1,
+    # with the split form volume fluxes FVol_j = -2 * sum_i=0^N D_ji f*_(j,i).
+
+    # To use the symmetry of the `volume_flux`, the split form volume flux is precalculated
+    # like in `calc_volume_integral!` for the `VolumeIntegralFluxDifferencing`
+    # and saved in `flux_temp`.
+
+    # Split form volume flux in orientation 1: x direction
+    flux_temp .= zero(eltype(flux_temp))
+
+    for j in eachnode(dg), i in eachnode(dg)
+        u_node = get_node_vars(u, equations, dg, i, j, element)
+
+        # All diagonal entries of `derivative_split` are zero. Thus, we can skip
+        # the computation of the diagonal terms. In addition, we use the symmetry
+        # of the `volume_flux` to save half of the possible two-point flux
+        # computations.
+        for ii in (i + 1):nnodes(dg)
+            u_node_ii = get_node_vars(u, equations, dg, ii, j, element)
+            flux1 = volume_flux(u_node, u_node_ii, 1, equations)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[i, ii], flux1,
+                                       equations, dg, i, j)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[ii, i], flux1,
+                                       equations, dg, ii, j)
+        end
+    end
+
+    # FV-form flux `fhat` in x direction
+    fhat1[:, 1, :] .= zero(eltype(fhat1))
+    fhat1[:, nnodes(dg) + 1, :] .= zero(eltype(fhat1))
+
+    for j in eachnode(dg), i in 1:(nnodes(dg) - 1), v in eachvariable(equations)
+        fhat1[v, i + 1, j] = fhat1[v, i, j] + weights[i] * flux_temp[v, i, j]
+    end
+
+    # Split form volume flux in orientation 2: y direction
+    flux_temp .= zero(eltype(flux_temp))
+
+    for j in eachnode(dg), i in eachnode(dg)
+        u_node = get_node_vars(u, equations, dg, i, j, element)
+        for jj in (j + 1):nnodes(dg)
+            u_node_jj = get_node_vars(u, equations, dg, i, jj, element)
+            flux2 = volume_flux(u_node, u_node_jj, 2, equations)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[j, jj], flux2,
+                                       equations, dg, i, j)
+            multiply_add_to_node_vars!(flux_temp, derivative_split[jj, j], flux2,
+                                       equations, dg, i, jj)
+        end
+    end
+
+    # FV-form flux `fhat` in y direction
+    fhat2[:, :, 1] .= zero(eltype(fhat2))
+    fhat2[:, :, nnodes(dg) + 1] .= zero(eltype(fhat2))
+
+    for j in 1:(nnodes(dg) - 1), i in eachnode(dg), v in eachvariable(equations)
+        fhat2[v, i, j + 1] = fhat2[v, i, j] + weights[j] * flux_temp[v, i, j]
+    end
+
+    return nothing
+end
+
+# Calculate the antidiffusive flux `antidiffusive_flux` as the subtraction between `fhat` and `fstar`.
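+# The correction only acts on interior subcell interfaces; at the element boundaries
+# the antidiffusive flux is set to zero below (the coupling between elements is
+# handled by the surface integral instead).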
+@inline function calcflux_antidiffusive!(fhat1, fhat2, fstar1, fstar2, u, mesh, + nonconservative_terms, equations, + limiter::SubcellLimiterIDP, dg, element, cache) + @unpack antidiffusive_flux1, antidiffusive_flux2 = cache.antidiffusive_fluxes + + for j in eachnode(dg), i in 2:nnodes(dg) + for v in eachvariable(equations) + antidiffusive_flux1[v, i, j, element] = fhat1[v, i, j] - fstar1[v, i, j] + end + end + for j in 2:nnodes(dg), i in eachnode(dg) + for v in eachvariable(equations) + antidiffusive_flux2[v, i, j, element] = fhat2[v, i, j] - fstar2[v, i, j] + end + end + + antidiffusive_flux1[:, 1, :, element] .= zero(eltype(antidiffusive_flux1)) + antidiffusive_flux1[:, nnodes(dg) + 1, :, element] .= zero(eltype(antidiffusive_flux1)) + + antidiffusive_flux2[:, :, 1, element] .= zero(eltype(antidiffusive_flux2)) + antidiffusive_flux2[:, :, nnodes(dg) + 1, element] .= zero(eltype(antidiffusive_flux2)) + + return nothing +end +end # @muladd diff --git a/src/solvers/dgsem_tree/subcell_limiters.jl b/src/solvers/dgsem_tree/subcell_limiters.jl new file mode 100644 index 00000000000..3a707de3bc7 --- /dev/null +++ b/src/solvers/dgsem_tree/subcell_limiters.jl @@ -0,0 +1,103 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +abstract type AbstractSubcellLimiter end + +function create_cache(typ::Type{LimiterType}, + semi) where {LimiterType <: AbstractSubcellLimiter} + create_cache(typ, mesh_equations_solver_cache(semi)...) +end + +""" + SubcellLimiterIDP(equations::AbstractEquations, basis; + positivity_variables_cons = [], + positivity_correction_factor = 0.1) + +Subcell invariant domain preserving (IDP) limiting used with [`VolumeIntegralSubcellLimiting`](@ref) +including: +- positivity limiting for conservative variables (`positivity_variables_cons`) + +The bounds are calculated using the low-order FV solution. The positivity limiter uses +`positivity_correction_factor` such that `u^new >= positivity_correction_factor * u^FV`. + +!!! note + This limiter and the correction callback [`SubcellLimiterIDPCorrection`](@ref) only work together. + Without the callback, no limiting takes place, leading to a standard flux-differencing DGSEM scheme. + +## References + +- Rueda-Ramírez, Pazner, Gassner (2022) + Subcell Limiting Strategies for Discontinuous Galerkin Spectral Element Methods + [DOI: 10.1016/j.compfluid.2022.105627](https://doi.org/10.1016/j.compfluid.2022.105627) +- Pazner (2020) + Sparse invariant domain preserving discontinuous Galerkin methods with subcell convex limiting + [DOI: 10.1016/j.cma.2021.113876](https://doi.org/10.1016/j.cma.2021.113876) + +!!! warning "Experimental implementation" + This is an experimental feature and may change in future releases. 
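+
+A minimal construction sketch (assuming 2D compressible Euler, where the density is the
+first conservative variable; the flux choices are illustrative, as in the subcell
+shock-capturing elixirs):
+
+```julia
+limiter_idp = SubcellLimiterIDP(equations, basis; positivity_variables_cons = [1])
+volume_integral = VolumeIntegralSubcellLimiting(limiter_idp;
+                                                volume_flux_dg = flux_ranocha,
+                                                volume_flux_fv = flux_lax_friedrichs)
+```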
+""" +struct SubcellLimiterIDP{RealT <: Real, Cache} <: AbstractSubcellLimiter + positivity::Bool + positivity_variables_cons::Vector{Int} # Positivity for conservative variables + positivity_correction_factor::RealT + cache::Cache +end + +# this method is used when the indicator is constructed as for shock-capturing volume integrals +function SubcellLimiterIDP(equations::AbstractEquations, basis; + positivity_variables_cons = [], + positivity_correction_factor = 0.1) + positivity = (length(positivity_variables_cons) > 0) + number_bounds = length(positivity_variables_cons) + + cache = create_cache(SubcellLimiterIDP, equations, basis, number_bounds) + + SubcellLimiterIDP{typeof(positivity_correction_factor), typeof(cache)}(positivity, + positivity_variables_cons, + positivity_correction_factor, + cache) +end + +function Base.show(io::IO, limiter::SubcellLimiterIDP) + @nospecialize limiter # reduce precompilation time + @unpack positivity = limiter + + print(io, "SubcellLimiterIDP(") + if !(positivity) + print(io, "No limiter selected => pure DG method") + else + print(io, "limiter=(") + positivity && print(io, "positivity") + print(io, "), ") + end + print(io, ")") +end + +function Base.show(io::IO, ::MIME"text/plain", limiter::SubcellLimiterIDP) + @nospecialize limiter # reduce precompilation time + @unpack positivity = limiter + + if get(io, :compact, false) + show(io, limiter) + else + if !(positivity) + setup = ["limiter" => "No limiter selected => pure DG method"] + else + setup = ["limiter" => ""] + if positivity + string = "positivity with conservative variables $(limiter.positivity_variables_cons)" + setup = [setup..., "" => string] + setup = [ + setup..., + "" => " positivity correction factor = $(limiter.positivity_correction_factor)", + ] + end + end + summary_box(io, "SubcellLimiterIDP", setup) + end +end +end # @muladd diff --git a/src/solvers/dgsem_tree/subcell_limiters_2d.jl b/src/solvers/dgsem_tree/subcell_limiters_2d.jl new file mode 100644 index 00000000000..09ab84ed11a --- /dev/null +++ b/src/solvers/dgsem_tree/subcell_limiters_2d.jl @@ -0,0 +1,114 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +# this method is used when the limiter is constructed as for shock-capturing volume integrals +function create_cache(indicator::Type{SubcellLimiterIDP}, + equations::AbstractEquations{2}, + basis::LobattoLegendreBasis, number_bounds) + subcell_limiter_coefficients = Trixi.ContainerSubcellLimiterIDP2D{real(basis) + }(0, + nnodes(basis), + number_bounds) + + cache = (; subcell_limiter_coefficients) + + return cache +end + +function (limiter::SubcellLimiterIDP)(u::AbstractArray{<:Any, 4}, semi, dg::DGSEM, t, + dt; + kwargs...) 
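+    # Reset all blending coefficients, let each enabled limiting criterion (here:
+    # positivity) raise them where needed, and finally project the nodal coefficients
+    # onto the subcell interfaces (`alpha1`, `alpha2`) as the maximum of the two
+    # adjacent nodes.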
+ @unpack alpha = limiter.cache.subcell_limiter_coefficients + alpha .= zero(eltype(alpha)) + + if limiter.positivity + @trixi_timeit timer() "positivity" idp_positivity!(alpha, limiter, u, dt, + semi) + end + + # Calculate alpha1 and alpha2 + @unpack alpha1, alpha2 = limiter.cache.subcell_limiter_coefficients + @threaded for element in eachelement(dg, semi.cache) + for j in eachnode(dg), i in 2:nnodes(dg) + alpha1[i, j, element] = max(alpha[i - 1, j, element], alpha[i, j, element]) + end + for j in 2:nnodes(dg), i in eachnode(dg) + alpha2[i, j, element] = max(alpha[i, j - 1, element], alpha[i, j, element]) + end + alpha1[1, :, element] .= zero(eltype(alpha1)) + alpha1[nnodes(dg) + 1, :, element] .= zero(eltype(alpha1)) + alpha2[:, 1, element] .= zero(eltype(alpha2)) + alpha2[:, nnodes(dg) + 1, element] .= zero(eltype(alpha2)) + end + + return nothing +end + +@inline function idp_positivity!(alpha, limiter, u, dt, semi) + # Conservative variables + for (index, variable) in enumerate(limiter.positivity_variables_cons) + idp_positivity!(alpha, limiter, u, dt, semi, variable, index) + end + + return nothing +end + +@inline function idp_positivity!(alpha, limiter, u, dt, semi, variable, index) + mesh, equations, dg, cache = mesh_equations_solver_cache(semi) + @unpack antidiffusive_flux1, antidiffusive_flux2 = cache.antidiffusive_fluxes + @unpack inverse_weights = dg.basis + @unpack positivity_correction_factor = limiter + + @unpack variable_bounds = limiter.cache.subcell_limiter_coefficients + + var_min = variable_bounds[index] + + @threaded for element in eachelement(dg, semi.cache) + inverse_jacobian = cache.elements.inverse_jacobian[element] + for j in eachnode(dg), i in eachnode(dg) + var = u[variable, i, j, element] + if var < 0 + error("Safe $variable is not safe. element=$element, node: $i $j, value=$var") + end + + # Compute bound + var_min[i, j, element] = positivity_correction_factor * var + + # Real one-sided Zalesak-type limiter + # * Zalesak (1979). "Fully multidimensional flux-corrected transport algorithms for fluids" + # * Kuzmin et al. (2010). "Failsafe flux limiting and constrained data projections for equations of gas dynamics" + # Note: The Zalesak limiter has to be computed, even if the state is valid, because the correction is + # for each interface, not each node + Qm = min(0, (var_min[i, j, element] - var) / dt) + + # Calculate Pm + # Note: Boundaries of antidiffusive_flux1/2 are constant 0, so they make no difference here. 
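+            # (Pm accumulates all value-decreasing antidiffusive contributions at the
+            # node, while Qm is the admissible decrease towards var_min; their ratio
+            # below yields the blending coefficient.)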
+ val_flux1_local = inverse_weights[i] * + antidiffusive_flux1[variable, i, j, element] + val_flux1_local_ip1 = -inverse_weights[i] * + antidiffusive_flux1[variable, i + 1, j, element] + val_flux2_local = inverse_weights[j] * + antidiffusive_flux2[variable, i, j, element] + val_flux2_local_jp1 = -inverse_weights[j] * + antidiffusive_flux2[variable, i, j + 1, element] + + Pm = min(0, val_flux1_local) + min(0, val_flux1_local_ip1) + + min(0, val_flux2_local) + min(0, val_flux2_local_jp1) + Pm = inverse_jacobian * Pm + + # Compute blending coefficient avoiding division by zero + # (as in paper of [Guermond, Nazarov, Popov, Thomas] (4.8)) + Qm = abs(Qm) / (abs(Pm) + eps(typeof(Qm)) * 100) + + # Calculate alpha + alpha[i, j, element] = max(alpha[i, j, element], 1 - Qm) + end + end + + return nothing +end +end # @muladd diff --git a/src/time_integration/methods_SSP.jl b/src/time_integration/methods_SSP.jl new file mode 100644 index 00000000000..8ecad69748b --- /dev/null +++ b/src/time_integration/methods_SSP.jl @@ -0,0 +1,241 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +# Abstract base type for time integration schemes of explicit strong stability-preserving (SSP) +# Runge-Kutta (RK) methods. They are high-order time discretizations that guarantee the TVD property. +abstract type SimpleAlgorithmSSP end + +""" + SimpleSSPRK33(; stage_callbacks=()) + +The third-order SSP Runge-Kutta method of Shu and Osher. + +## References + +- Shu, Osher (1988) + "Efficient Implementation of Essentially Non-oscillatory Shock-Capturing Schemes" (Eq. 2.18) + [DOI: 10.1016/0021-9991(88)90177-5](https://doi.org/10.1016/0021-9991(88)90177-5) + +!!! warning "Experimental implementation" + This is an experimental feature and may change in future releases. +""" +struct SimpleSSPRK33{StageCallbacks} <: SimpleAlgorithmSSP + a::SVector{3, Float64} + b::SVector{3, Float64} + c::SVector{3, Float64} + stage_callbacks::StageCallbacks + + function SimpleSSPRK33(; stage_callbacks = ()) + a = SVector(0.0, 3 / 4, 1 / 3) + b = SVector(1.0, 1 / 4, 2 / 3) + c = SVector(0.0, 1.0, 1 / 2) + + # Butcher tableau + # c | a + # 0 | + # 1 | 1 + # 1/2 | 1/4 1/4 + # -------------------- + # b | 1/6 1/6 2/3 + + new{typeof(stage_callbacks)}(a, b, c, stage_callbacks) + end +end + +# This struct is needed to fake https://github.com/SciML/OrdinaryDiffEq.jl/blob/0c2048a502101647ac35faabd80da8a5645beac7/src/integrators/type.jl#L1 +mutable struct SimpleIntegratorSSPOptions{Callback} + callback::Callback # callbacks; used in Trixi + adaptive::Bool # whether the algorithm is adaptive; ignored + dtmax::Float64 # ignored + maxiters::Int # maximal number of time steps + tstops::Vector{Float64} # tstops from https://diffeq.sciml.ai/v6.8/basics/common_solver_opts/#Output-Control-1; ignored +end + +function SimpleIntegratorSSPOptions(callback, tspan; maxiters = typemax(Int), kwargs...) + SimpleIntegratorSSPOptions{typeof(callback)}(callback, false, Inf, maxiters, + [last(tspan)]) +end + +# This struct is needed to fake https://github.com/SciML/OrdinaryDiffEq.jl/blob/0c2048a502101647ac35faabd80da8a5645beac7/src/integrators/type.jl#L77 +# This implements the interface components described at +# https://diffeq.sciml.ai/v6.8/basics/integrator/#Handing-Integrators-1 +# which are used in Trixi. 
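+# Only the subset of that interface actually used by Trixi's callbacks is provided,
+# e.g. `integrator.u`, `integrator.t`, `integrator.dt`, `integrator.opts.callback`,
+# a `stats.naccept` counter (forwarded to `iter` below), and stubs such as
+# `u_modified!` and `set_proposed_dt!`.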
+
+mutable struct SimpleIntegratorSSP{RealT <: Real, uType, Params, Sol, F, Alg,
+                                   SimpleIntegratorSSPOptions}
+    u::uType
+    du::uType
+    r0::uType
+    t::RealT
+    dt::RealT # current time step
+    dtcache::RealT # ignored
+    iter::Int # current number of time steps (iteration)
+    p::Params # will be the semidiscretization from Trixi
+    sol::Sol # faked
+    f::F
+    alg::Alg
+    opts::SimpleIntegratorSSPOptions
+    finalstep::Bool # added for convenience
+end
+
+# Forward integrator.stats.naccept to integrator.iter (see GitHub PR#771)
+function Base.getproperty(integrator::SimpleIntegratorSSP, field::Symbol)
+    if field === :stats
+        return (naccept = getfield(integrator, :iter),)
+    end
+    # general fallback
+    return getfield(integrator, field)
+end
+
+"""
+    solve(ode, alg; dt, callback, kwargs...)
+
+The following structures and methods provide the infrastructure for SSP Runge-Kutta methods
+of type `SimpleAlgorithmSSP`.
+
+!!! warning "Experimental implementation"
+    This is an experimental feature and may change in future releases.
+"""
+function solve(ode::ODEProblem, alg = SimpleSSPRK33()::SimpleAlgorithmSSP;
+               dt, callback = nothing, kwargs...)
+    u = copy(ode.u0)
+    du = similar(u)
+    r0 = similar(u)
+    t = first(ode.tspan)
+    iter = 0
+    integrator = SimpleIntegratorSSP(u, du, r0, t, dt, zero(dt), iter, ode.p,
+                                     (prob = ode,), ode.f, alg,
+                                     SimpleIntegratorSSPOptions(callback, ode.tspan;
+                                                                kwargs...), false)
+
+    # resize container
+    resize!(integrator.p, nelements(integrator.p.solver, integrator.p.cache))
+
+    # initialize callbacks
+    if callback isa CallbackSet
+        for cb in callback.continuous_callbacks
+            error("unsupported")
+        end
+        for cb in callback.discrete_callbacks
+            cb.initialize(cb, integrator.u, integrator.t, integrator)
+        end
+    elseif !isnothing(callback)
+        error("unsupported")
+    end
+
+    for stage_callback in alg.stage_callbacks
+        init_callback(stage_callback, integrator.p)
+    end
+
+    solve!(integrator)
+end
+
+function solve!(integrator::SimpleIntegratorSSP)
+    @unpack prob = integrator.sol
+    @unpack alg = integrator
+    t_end = last(prob.tspan)
+    callbacks = integrator.opts.callback
+
+    integrator.finalstep = false
+    while !integrator.finalstep
+        if isnan(integrator.dt)
+            error("time step size `dt` is NaN")
+        end
+
+        # if the next iteration would push the simulation beyond the end time, set dt accordingly
+        if integrator.t + integrator.dt > t_end ||
+           isapprox(integrator.t + integrator.dt, t_end)
+            integrator.dt = t_end - integrator.t
+            terminate!(integrator)
+        end
+
+        @. integrator.r0 = integrator.u
+        for stage in eachindex(alg.c)
+            t_stage = integrator.t + integrator.dt * alg.c[stage]
+            # compute du
+            integrator.f(integrator.du, integrator.u, integrator.p, t_stage)
+
+            # perform forward Euler step
+            @. integrator.u = integrator.u + integrator.dt * integrator.du
+
+            for stage_callback in alg.stage_callbacks
+                stage_callback(integrator.u, integrator, stage)
+            end
+
+            # perform convex combination
+            @. integrator.u = alg.a[stage] * integrator.r0 + alg.b[stage] * integrator.u
+        end
+
+        integrator.iter += 1
+        integrator.t += integrator.dt
+
+        # handle callbacks
+        if callbacks isa CallbackSet
+            for cb in callbacks.discrete_callbacks
+                if cb.condition(integrator.u, integrator.t, integrator)
+                    cb.affect!(integrator)
+                end
+            end
+        end
+
+        # respect maximum number of iterations
+        if integrator.iter >= integrator.opts.maxiters && !integrator.finalstep
+            @warn "Interrupted. Larger maxiters is needed."
+ terminate!(integrator) + end + end + + for stage_callback in alg.stage_callbacks + finalize_callback(stage_callback, integrator.p) + end + + return TimeIntegratorSolution((first(prob.tspan), integrator.t), + (prob.u0, integrator.u), prob) +end + +# get a cache where the RHS can be stored +get_du(integrator::SimpleIntegratorSSP) = integrator.du +get_tmp_cache(integrator::SimpleIntegratorSSP) = (integrator.r0,) + +# some algorithms from DiffEq like FSAL-ones need to be informed when a callback has modified u +u_modified!(integrator::SimpleIntegratorSSP, ::Bool) = false + +# used by adaptive timestepping algorithms in DiffEq +function set_proposed_dt!(integrator::SimpleIntegratorSSP, dt) + integrator.dt = dt +end + +# stop the time integration +function terminate!(integrator::SimpleIntegratorSSP) + integrator.finalstep = true + empty!(integrator.opts.tstops) +end + +# used for AMR +function Base.resize!(integrator::SimpleIntegratorSSP, new_size) + resize!(integrator.u, new_size) + resize!(integrator.du, new_size) + resize!(integrator.r0, new_size) + + # Resize container + resize!(integrator.p, new_size) +end + +function Base.resize!(semi::AbstractSemidiscretization, new_size) + resize!(semi, semi.solver.volume_integral, new_size) +end + +Base.resize!(semi, volume_integral::AbstractVolumeIntegral, new_size) = nothing + +function Base.resize!(semi, volume_integral::VolumeIntegralSubcellLimiting, new_size) + # Resize container antidiffusive_fluxes + resize!(semi.cache.antidiffusive_fluxes, new_size) + + # Resize container subcell_limiter_coefficients + @unpack limiter = volume_integral + resize!(limiter.cache.subcell_limiter_coefficients, new_size) +end +end # @muladd diff --git a/src/time_integration/time_integration.jl b/src/time_integration/time_integration.jl index 539e00ff700..c1e53527121 100644 --- a/src/time_integration/time_integration.jl +++ b/src/time_integration/time_integration.jl @@ -15,4 +15,5 @@ end include("methods_2N.jl") include("methods_3Sstar.jl") +include("methods_SSP.jl") end # @muladd diff --git a/test/test_tree_2d_euler.jl b/test/test_tree_2d_euler.jl index 6de380288db..e1e3ad32e7d 100644 --- a/test/test_tree_2d_euler.jl +++ b/test/test_tree_2d_euler.jl @@ -63,6 +63,12 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_2d_dgsem") linf = [0.18527440131928286, 0.2404798030563736, 0.23269573860381076, 0.6874012187446894]) end + @trixi_testset "elixir_euler_shockcapturing_subcell.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_shockcapturing_subcell.jl"), + l2 = [0.08508147906199143, 0.04510299017724501, 0.045103019801950375, 0.6930704343869766], + linf = [0.31123546471463326, 0.5616274869594462, 0.5619692712224448, 2.88670199345138]) + end + @trixi_testset "elixir_euler_blast_wave.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_blast_wave.jl"), l2 = [0.14170569763947993, 0.11647068900798814, 0.11647072556898294, 0.3391989213659599], diff --git a/test/test_tree_2d_eulermulti.jl b/test/test_tree_2d_eulermulti.jl index 800dc31f84f..606afca1034 100644 --- a/test/test_tree_2d_eulermulti.jl +++ b/test/test_tree_2d_eulermulti.jl @@ -19,6 +19,14 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_2d_dgsem") tspan = (0.0, 0.001)) end + @trixi_testset "elixir_eulermulti_shock_bubble_shockcapturing_subcell_positivity.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_eulermulti_shock_bubble_shockcapturing_subcell_positivity.jl"), + l2 = [81.52845664909304, 2.5455678559421346, 63229.190712645846, 0.19929478404550321, 
0.011068604228443425], + linf = [249.21708417382013, 40.33299887640794, 174205.0118831558, 0.6881458768113586, 0.11274401158173972], + initial_refinement_level = 3, + tspan = (0.0, 0.001)) + end + @trixi_testset "elixir_eulermulti_ec.jl" begin @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_eulermulti_ec.jl"), l2 = [0.050182236154087095, 0.050189894464434635, 0.2258715597305131, 0.06175171559771687], diff --git a/test/test_unit.jl b/test/test_unit.jl index e70a9be6a4a..5c5291c2430 100644 --- a/test/test_unit.jl +++ b/test/test_unit.jl @@ -402,6 +402,9 @@ isdir(outdir) && rm(outdir, recursive=true) indicator_hg = IndicatorHennemannGassner(1.0, 0.0, true, "variable", "cache") @test_nowarn show(stdout, indicator_hg) + limiter_idp = SubcellLimiterIDP(true, [1], 0.1, "cache") + @test_nowarn show(stdout, limiter_idp) + # TODO: TrixiShallowWater: move unit test indicator_hg_swe = IndicatorHennemannGassnerShallowWater(1.0, 0.0, true, "variable", "cache") @test_nowarn show(stdout, indicator_hg_swe) @@ -637,7 +640,7 @@ isdir(outdir) && rm(outdir, recursive=true) for normal_direction in normal_directions @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) - end + end end @timed_testset "Consistency check for HLL flux (naive): SWE" begin @@ -674,7 +677,7 @@ isdir(outdir) && rm(outdir, recursive=true) SVector(0.0, 1.0), SVector(0.5, -0.5), SVector(-1.2, 0.3)] - orientations = [1, 2] + orientations = [1, 2] u_values = [SVector(1.0, 0.4, -0.5, 0.1, 1.0, 0.1, -0.2, 0.1, 0.0), SVector(1.5, -0.2, 0.1, 0.2, 5.0, -0.1, 0.1, 0.2, 0.2),] @@ -704,7 +707,7 @@ isdir(outdir) && rm(outdir, recursive=true) for u in u_values, normal_direction in normal_directions @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) - end + end end @timed_testset "Consistency check for HLL flux with Davis wave speed estimates: CEE" begin @@ -718,7 +721,7 @@ isdir(outdir) && rm(outdir, recursive=true) for orientation in orientations @test flux_hll(u, u, orientation, equations) ≈ flux(u, orientation, equations) end - + equations = CompressibleEulerEquations2D(1.4) u = SVector(1.1, -0.5, 2.34, 5.5) @@ -734,7 +737,7 @@ isdir(outdir) && rm(outdir, recursive=true) for normal_direction in normal_directions @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) - end + end equations = CompressibleEulerEquations3D(1.4) u = SVector(1.1, -0.5, 2.34, 2.4, 5.5) @@ -752,7 +755,7 @@ isdir(outdir) && rm(outdir, recursive=true) for normal_direction in normal_directions @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) - end + end end @timed_testset "Consistency check for HLL flux with Davis wave speed estimates: LEE" begin @@ -815,7 +818,7 @@ isdir(outdir) && rm(outdir, recursive=true) SVector(0.0, 1.0), SVector(0.5, -0.5), SVector(-1.2, 0.3)] - orientations = [1, 2] + orientations = [1, 2] u_values = [SVector(1.0, 0.4, -0.5, 0.1, 1.0, 0.1, -0.2, 0.1, 0.0), SVector(1.5, -0.2, 0.1, 0.2, 5.0, -0.1, 0.1, 0.2, 0.2),] @@ -845,7 +848,7 @@ isdir(outdir) && rm(outdir, recursive=true) for u in u_values, normal_direction in normal_directions @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) - end + end end @timed_testset "Consistency check for HLLE flux: CEE" begin @@ -873,7 +876,7 @@ isdir(outdir) && rm(outdir, recursive=true) for normal_direction in normal_directions @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) - end + end equations 
= CompressibleEulerEquations3D(1.4) u = SVector(1.1, -0.5, 2.34, 2.4, 5.5) @@ -891,7 +894,7 @@ isdir(outdir) && rm(outdir, recursive=true) for normal_direction in normal_directions @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) - end + end end @timed_testset "Consistency check for HLLE flux: SWE" begin @@ -907,7 +910,7 @@ isdir(outdir) && rm(outdir, recursive=true) SVector(0.0, 1.0), SVector(0.5, -0.5), SVector(-1.2, 0.3)] - orientations = [1, 2] + orientations = [1, 2] u = SVector(1, 0.5, 0.5, 0.0) @@ -937,7 +940,7 @@ isdir(outdir) && rm(outdir, recursive=true) SVector(0.0, 1.0), SVector(0.5, -0.5), SVector(-1.2, 0.3)] - orientations = [1, 2] + orientations = [1, 2] u_values = [SVector(1.0, 0.4, -0.5, 0.1, 1.0, 0.1, -0.2, 0.1, 0.0), SVector(1.5, -0.2, 0.1, 0.2, 5.0, -0.1, 0.1, 0.2, 0.2),] @@ -956,7 +959,7 @@ isdir(outdir) && rm(outdir, recursive=true) SVector(0.0, 0.0, 1.0), SVector(0.5, -0.5, 0.2), SVector(-1.2, 0.3, 1.4)] - orientations = [1, 2, 3] + orientations = [1, 2, 3] u_values = [SVector(1.0, 0.4, -0.5, 0.1, 1.0, 0.1, -0.2, 0.1, 0.0), SVector(1.5, -0.2, 0.1, 0.2, 5.0, -0.1, 0.1, 0.2, 0.2),] @@ -967,7 +970,7 @@ isdir(outdir) && rm(outdir, recursive=true) for u in u_values, normal_direction in normal_directions @test flux_hll(u, u, normal_direction, equations) ≈ flux(u, normal_direction, equations) - end + end end @timed_testset "Consistency check for Godunov flux" begin @@ -1137,7 +1140,7 @@ isdir(outdir) && rm(outdir, recursive=true) SVector(-1.2, 0.3)] u_values = [SVector(1.0, 0.5, -0.7, 1.0), SVector(1.5, -0.2, 0.1, 5.0),] - fluxes = [flux_central, flux_ranocha, flux_shima_etal, flux_kennedy_gruber, + fluxes = [flux_central, flux_ranocha, flux_shima_etal, flux_kennedy_gruber, flux_hll, FluxHLL(min_max_speed_davis)] for f_std in fluxes @@ -1157,7 +1160,7 @@ isdir(outdir) && rm(outdir, recursive=true) SVector(-1.2, 0.3, 1.4)] u_values = [SVector(1.0, 0.5, -0.7, 0.1, 1.0), SVector(1.5, -0.2, 0.1, 0.2, 5.0),] - fluxes = [flux_central, flux_ranocha, flux_shima_etal, flux_kennedy_gruber, FluxLMARS(340), + fluxes = [flux_central, flux_ranocha, flux_shima_etal, flux_kennedy_gruber, FluxLMARS(340), flux_hll, FluxHLL(min_max_speed_davis)] for f_std in fluxes @@ -1173,11 +1176,11 @@ isdir(outdir) && rm(outdir, recursive=true) normal_directions = [SVector(1.0, 0.0), SVector(0.0, 1.0), SVector(0.5, -0.5), - SVector(-1.2, 0.3)] + SVector(-1.2, 0.3)] u = SVector(1, 0.5, 0.5, 0.0) - fluxes = [flux_central, flux_fjordholm_etal, flux_wintermeyer_etal, + fluxes = [flux_central, flux_fjordholm_etal, flux_wintermeyer_etal, flux_hll, FluxHLL(min_max_speed_davis), FluxHLL(min_max_speed_einfeldt)] end From 925c0474938a2b6e095a795a4c155b40965625e0 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Tue, 22 Aug 2023 15:58:08 +0200 Subject: [PATCH 122/163] set version to v0.5.39 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index dd937ed213b..6a11655d345 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. 
Winters ", "Jesse Chan "] -version = "0.5.39-pre" +version = "0.5.39" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From add3a7f9e9c4b7445ee669e43afd8b84350e04d4 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Tue, 22 Aug 2023 15:58:20 +0200 Subject: [PATCH 123/163] set development version to v0.5.40-pre --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 6a11655d345..4374eaa3b0a 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.39" +version = "0.5.40-pre" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From bfefef15d3ccd71f43a5518560d0cd96b599881b Mon Sep 17 00:00:00 2001 From: Daniel Doehring Date: Tue, 22 Aug 2023 17:21:45 +0200 Subject: [PATCH 124/163] Update visualization.md (#1612) Compare [examples/tree\_2d\_dgsem/elixir\_advection\_amr\_visualization.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_amr_visualization.jl): to (existing) [examples/tree_2d_dgsem/elixir\_advection\_amr\_visualization.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_amr_visualization.jl): --- docs/src/visualization.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/visualization.md b/docs/src/visualization.md index 8f72bb4b1c6..4e4b780004d 100644 --- a/docs/src/visualization.md +++ b/docs/src/visualization.md @@ -375,7 +375,7 @@ During the simulation, the visualization callback creates and displays visualizations of the current solution in regular intervals. This can be useful to, e.g., monitor the validity of a long-running simulation or for illustrative purposes. An example for how to create a `VisualizationCallback` can be found in -[examples/tree_2d_dgsem/elixir\_advection\_amr\_visualization.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_amr_visualization.jl): +[examples/tree\_2d\_dgsem/elixir\_advection\_amr\_visualization.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_amr_visualization.jl): ```julia [...] From e22b11e5859b5168e24dabb4cb34a6a35fce3e66 Mon Sep 17 00:00:00 2001 From: Michael Schlottke-Lakemper Date: Tue, 22 Aug 2023 19:30:35 +0200 Subject: [PATCH 125/163] Add review checklist to new PRs (#1609) Co-authored-by: Hendrik Ranocha --- .github/review-checklist.md | 38 +++++++++++++++++++++++++++ .github/workflows/ReviewChecklist.yml | 20 ++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 .github/review-checklist.md create mode 100644 .github/workflows/ReviewChecklist.yml diff --git a/.github/review-checklist.md b/.github/review-checklist.md new file mode 100644 index 00000000000..2d8a24f1971 --- /dev/null +++ b/.github/review-checklist.md @@ -0,0 +1,38 @@ +### Review checklist + +This checklist is meant to assist creators of PRs (to let them know what reviewers will typically look for) and reviewers (to guide them in a structured review process). Items do not need to be checked explicitly for a PR to be eligible for merging. + +#### Purpose and scope +- [ ] The PR has a single goal that is clear from the PR title and/or description. +- [ ] All code changes represent a single set of modifications that logically belong together. 
+
+- [ ] No more than 500 lines of code are changed or there is no obvious way to split the PR into multiple PRs.
+
+#### Code quality
+- [ ] The code can be understood easily.
+- [ ] Newly introduced names for variables etc. are self-descriptive and consistent with existing naming conventions.
+- [ ] There are no redundancies that can be removed by simple modularization/refactoring.
+- [ ] There are no leftover debug statements or commented code sections.
+- [ ] The code adheres to our [conventions](https://trixi-framework.github.io/Trixi.jl/stable/conventions/) and [style guide](https://trixi-framework.github.io/Trixi.jl/stable/styleguide/), and to the [Julia guidelines](https://docs.julialang.org/en/v1/manual/style-guide/).
+
+#### Documentation
+- [ ] New functions and types are documented with a docstring or top-level comment.
+- [ ] Relevant publications are referenced in docstrings (see [example](https://github.com/trixi-framework/Trixi.jl/blob/7f83a1a938eecd9b841efe215a6e482e67cfdcc1/src/equations/compressible_euler_2d.jl#L601-L615) for formatting).
+- [ ] Inline comments are used to document longer or unusual code sections.
+- [ ] Comments describe intent ("why?") and not just functionality ("what?").
+- [ ] If the PR introduces a significant change or new feature, it is documented in `NEWS.md`.
+
+#### Testing
+- [ ] The PR passes all tests.
+- [ ] New or modified lines of code are covered by tests.
+- [ ] New or modified tests run in less than 10 seconds.
+
+#### Performance
+- [ ] There are no type instabilities or memory allocations in performance-critical parts.
+- [ ] If the PR intent is to improve performance, before/after [time measurements](https://trixi-framework.github.io/Trixi.jl/stable/performance/#Manual-benchmarking) are posted in the PR.
+
+#### Verification
+- [ ] The correctness of the code was verified using appropriate tests.
+- [ ] If new equations/methods are added, a convergence test has been run and the results
+      are posted in the PR.
+ +*Created with :heart: by the Trixi.jl community.* \ No newline at end of file diff --git a/.github/workflows/ReviewChecklist.yml b/.github/workflows/ReviewChecklist.yml new file mode 100644 index 00000000000..959a04752d7 --- /dev/null +++ b/.github/workflows/ReviewChecklist.yml @@ -0,0 +1,20 @@ +name: Add review checklist + +on: + pull_request_target: + types: [opened] + +permissions: + pull-requests: write + +jobs: + add-review-checklist: + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@v3 + - name: Add review checklist + uses: trixi-framework/add-pr-review-checklist@v1 + with: + file: '.github/review-checklist.md' + no-checklist-keyword: '[no checklist]' \ No newline at end of file From e82ebb557a280abb87e7b6f3db9d90bcac715321 Mon Sep 17 00:00:00 2001 From: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com> Date: Wed, 30 Aug 2023 20:16:45 +0200 Subject: [PATCH 126/163] unify how we refer to links to elixirs in docs (#1620) --- .../src/files/differentiable_programming.jl | 2 +- docs/literate/src/files/index.jl | 2 +- docs/src/callbacks.md | 22 +++++++++---------- docs/src/meshes/dgmulti_mesh.md | 14 +++++++----- docs/src/overview.md | 4 ++-- docs/src/restart.md | 10 ++++----- docs/src/visualization.md | 2 +- 7 files changed, 30 insertions(+), 26 deletions(-) diff --git a/docs/literate/src/files/differentiable_programming.jl b/docs/literate/src/files/differentiable_programming.jl index ecc09d05dcf..5c5a7cd7440 100644 --- a/docs/literate/src/files/differentiable_programming.jl +++ b/docs/literate/src/files/differentiable_programming.jl @@ -128,7 +128,7 @@ condition_number = cond(V) # you can compute the gradient of an entropy-dissipative semidiscretization with respect to the # ideal gas constant of the compressible Euler equations as described in the following. This example # is also available as the elixir -# [examples/special\_elixirs/elixir\_euler\_ad.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/special_elixirs/elixir_euler_ad.jl) +# [`examples/special_elixirs/elixir_euler_ad.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/special_elixirs/elixir_euler_ad.jl) # First, we create a semidiscretization of the compressible Euler equations. diff --git a/docs/literate/src/files/index.jl b/docs/literate/src/files/index.jl index 5b669881502..0c8de66bf42 100644 --- a/docs/literate/src/files/index.jl +++ b/docs/literate/src/files/index.jl @@ -116,7 +116,7 @@ # ## Examples in Trixi.jl # Trixi.jl already contains several more coding examples, the so-called `elixirs`. You can find them -# in the folder [`examples`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/). +# in the folder [`examples/`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/). # They are structured by the underlying mesh type and the respective number of spatial dimensions. # The name of an elixir is composed of the underlying system of conservation equations (for instance # `advection` or `euler`) and other special characteristics like the initial condition diff --git a/docs/src/callbacks.md b/docs/src/callbacks.md index a85f8e8191b..1d3e5e34b51 100644 --- a/docs/src/callbacks.md +++ b/docs/src/callbacks.md @@ -15,7 +15,7 @@ control, adaptive mesh refinement, I/O, and more. ### CFL-based time step control Time step control can be performed with a [`StepsizeCallback`](@ref). 
An example making use -of this can be found at [examples/tree_2d_dgsem/elixir\_advection\_basic.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_basic.jl) +of this can be found at [`examples/tree_2d_dgsem/elixir_advection_basic.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_basic.jl) ### Adaptive mesh refinement Trixi.jl uses a hierarchical Cartesian mesh which can be locally refined in a solution-adaptive way. @@ -24,12 +24,12 @@ passing an [`AMRCallback`](@ref) to the ODE solver. The `AMRCallback` requires a [`ControllerThreeLevel`](@ref) or [`ControllerThreeLevelCombined`](@ref) to tell the AMR algorithm which cells to refine/coarsen. -An example elixir using AMR can be found at [examples/tree_2d_dgsem/elixir\_advection\_amr.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_amr.jl). +An example elixir using AMR can be found at [`examples/tree_2d_dgsem/elixir_advection_amr.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_amr.jl). ### Analyzing the numerical solution The [`AnalysisCallback`](@ref) can be used to analyze the numerical solution, e.g. calculate errors or user-specified integrals, and print the results to the screen. The results can also be -saved in a file. An example can be found at [examples/tree_2d_dgsem/elixir\_euler\_vortex.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_euler_vortex.jl). +saved in a file. An example can be found at [`examples/tree_2d_dgsem/elixir_euler_vortex.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_euler_vortex.jl). In [Performance metrics of the `AnalysisCallback`](@ref) you can find a detailed description of the different performance metrics the `AnalysisCallback` computes. @@ -38,15 +38,15 @@ description of the different performance metrics the `AnalysisCallback` computes #### Solution and restart files To save the solution in regular intervals you can use a [`SaveSolutionCallback`](@ref). It is also possible to create restart files using the [`SaveRestartCallback`](@ref). An example making use -of these can be found at [examples/tree_2d_dgsem/elixir\_advection\_extended.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_extended.jl). +of these can be found at [`examples/tree_2d_dgsem/elixir_advection_extended.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_extended.jl). An example showing how to restart a simulation from a restart file can be found at -[examples/tree_2d_dgsem/elixir\_advection\_restart.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_restart.jl). +[`examples/tree_2d_dgsem/elixir_advection_restart.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_restart.jl). #### Time series Sometimes it is useful to record the evaluations of state variables over time at a given set of points. This can be achieved by the [`TimeSeriesCallback`](@ref), which is used, e.g., in -[examples/tree_2d_dgsem/elixir\_acoustics\_gaussian\_source.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_acoustics_gaussian_source.jl). 
+[`examples/tree_2d_dgsem/elixir_acoustics_gaussian_source.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_acoustics_gaussian_source.jl). The `TimeSeriesCallback` constructor expects a semidiscretization and a list of points at which the solution should be recorded in regular time step intervals. After the last time step, the entire record is stored in an HDF5 file. @@ -113,12 +113,12 @@ will yield the following plot: Some callbacks provided by Trixi.jl implement specific features for certain equations: * The [`LBMCollisionCallback`](@ref) implements the Lattice-Boltzmann method (LBM) collision operator and should only be used when solving the Lattice-Boltzmann equations. See e.g. - [examples/tree_2d_dgsem/elixir\_lbm\_constant.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_lbm_constant.jl) + [`examples/tree_2d_dgsem/elixir_lbm_constant.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_lbm_constant.jl) * The [`SteadyStateCallback`](@ref) terminates the time integration when the residual steady state falls below a certain threshold. This checks the convergence of the potential ``\phi`` for - hyperbolic diffusion. See e.g. [examples/tree_2d_dgsem/elixir\_hypdiff\_nonperiodic.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_hypdiff_nonperiodic.jl). + hyperbolic diffusion. See e.g. [`examples/tree_2d_dgsem/elixir_hypdiff_nonperiodic.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_hypdiff_nonperiodic.jl). * The [`GlmSpeedCallback`](@ref) updates the divergence cleaning wave speed `c_h` for the ideal - GLM-MHD equations. See e.g. [examples/tree_2d_dgsem/elixir\_mhd\_alfven\_wave.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_mhd_alfven_wave.jl). + GLM-MHD equations. See e.g. [`examples/tree_2d_dgsem/elixir_mhd_alfven_wave.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_mhd_alfven_wave.jl). ## Usage of step callbacks Step callbacks are passed to the `solve` method from the ODE solver via the keyword argument @@ -152,7 +152,7 @@ more callbacks, you need to turn them into a `CallbackSet` first by calling ## Stage callbacks [`PositivityPreservingLimiterZhangShu`](@ref) is a positivity-preserving limiter, used to enforce physical constraints. An example elixir using this feature can be found at -[examples/tree_2d_dgsem/elixir\_euler\_positivity.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_euler_positivity.jl). +[`examples/tree_2d_dgsem/elixir_euler_positivity.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_euler_positivity.jl). ## Implementing new callbacks Since Trixi.jl is compatible with [OrdinaryDiffEq.jl](https://github.com/SciML/OrdinaryDiffEq.jl), @@ -162,4 +162,4 @@ Step callbacks are just called [callbacks](https://diffeq.sciml.ai/latest/featur Stage callbacks are called [`stage_limiter!`](https://diffeq.sciml.ai/latest/solvers/ode_solve/#Explicit-Strong-Stability-Preserving-Runge-Kutta-Methods-for-Hyperbolic-PDEs-(Conservation-Laws)). An example elixir showing how to implement a new simple stage callback and a new simple step -callback can be found at [examples/tree_2d_dgsem/elixir\_advection\_callbacks.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_callbacks.jl). 
+callback can be found at [`examples/tree_2d_dgsem/elixir_advection_callbacks.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_callbacks.jl).
diff --git a/docs/src/meshes/dgmulti_mesh.md b/docs/src/meshes/dgmulti_mesh.md
index e07ba70a80a..fc086bba146 100644
--- a/docs/src/meshes/dgmulti_mesh.md
+++ b/docs/src/meshes/dgmulti_mesh.md
@@ -81,16 +81,20 @@ type, but will be more efficient at high orders of approximation.
 ## Trixi.jl elixirs on simplicial and tensor product element meshes
 
 Example elixirs with triangular, quadrilateral, and tetrahedral meshes can be found in
-the `examples/dgmulti_2d` and `examples/dgmulti_3d` folders. Some key elixirs to look at:
+the [`examples/dgmulti_2d/`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/dgmulti_2d/)
+and [`examples/dgmulti_3d/`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/dgmulti_3d/)
+folders. Some key elixirs to look at:
 
-* `examples/dgmulti_2d/elixir_euler_weakform.jl`: basic weak form DG discretization on a uniform triangular mesh.
+* [`examples/dgmulti_2d/elixir_euler_weakform.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/dgmulti_2d/elixir_euler_weakform.jl):
+  basic weak form DG discretization on a uniform triangular mesh.
   Changing `element_type = Quad()` or `approximation_type = SBP()` will switch to a quadrilateral mesh
   or an SBP-type discretization.
   Changing `surface_integral = SurfaceIntegralWeakForm(flux_ec)` and
   `volume_integral = VolumeIntegralFluxDifferencing(flux_ec)` for some entropy conservative flux
   (e.g., [`flux_chandrashekar`](@ref) or [`flux_ranocha`](@ref)) will switch to an entropy
   conservative formulation.
-* `examples/dgmulti_2d/elixir_euler_triangulate_pkg_mesh.jl`: uses an unstructured mesh generated by
-  [Triangulate.jl](https://github.com/JuliaGeometry/Triangulate.jl).
-* `examples/dgmulti_3d/elixir_euler_weakform.jl`: basic weak form DG discretization on a uniform tetrahedral mesh.
+* [`examples/dgmulti_2d/elixir_euler_triangulate_pkg_mesh.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/dgmulti_2d/elixir_euler_triangulate_pkg_mesh.jl):
+  uses an unstructured mesh generated by [Triangulate.jl](https://github.com/JuliaGeometry/Triangulate.jl).
+* [`examples/dgmulti_3d/elixir_euler_weakform.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/dgmulti_3d/elixir_euler_weakform.jl):
+  basic weak form DG discretization on a uniform tetrahedral mesh.
   Changing `element_type = Hex()` will switch to a hexahedral mesh.
   Changing `surface_integral = SurfaceIntegralWeakForm(flux_ec)` and
   `volume_integral = VolumeIntegralFluxDifferencing(flux_ec)` for some entropy conservative flux
diff --git a/docs/src/overview.md b/docs/src/overview.md
index 519ec2ca424..46bc28b6025 100644
--- a/docs/src/overview.md
+++ b/docs/src/overview.md
@@ -5,7 +5,7 @@ conservation laws. Thus, it is not a monolithic PDE solver that is configured at
 via parameter files, as it is often found in classical numerical simulation codes.
 Instead, each simulation is configured by pure Julia code. Many examples
 of such simulation setups, called *elixirs* in Trixi.jl, are provided in the
-[examples](https://github.com/trixi-framework/Trixi.jl/blob/main/examples)
+[`examples/`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples)
 folder.
Trixi.jl uses the method of lines, i.e., the full space-time discretization is separated into two steps;
@@ -77,7 +77,7 @@ Further information can be found in the
 ## Next steps
 
 We explicitly encourage people interested in Trixi.jl to have a look at the
-[examples](https://github.com/trixi-framework/Trixi.jl/blob/main/examples)
+[`examples/`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples)
 bundled with Trixi.jl
 to get an impression of what is possible and the general look and feel of
 Trixi.jl. Before doing that, it is usually good to get an idea of
diff --git a/docs/src/restart.md b/docs/src/restart.md
index d24d93cb297..767269ff27d 100644
--- a/docs/src/restart.md
+++ b/docs/src/restart.md
@@ -18,7 +18,7 @@ save_restart = SaveRestartCallback(interval=100,
 Make this part of your `CallbackSet`.
 
 An example is
-[```examples/examples/structured_2d_dgsem/elixir_advection_extended.jl```](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/structured_2d_dgsem/elixir_advection_extended.jl).
+[`examples/structured_2d_dgsem/elixir_advection_extended.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/structured_2d_dgsem/elixir_advection_extended.jl).
 
 ## [Perform the simulation restart](@id restart_perform)
 
@@ -26,7 +26,7 @@ Since all of the information about the simulation can be obtained from the last
 snapshot, the restart can be done with relatively few lines
 in an extra elixir file.
 However, some might prefer to keep everything in one elixir and
-conditionals like ```if restart``` with a boolean variable ```restart``` that is user defined.
+use conditionals like `if restart` with a boolean variable `restart` that is user-defined.
 
 First we need to define from which file we want to restart, e.g.
 ```julia
@@ -50,7 +50,7 @@ time the one from the snapshot:
 tspan = (load_time(restart_filename), 2.0)
 ```
 
-We now also take the last ```dt```, so that our solver does not need to first find
+We now also take the last `dt`, so that our solver does not need to first find
 one to fulfill the CFL condition:
 ```julia
 dt = load_dt(restart_filename)
@@ -63,7 +63,7 @@ ode = semidiscretize(semi, tspan, restart_filename)
 
 You should now define a [`SaveSolutionCallback`](@ref) similar to the
 [original simulation](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/structured_2d_dgsem/elixir_advection_extended.jl),
-but with ```save_initial_solution=false```, otherwise our initial snapshot will be overwritten.
+but with `save_initial_solution=false`, otherwise our initial snapshot will be overwritten.
 If you are using one file for the original simulation and the restart
 you can reuse your [`SaveSolutionCallback`](@ref), but need to set
 ```julia
@@ -86,4 +86,4 @@ Now we can compute the solution:
 sol = solve!(integrator)
 ```
 
-An example is in `[``examples/structured_2d_dgsem/elixir_advection_restart.jl```](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/structured_2d_dgsem/elixir_advection_restart.jl).
+An example is in [`examples/structured_2d_dgsem/elixir_advection_restart.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/structured_2d_dgsem/elixir_advection_restart.jl).
diff --git a/docs/src/visualization.md b/docs/src/visualization.md
index 4e4b780004d..36a7e8f5ac8 100644
--- a/docs/src/visualization.md
+++ b/docs/src/visualization.md
@@ -375,7 +375,7 @@ During the simulation, the visualization callback creates and displays
 visualizations of the current solution in regular intervals.
This can be useful to, e.g., monitor the validity of a long-running simulation or for illustrative purposes. An example for how to create a `VisualizationCallback` can be found in -[examples/tree\_2d\_dgsem/elixir\_advection\_amr\_visualization.jl](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_amr_visualization.jl): +[`examples/tree_2d_dgsem/elixir_advection_amr_visualization.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_amr_visualization.jl): ```julia [...] From af70d89eb35b30561833a20a6d6d3bb6e9567264 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Fri, 1 Sep 2023 15:35:30 +0200 Subject: [PATCH 127/163] update affiliation of HR (#1621) --- .zenodo.json | 2 +- AUTHORS.md | 2 +- README.md | 2 +- docs/src/index.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.zenodo.json b/.zenodo.json index 95879af1e90..905c0170ab9 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -15,7 +15,7 @@ "orcid": "0000-0002-1752-1158" }, { - "affiliation": "Applied Mathematics, University of Hamburg, Germany", + "affiliation": "Numerical Mathematics, Johannes Gutenberg University Mainz, Germany", "name": "Ranocha, Hendrik", "orcid": "0000-0002-3456-2277" }, diff --git a/AUTHORS.md b/AUTHORS.md index 74bfaa9c852..f1debf8ba76 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -12,7 +12,7 @@ provided substantial additions or modifications. Together, these two groups form * [Gregor Gassner](https://www.mi.uni-koeln.de/NumSim/gregor-gassner), University of Cologne, Germany * [Hendrik Ranocha](https://ranocha.de), - University of Hamburg, Germany + Johannes Gutenberg University Mainz, Germany * [Andrew Winters](https://liu.se/en/employee/andwi94), Linköping University, Sweden * [Jesse Chan](https://jlchan.github.io), diff --git a/README.md b/README.md index 7eaee8750dd..63540b1f640 100644 --- a/README.md +++ b/README.md @@ -247,7 +247,7 @@ Schlottke-Lakemper](https://lakemper.eu) (RWTH Aachen University/High-Performance Computing Center Stuttgart (HLRS), Germany) and [Gregor Gassner](https://www.mi.uni-koeln.de/NumSim/gregor-gassner) (University of Cologne, Germany). Together with [Hendrik Ranocha](https://ranocha.de) -(University of Hamburg, Germany), [Andrew Winters](https://liu.se/en/employee/andwi94) +(Johannes Gutenberg University Mainz, Germany), [Andrew Winters](https://liu.se/en/employee/andwi94) (Linköping University, Sweden), and [Jesse Chan](https://jlchan.github.io) (Rice University, US), they are the principal developers of Trixi.jl. The full list of contributors can be found in [AUTHORS.md](AUTHORS.md). diff --git a/docs/src/index.md b/docs/src/index.md index 3af785bc681..bb2afd1019f 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -324,7 +324,7 @@ Schlottke-Lakemper](https://lakemper.eu) (RWTH Aachen University/High-Performance Computing Center Stuttgart (HLRS), Germany) and [Gregor Gassner](https://www.mi.uni-koeln.de/NumSim/gregor-gassner) (University of Cologne, Germany). Together with [Hendrik Ranocha](https://ranocha.de) -(University of Hamburg, Germany) and [Andrew Winters](https://liu.se/en/employee/andwi94) +(Johannes Gutenberg University Mainz, Germany) and [Andrew Winters](https://liu.se/en/employee/andwi94) (Linköping University, Sweden), and [Jesse Chan](https://jlchan.github.io) (Rice University, US), they are the principal developers of Trixi.jl. The full list of contributors can be found under [Authors](@ref). 
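
The restart how-to in the `docs/src/restart.md` hunks above is spread over several small snippets. Pieced together, a restart elixir might look like the following minimal sketch. The advection setup, the snapshot file name, and the time integrator are illustrative assumptions that must match whatever the original simulation used; only `load_mesh`, `load_time`, `load_dt`, `semidiscretize` with a restart file, `SaveSolutionCallback`, and `solve!` are taken from the documentation itself.

```julia
using OrdinaryDiffEq
using Trixi

# Snapshot written by a previous run with a `SaveRestartCallback`;
# the file name is a placeholder
restart_filename = joinpath("out", "restart_000040.h5")

# The semidiscretization must be identical to the original simulation;
# a simple linear advection setup is assumed here for illustration
equations = LinearScalarAdvectionEquation2D((0.2, -0.7))
solver = DGSEM(polydeg = 3, surface_flux = flux_lax_friedrichs)
mesh = load_mesh(restart_filename)
semi = SemidiscretizationHyperbolic(mesh, equations,
                                    initial_condition_convergence_test, solver)

# Resume at the stored time and step size instead of starting from scratch
tspan = (load_time(restart_filename), 2.0)
dt = load_dt(restart_filename)
ode = semidiscretize(semi, tspan, restart_filename)

# Do not save the initial state again, otherwise the snapshot is overwritten
save_solution = SaveSolutionCallback(interval = 100,
                                     save_initial_solution = false)
callbacks = CallbackSet(save_solution)

integrator = init(ode, CarpenterKennedy2N54(williamson_condition = false);
                  dt = dt, save_everystep = false, callback = callbacks)
sol = solve!(integrator)
```

Using `init`/`solve!` instead of a plain `solve` call mirrors the documented workflow and leaves room to adjust the integrator before the run continues.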
From 6403a480825dcebdd10cb90584c5cc877b4b2c5d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 1 Sep 2023 19:13:47 +0200 Subject: [PATCH 128/163] Bump crate-ci/typos from 1.16.5 to 1.16.9 (#1622) Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.16.5 to 1.16.9. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.16.5...v1.16.9) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/SpellCheck.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml index 6ebb288ea30..a06121e7ca1 100644 --- a/.github/workflows/SpellCheck.yml +++ b/.github/workflows/SpellCheck.yml @@ -10,4 +10,4 @@ jobs: - name: Checkout Actions Repository uses: actions/checkout@v3 - name: Check spelling - uses: crate-ci/typos@v1.16.5 + uses: crate-ci/typos@v1.16.9 From bd5ba865478a889c48a7675072d921906c27c0c4 Mon Sep 17 00:00:00 2001 From: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com> Date: Wed, 6 Sep 2023 09:15:38 +0200 Subject: [PATCH 129/163] Update docs on how to use a system-provided MPI installation with T8code.jl (#1613) * update docs on how to use a system-provided MPI installation with T8code.jl * reduce number of characters per line * adjust path of shared object files * fix typo --- docs/src/parallelization.md | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/docs/src/parallelization.md b/docs/src/parallelization.md index 08470fd064a..245fdc11852 100644 --- a/docs/src/parallelization.md +++ b/docs/src/parallelization.md @@ -53,16 +53,24 @@ a system-provided MPI installation with Trixi.jl can be found in the following s ### [Using a system-provided MPI installation](@id parallel_system_MPI) -When using Trixi.jl with a system-provided MPI backend the underlying [`p4est`](https://github.com/cburstedde/p4est) -library needs to be compiled with the same MPI installation. Therefore, you also need to use -a system-provided `p4est` installation (for notes on how to install `p4est` see e.g. -[here](https://github.com/cburstedde/p4est/blob/master/README), use the configure option -`--enable-mpi`). In addition, [P4est.jl](https://github.com/trixi-framework/P4est.jl) needs to -be configured to use the custom `p4est` installation. Follow the steps described -[here](https://github.com/trixi-framework/P4est.jl/blob/main/README.md) for the configuration. +When using Trixi.jl with a system-provided MPI backend the underlying +[`p4est`](https://github.com/cburstedde/p4est) and [`t8code`](https://github.com/DLR-AMR/t8code) +libraries need to be compiled with the same MPI installation. Therefore, you also need to +use system-provided `p4est` and `t8code` installations (for notes on how to install `p4est` +and `t8code` see e.g. [here](https://github.com/cburstedde/p4est/blob/master/README) and +[here](https://github.com/DLR-AMR/t8code/wiki/Installation), use the configure option +`--enable-mpi`). Note that `t8code` already comes with a `p4est` installation, so it suffices +to install `t8code`. 
In addition, [P4est.jl](https://github.com/trixi-framework/P4est.jl) and +[T8code.jl](https://github.com/DLR-AMR/T8code.jl) need to be configured to use the custom +installations. Follow the steps described +[here](https://github.com/DLR-AMR/T8code.jl/blob/main/README.md#installation) and +[here](https://github.com/trixi-framework/P4est.jl/blob/main/README.md#installation) for the +configuration. The paths that point to `libp4est.so` (and potentially to `libsc.so`) need to be +the same for P4est.jl and T8code.jl. This could e.g. be `libp4est.so` that usually can be found +in `lib/` or `local/lib/` in the installation directory of `t8code`. In total, in your active Julia project you should have a LocalPreferences.toml file with sections -`[MPIPreferences]` and `[P4est]` as well as an entry `MPIPreferences` in your Project.toml to -use a custom MPI installation. +`[MPIPreferences]`, `[T8code]` and `[P4est]` as well as an entry `MPIPreferences` in your +Project.toml to use a custom MPI installation. ### [Usage](@id parallel_usage) From 76b4c5fc842e47c4bd9f33c044ae99bd2dfbf789 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Wed, 6 Sep 2023 09:16:48 +0200 Subject: [PATCH 130/163] set version to v0.5.40 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 4374eaa3b0a..0d27c0fd6e8 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.40-pre" +version = "0.5.40" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From f098ea20f545007d741745c160c7d1e1919733c4 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Wed, 6 Sep 2023 09:17:13 +0200 Subject: [PATCH 131/163] set development version to v0.5.41-pre --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 0d27c0fd6e8..e14dbcd0c03 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.40" +version = "0.5.41-pre" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From 81d2b70965f361b0bfd9b113ebe4fb360b1437cb Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Wed, 6 Sep 2023 15:33:48 +0200 Subject: [PATCH 132/163] workaround for allocations when broadcasting equations (#1626) --- src/equations/equations.jl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/equations/equations.jl b/src/equations/equations.jl index 90b2cd62191..570a25cece9 100644 --- a/src/equations/equations.jl +++ b/src/equations/equations.jl @@ -75,8 +75,14 @@ end @inline Base.ndims(::AbstractEquations{NDIMS}) where {NDIMS} = NDIMS -# equations act like scalars in broadcasting -Base.broadcastable(equations::AbstractEquations) = Ref(equations) +# Equations act like scalars in broadcasting. +# Using `Ref(equations)` would be more convenient in some circumstances. +# However, this does not work with Julia v1.9.3 correctly due to a (performance) +# bug in Julia, see +# - https://github.com/trixi-framework/Trixi.jl/pull/1618 +# - https://github.com/JuliaLang/julia/issues/51118 +# Thus, we use the workaround below. 
+Base.broadcastable(equations::AbstractEquations) = (equations,) """ flux(u, orientation_or_normal, equations) From 4e6d1638a279130e0f5008ff75acadb8307b0a6d Mon Sep 17 00:00:00 2001 From: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com> Date: Wed, 6 Sep 2023 17:00:08 +0200 Subject: [PATCH 133/163] increase absolute tolerance (#1625) Co-authored-by: Hendrik Ranocha --- test/test_tree_1d_shallowwater.jl | 3 ++- test/test_trixi.jl | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_tree_1d_shallowwater.jl b/test/test_tree_1d_shallowwater.jl index cafa17edd4c..1e5aeac1786 100644 --- a/test/test_tree_1d_shallowwater.jl +++ b/test/test_tree_1d_shallowwater.jl @@ -102,7 +102,8 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_beach.jl"), l2 = [0.17979210479598923, 1.2377495706611434, 6.289818963361573e-8], linf = [0.845938394800688, 3.3740800777086575, 4.4541473087633676e-7], - tspan = (0.0, 0.05)) + tspan = (0.0, 0.05), + atol = 3e-10) # see https://github.com/trixi-framework/Trixi.jl/issues/1617 end @trixi_testset "elixir_shallowwater_parabolic_bowl.jl" begin diff --git a/test/test_trixi.jl b/test/test_trixi.jl index ddace6b4fbe..f2cd0cab94d 100644 --- a/test/test_trixi.jl +++ b/test/test_trixi.jl @@ -5,7 +5,7 @@ import Trixi # inside an elixir. """ @test_trixi_include(elixir; l2=nothing, linf=nothing, - atol=10*eps(), rtol=0.001, + atol=500*eps(), rtol=sqrt(eps()), parameters...) Test Trixi by calling `trixi_include(elixir; parameters...)`. From a7867e7376541d1326f2796661f5ca35bc9fe499 Mon Sep 17 00:00:00 2001 From: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com> Date: Wed, 6 Sep 2023 18:38:45 +0200 Subject: [PATCH 134/163] Update docs for parallel HDF5 (#1504) * update docs for parallel HDF5 * Update docs/src/parallelization.md Co-authored-by: Hendrik Ranocha * update docs on parallel HDF5 * bump compat for HDF5 * mention T8code * reduce number of characters per line * add information for older HDF5.jl versions --------- Co-authored-by: Hendrik Ranocha --- Project.toml | 2 +- docs/src/parallelization.md | 41 +++++++++++++++++++++++++++---------- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/Project.toml b/Project.toml index e14dbcd0c03..41dde8662ab 100644 --- a/Project.toml +++ b/Project.toml @@ -56,7 +56,7 @@ DiffEqCallbacks = "2.25" EllipsisNotation = "1.0" FillArrays = "0.13.2, 1" ForwardDiff = "0.10.18" -HDF5 = "0.14, 0.15, 0.16" +HDF5 = "0.14, 0.15, 0.16, 0.17" IfElse = "0.1" LinearMaps = "2.7, 3.0" LoopVectorization = "0.12.118" diff --git a/docs/src/parallelization.md b/docs/src/parallelization.md index 245fdc11852..d56777c9af4 100644 --- a/docs/src/parallelization.md +++ b/docs/src/parallelization.md @@ -166,17 +166,36 @@ section, specifically at the descriptions of the performance index (PID). 
### Using error-based step size control with MPI
 
-If you use error-based step size control (see also the section on [error-based adaptive step sizes](@ref adaptive_step_sizes))
-together with MPI you need to pass `internalnorm=ode_norm` and you should pass
-`unstable_check=ode_unstable_check` to OrdinaryDiffEq's [`solve`](https://docs.sciml.ai/DiffEqDocs/latest/basics/common_solver_opts/),
+If you use error-based step size control (see also the section on
+[error-based adaptive step sizes](@ref adaptive_step_sizes)) together with MPI, you need to pass
+`internalnorm=ode_norm` and you should pass `unstable_check=ode_unstable_check` to
+OrdinaryDiffEq's [`solve`](https://docs.sciml.ai/DiffEqDocs/latest/basics/common_solver_opts/),
 which are both included in [`ode_default_options`](@ref).
 
 ### Using parallel input and output
-Trixi.jl allows parallel I/O using MPI by leveraging parallel HDF5.jl. To enable this, you first need
-to use a system-provided MPI library, see also [here](@ref parallel_system_MPI) and you need to tell
-[HDF5.jl](https://github.com/JuliaIO/HDF5.jl) to use this library.
-To do so, set the environment variable `JULIA_HDF5_PATH` to the local path
-that contains the `libhdf5.so` shared object file and build HDF5.jl by executing `using Pkg; Pkg.build("HDF5")`.
-For more information see also the [documentation of HDF5.jl](https://juliaio.github.io/HDF5.jl/stable/mpi/).
-
-If you do not perform these steps to use parallel HDF5 or if the HDF5 is not MPI-enabled, Trixi.jl will fall back on a less efficient I/O mechanism. In that case, all disk I/O is performed only on rank zero and data is distributed to/gathered from the other ranks using regular MPI communication.
+Trixi.jl allows parallel I/O using MPI by leveraging parallel HDF5.jl. On most systems, this is
+enabled by default. Additionally, you can also use a local installation of the HDF5 library
+(with MPI support). For this, you first need to use a system-provided MPI library, see also
+[here](@ref parallel_system_MPI) and you need to tell [HDF5.jl](https://github.com/JuliaIO/HDF5.jl)
+to use this library. To do so with HDF5.jl v0.17 and newer, set the preferences `libhdf5` and
+`libhdf5_hl` to the local paths of the libraries `libhdf5` and `libhdf5_hl`, which can be done by
+```julia
+julia> using Preferences, UUIDs
+julia> set_preferences!(
+           UUID("f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"), # UUID of HDF5.jl
+           "libhdf5" => "/path/to/your/libhdf5.so",
+           "libhdf5_hl" => "/path/to/your/libhdf5_hl.so", force = true)
+```
+For more information see also the
+[documentation of HDF5.jl](https://juliaio.github.io/HDF5.jl/stable/mpi/). In total, you should
+have a file called LocalPreferences.toml in the project directory that contains a section
+`[MPIPreferences]`, a section `[HDF5]` with entries `libhdf5` and `libhdf5_hl`, a section `[P4est]`
+with the entry `libp4est`, as well as a section `[T8code]` with the entries `libt8`, `libp4est`
+and `libsc`.
+If you use HDF5.jl v0.16 or older, instead of setting the preferences for HDF5.jl, you need to set
+the environment variable `JULIA_HDF5_PATH` to the path where the HDF5 binaries are located and
+then call `]build HDF5` from Julia.
+
+If HDF5 is not MPI-enabled, Trixi.jl will fall back on a less efficient I/O mechanism. In that
+case, all disk I/O is performed only on rank zero and data is distributed to/gathered from the
+other ranks using regular MPI communication.
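
To make the preferences story above concrete, here is a minimal one-time configuration sketch that combines the MPI, HDF5, `p4est`, and `t8code` settings discussed in this and the preceding patches. It assumes MPIPreferences.jl, P4est.jl, and T8code.jl are installed in the active environment and that HDF5.jl is at least v0.17; all library paths are placeholders, and only the HDF5.jl UUID and the preference names are taken from the documentation itself.

```julia
using Preferences, UUIDs
using MPIPreferences

# Writes the [MPIPreferences] section of LocalPreferences.toml,
# pointing MPI.jl to the system MPI library
MPIPreferences.use_system_binary()

# [HDF5] section: MPI-enabled HDF5 libraries (UUID of HDF5.jl as given above)
set_preferences!(UUID("f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"),
                 "libhdf5" => "/path/to/your/libhdf5.so",
                 "libhdf5_hl" => "/path/to/your/libhdf5_hl.so", force = true)

# [P4est] and [T8code] sections: shared objects built against the same MPI;
# the package UUIDs are looked up from the active environment
for (pkg, prefs) in [("P4est", ["libp4est" => "/path/to/your/libp4est.so"]),
                     ("T8code", ["libt8" => "/path/to/your/libt8.so",
                                 "libp4est" => "/path/to/your/libp4est.so",
                                 "libsc" => "/path/to/your/libsc.so"])]
    set_preferences!(Base.identify_package(pkg).uuid, prefs...; force = true)
end
```

After a restart of Julia, the LocalPreferences.toml in the project directory should contain all of the sections listed above, and the affected packages will be precompiled against the local libraries.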
From 13260284dcbed67e9c430623cef049e171d19bfd Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Thu, 7 Sep 2023 08:10:57 +0200
Subject: [PATCH 135/163] set version to v0.5.41

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 41dde8662ab..37553fb70f4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "]
-version = "0.5.41-pre"
+version = "0.5.41"
 
 [deps]
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"

From 953f88a78688969b893f34b3cf99693674217381 Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Thu, 7 Sep 2023 08:11:09 +0200
Subject: [PATCH 136/163] set development version to v0.5.42-pre

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 37553fb70f4..d37c0548a6a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "]
-version = "0.5.41"
+version = "0.5.42-pre"
 
 [deps]
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"

From 7791faa0ca116c047b41b8c556ec5175c4507a24 Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Tue, 12 Sep 2023 10:53:52 +0200
Subject: [PATCH 137/163] Some multi-threading improvements (#1630)

* fix multi-threaded parabolic terms on ARM

On ARM, the previous versions resulted in

cfunction: closures are not supported on this platform

With this change, everything seems to work fine locally. At least
test/test_threaded.jl runs fine with two threads.

* reduce allocations of multi-threaded parabolic terms a bit

Polyester.jl passes arrays as pointer arrays to the closures without requiring
allocations. More complicated structs may still require allocations, so
unpacking some arrays before entering a threaded loop can reduce allocations.
* format --- src/solvers/dgmulti/dg_parabolic.jl | 5 +- src/solvers/dgsem_tree/dg_1d_parabolic.jl | 34 +++++++----- src/solvers/dgsem_tree/dg_2d_parabolic.jl | 51 ++++++++++-------- src/solvers/dgsem_tree/dg_3d_parabolic.jl | 65 ++++++++++++----------- 4 files changed, 86 insertions(+), 69 deletions(-) diff --git a/src/solvers/dgmulti/dg_parabolic.jl b/src/solvers/dgmulti/dg_parabolic.jl index 72dbe2c4256..7dfe4430244 100644 --- a/src/solvers/dgmulti/dg_parabolic.jl +++ b/src/solvers/dgmulti/dg_parabolic.jl @@ -62,9 +62,10 @@ end function transform_variables!(u_transformed, u, mesh, equations_parabolic::AbstractEquationsParabolic, dg::DGMulti, parabolic_scheme, cache, cache_parabolic) + transformation = gradient_variable_transformation(equations_parabolic) + @threaded for i in eachindex(u) - u_transformed[i] = gradient_variable_transformation(equations_parabolic)(u[i], - equations_parabolic) + u_transformed[i] = transformation(u[i], equations_parabolic) end end diff --git a/src/solvers/dgsem_tree/dg_1d_parabolic.jl b/src/solvers/dgsem_tree/dg_1d_parabolic.jl index c2aa75388c8..7602331d7c8 100644 --- a/src/solvers/dgsem_tree/dg_1d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_1d_parabolic.jl @@ -105,12 +105,13 @@ end function transform_variables!(u_transformed, u, mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, cache, cache_parabolic) + transformation = gradient_variable_transformation(equations_parabolic) + @threaded for element in eachelement(dg, cache) # Calculate volume terms in one element for i in eachnode(dg) u_node = get_node_vars(u, equations_parabolic, dg, i, element) - u_transformed_node = gradient_variable_transformation(equations_parabolic)(u_node, - equations_parabolic) + u_transformed_node = transformation(u_node, equations_parabolic) set_node_vars!(u_transformed, u_transformed_node, equations_parabolic, dg, i, element) end @@ -147,16 +148,18 @@ function prolong2interfaces!(cache_parabolic, flux_viscous, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) @unpack interfaces = cache_parabolic + @unpack neighbor_ids = interfaces + interfaces_u = interfaces.u @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] + left_element = neighbor_ids[1, interface] + right_element = neighbor_ids[2, interface] # interface in x-direction for v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - interfaces.u[1, v, interface] = flux_viscous[v, nnodes(dg), left_element] - interfaces.u[2, v, interface] = flux_viscous[v, 1, right_element] + # OBS! `interfaces_u` stores the interpolated *fluxes* and *not the solution*! + interfaces_u[1, v, interface] = flux_viscous[v, nnodes(dg), left_element] + interfaces_u[2, v, interface] = flux_viscous[v, 1, right_element] end end @@ -204,21 +207,22 @@ function prolong2boundaries!(cache_parabolic, flux_viscous, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) @unpack boundaries = cache_parabolic - @unpack neighbor_sides = boundaries + @unpack neighbor_sides, neighbor_ids = boundaries + boundaries_u = boundaries.u @threaded for boundary in eachboundary(dg, cache_parabolic) - element = boundaries.neighbor_ids[boundary] + element = neighbor_ids[boundary] if neighbor_sides[boundary] == 1 # element in -x direction of boundary for v in eachvariable(equations_parabolic) - # OBS! 
`boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[1, v, boundary] = flux_viscous[v, nnodes(dg), element] + # OBS! `boundaries_u` stores the interpolated *fluxes* and *not the solution*! + boundaries_u[1, v, boundary] = flux_viscous[v, nnodes(dg), element] end else # Element in +x direction of boundary for v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[2, v, boundary] = flux_viscous[v, 1, element] + # OBS! `boundaries_u` stores the interpolated *fluxes* and *not the solution*! + boundaries_u[2, v, boundary] = flux_viscous[v, 1, element] end end end @@ -552,8 +556,10 @@ end # where f(u) is the inviscid flux and g(u) is the viscous flux. function apply_jacobian_parabolic!(du, mesh::TreeMesh{1}, equations::AbstractEquationsParabolic, dg::DG, cache) + @unpack inverse_jacobian = cache.elements + @threaded for element in eachelement(dg, cache) - factor = cache.elements.inverse_jacobian[element] + factor = inverse_jacobian[element] for i in eachnode(dg) for v in eachvariable(equations) diff --git a/src/solvers/dgsem_tree/dg_2d_parabolic.jl b/src/solvers/dgsem_tree/dg_2d_parabolic.jl index 0da25230380..3dbc55412ad 100644 --- a/src/solvers/dgsem_tree/dg_2d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_2d_parabolic.jl @@ -118,12 +118,13 @@ end function transform_variables!(u_transformed, u, mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, cache, cache_parabolic) + transformation = gradient_variable_transformation(equations_parabolic) + @threaded for element in eachelement(dg, cache) # Calculate volume terms in one element for j in eachnode(dg), i in eachnode(dg) u_node = get_node_vars(u, equations_parabolic, dg, i, j, element) - u_transformed_node = gradient_variable_transformation(equations_parabolic)(u_node, - equations_parabolic) + u_transformed_node = transformation(u_node, equations_parabolic) set_node_vars!(u_transformed, u_transformed_node, equations_parabolic, dg, i, j, element) end @@ -168,30 +169,31 @@ function prolong2interfaces!(cache_parabolic, flux_viscous, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) @unpack interfaces = cache_parabolic - @unpack orientations = interfaces + @unpack orientations, neighbor_ids = interfaces + interfaces_u = interfaces.u flux_viscous_x, flux_viscous_y = flux_viscous @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] + left_element = neighbor_ids[1, interface] + right_element = neighbor_ids[2, interface] if orientations[interface] == 1 # interface in x-direction for j in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - interfaces.u[1, v, j, interface] = flux_viscous_x[v, nnodes(dg), j, + # OBS! `interfaces_u` stores the interpolated *fluxes* and *not the solution*! + interfaces_u[1, v, j, interface] = flux_viscous_x[v, nnodes(dg), j, left_element] - interfaces.u[2, v, j, interface] = flux_viscous_x[v, 1, j, + interfaces_u[2, v, j, interface] = flux_viscous_x[v, 1, j, right_element] end else # if orientations[interface] == 2 # interface in y-direction for i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! 
- interfaces.u[1, v, i, interface] = flux_viscous_y[v, i, nnodes(dg), + # OBS! `interfaces_u` stores the interpolated *fluxes* and *not the solution*! + interfaces_u[1, v, i, interface] = flux_viscous_y[v, i, nnodes(dg), left_element] - interfaces.u[2, v, i, interface] = flux_viscous_y[v, i, 1, + interfaces_u[2, v, i, interface] = flux_viscous_y[v, i, 1, right_element] end end @@ -244,25 +246,26 @@ function prolong2boundaries!(cache_parabolic, flux_viscous, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) @unpack boundaries = cache_parabolic - @unpack orientations, neighbor_sides = boundaries + @unpack orientations, neighbor_sides, neighbor_ids = boundaries + boundaries_u = boundaries.u flux_viscous_x, flux_viscous_y = flux_viscous @threaded for boundary in eachboundary(dg, cache_parabolic) - element = boundaries.neighbor_ids[boundary] + element = neighbor_ids[boundary] if orientations[boundary] == 1 # boundary in x-direction if neighbor_sides[boundary] == 1 # element in -x direction of boundary for l in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[1, v, l, boundary] = flux_viscous_x[v, nnodes(dg), l, + # OBS! `boundaries_u` stores the interpolated *fluxes* and *not the solution*! + boundaries_u[1, v, l, boundary] = flux_viscous_x[v, nnodes(dg), l, element] end else # Element in +x direction of boundary for l in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[2, v, l, boundary] = flux_viscous_x[v, 1, l, element] + # OBS! `boundaries_u` stores the interpolated *fluxes* and *not the solution*! + boundaries_u[2, v, l, boundary] = flux_viscous_x[v, 1, l, element] end end else # if orientations[boundary] == 2 @@ -270,15 +273,15 @@ function prolong2boundaries!(cache_parabolic, flux_viscous, if neighbor_sides[boundary] == 1 # element in -y direction of boundary for l in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[1, v, l, boundary] = flux_viscous_y[v, l, nnodes(dg), + # OBS! `boundaries_u` stores the interpolated *fluxes* and *not the solution*! + boundaries_u[1, v, l, boundary] = flux_viscous_y[v, l, nnodes(dg), element] end else # element in +y direction of boundary for l in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[2, v, l, boundary] = flux_viscous_y[v, l, 1, element] + # OBS! `boundaries_u` stores the interpolated *fluxes* and *not the solution*! + boundaries_u[2, v, l, boundary] = flux_viscous_y[v, l, 1, element] end end end @@ -608,7 +611,7 @@ function prolong2mortars!(cache, flux_viscous::Tuple{AbstractArray, AbstractArra end # NOTE: Use analogy to "calc_mortar_flux!" for hyperbolic eqs with no nonconservative terms. -# Reasoning: "calc_interface_flux!" for parabolic part is implemented as the version for +# Reasoning: "calc_interface_flux!" for parabolic part is implemented as the version for # hyperbolic terms with conserved terms only, i.e., no nonconservative terms. function calc_mortar_flux!(surface_flux_values, mesh::TreeMesh{2}, @@ -934,8 +937,10 @@ end # where f(u) is the inviscid flux and g(u) is the viscous flux. 
function apply_jacobian_parabolic!(du, mesh::Union{TreeMesh{2}, P4estMesh{2}}, equations::AbstractEquationsParabolic, dg::DG, cache) + @unpack inverse_jacobian = cache.elements + @threaded for element in eachelement(dg, cache) - factor = cache.elements.inverse_jacobian[element] + factor = inverse_jacobian[element] for j in eachnode(dg), i in eachnode(dg) for v in eachvariable(equations) diff --git a/src/solvers/dgsem_tree/dg_3d_parabolic.jl b/src/solvers/dgsem_tree/dg_3d_parabolic.jl index 2745d312b37..9817e0e5f0e 100644 --- a/src/solvers/dgsem_tree/dg_3d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_3d_parabolic.jl @@ -118,12 +118,13 @@ end function transform_variables!(u_transformed, u, mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations_parabolic::AbstractEquationsParabolic, dg::DG, parabolic_scheme, cache, cache_parabolic) + transformation = gradient_variable_transformation(equations_parabolic) + @threaded for element in eachelement(dg, cache) # Calculate volume terms in one element for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) u_node = get_node_vars(u, equations_parabolic, dg, i, j, k, element) - u_transformed_node = gradient_variable_transformation(equations_parabolic)(u_node, - equations_parabolic) + u_transformed_node = transformation(u_node, equations_parabolic) set_node_vars!(u_transformed, u_transformed_node, equations_parabolic, dg, i, j, k, element) end @@ -175,43 +176,44 @@ function prolong2interfaces!(cache_parabolic, flux_viscous, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) @unpack interfaces = cache_parabolic - @unpack orientations = interfaces + @unpack orientations, neighbor_ids = interfaces + interfaces_u = interfaces.u flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous @threaded for interface in eachinterface(dg, cache) - left_element = interfaces.neighbor_ids[1, interface] - right_element = interfaces.neighbor_ids[2, interface] + left_element = neighbor_ids[1, interface] + right_element = neighbor_ids[2, interface] if orientations[interface] == 1 # interface in x-direction for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - interfaces.u[1, v, j, k, interface] = flux_viscous_x[v, nnodes(dg), j, + # OBS! `interfaces_u` stores the interpolated *fluxes* and *not the solution*! + interfaces_u[1, v, j, k, interface] = flux_viscous_x[v, nnodes(dg), j, k, left_element] - interfaces.u[2, v, j, k, interface] = flux_viscous_x[v, 1, j, k, + interfaces_u[2, v, j, k, interface] = flux_viscous_x[v, 1, j, k, right_element] end elseif orientations[interface] == 2 # interface in y-direction for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! - interfaces.u[1, v, i, k, interface] = flux_viscous_y[v, i, nnodes(dg), + # OBS! `interfaces_u` stores the interpolated *fluxes* and *not the solution*! + interfaces_u[1, v, i, k, interface] = flux_viscous_y[v, i, nnodes(dg), k, left_element] - interfaces.u[2, v, i, k, interface] = flux_viscous_y[v, i, 1, k, + interfaces_u[2, v, i, k, interface] = flux_viscous_y[v, i, 1, k, right_element] end else # if orientations[interface] == 3 # interface in z-direction for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `interfaces.u` stores the interpolated *fluxes* and *not the solution*! 
- interfaces.u[1, v, i, j, interface] = flux_viscous_z[v, i, j, + # OBS! `interfaces_u` stores the interpolated *fluxes* and *not the solution*! + interfaces_u[1, v, i, j, interface] = flux_viscous_z[v, i, j, nnodes(dg), left_element] - interfaces.u[2, v, i, j, interface] = flux_viscous_z[v, i, j, 1, + interfaces_u[2, v, i, j, interface] = flux_viscous_z[v, i, j, 1, right_element] end end @@ -265,11 +267,12 @@ function prolong2boundaries!(cache_parabolic, flux_viscous, equations_parabolic::AbstractEquationsParabolic, surface_integral, dg::DG, cache) @unpack boundaries = cache_parabolic - @unpack orientations, neighbor_sides = boundaries + @unpack orientations, neighbor_sides, neighbor_ids = boundaries + boundaries_u = boundaries.u flux_viscous_x, flux_viscous_y, flux_viscous_z = flux_viscous @threaded for boundary in eachboundary(dg, cache_parabolic) - element = boundaries.neighbor_ids[boundary] + element = neighbor_ids[boundary] if orientations[boundary] == 1 # boundary in x-direction @@ -277,15 +280,15 @@ function prolong2boundaries!(cache_parabolic, flux_viscous, # element in -x direction of boundary for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[1, v, j, k, boundary] = flux_viscous_x[v, nnodes(dg), + # OBS! `boundaries_u` stores the interpolated *fluxes* and *not the solution*! + boundaries_u[1, v, j, k, boundary] = flux_viscous_x[v, nnodes(dg), j, k, element] end else # Element in +x direction of boundary for k in eachnode(dg), j in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[2, v, j, k, boundary] = flux_viscous_x[v, 1, j, k, + # OBS! `boundaries_u` stores the interpolated *fluxes* and *not the solution*! + boundaries_u[2, v, j, k, boundary] = flux_viscous_x[v, 1, j, k, element] end end @@ -295,8 +298,8 @@ function prolong2boundaries!(cache_parabolic, flux_viscous, # element in -y direction of boundary for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[1, v, i, k, boundary] = flux_viscous_y[v, i, + # OBS! `boundaries_u` stores the interpolated *fluxes* and *not the solution*! + boundaries_u[1, v, i, k, boundary] = flux_viscous_y[v, i, nnodes(dg), k, element] end @@ -304,8 +307,8 @@ function prolong2boundaries!(cache_parabolic, flux_viscous, # element in +y direction of boundary for k in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[2, v, i, k, boundary] = flux_viscous_y[v, i, 1, k, + # OBS! `boundaries_u` stores the interpolated *fluxes* and *not the solution*! + boundaries_u[2, v, i, k, boundary] = flux_viscous_y[v, i, 1, k, element] end end @@ -315,8 +318,8 @@ function prolong2boundaries!(cache_parabolic, flux_viscous, # element in -z direction of boundary for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[1, v, i, j, boundary] = flux_viscous_z[v, i, j, + # OBS! `boundaries_u` stores the interpolated *fluxes* and *not the solution*! 
+ boundaries_u[1, v, i, j, boundary] = flux_viscous_z[v, i, j, nnodes(dg), element] end @@ -324,8 +327,8 @@ function prolong2boundaries!(cache_parabolic, flux_viscous, # element in +z direction of boundary for j in eachnode(dg), i in eachnode(dg), v in eachvariable(equations_parabolic) - # OBS! `boundaries.u` stores the interpolated *fluxes* and *not the solution*! - boundaries.u[2, v, i, j, boundary] = flux_viscous_z[v, i, j, 1, + # OBS! `boundaries_u` stores the interpolated *fluxes* and *not the solution*! + boundaries_u[2, v, i, j, boundary] = flux_viscous_z[v, i, j, 1, element] end end @@ -820,7 +823,7 @@ function prolong2mortars!(cache, end # NOTE: Use analogy to "calc_mortar_flux!" for hyperbolic eqs with no nonconservative terms. -# Reasoning: "calc_interface_flux!" for parabolic part is implemented as the version for +# Reasoning: "calc_interface_flux!" for parabolic part is implemented as the version for # hyperbolic terms with conserved terms only, i.e., no nonconservative terms. function calc_mortar_flux!(surface_flux_values, mesh::TreeMesh{3}, @@ -1124,8 +1127,10 @@ end # where f(u) is the inviscid flux and g(u) is the viscous flux. function apply_jacobian_parabolic!(du, mesh::Union{TreeMesh{3}, P4estMesh{3}}, equations::AbstractEquationsParabolic, dg::DG, cache) + @unpack inverse_jacobian = cache.elements + @threaded for element in eachelement(dg, cache) - factor = cache.elements.inverse_jacobian[element] + factor = inverse_jacobian[element] for k in eachnode(dg), j in eachnode(dg), i in eachnode(dg) for v in eachvariable(equations) From cfbf048308b1074591e08f8627c0089871bf91f3 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Tue, 12 Sep 2023 12:11:06 +0200 Subject: [PATCH 138/163] remove JuliaCOn 2023 announcement (#1631) --- README.md | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/README.md b/README.md index 63540b1f640..c177ad2347f 100644 --- a/README.md +++ b/README.md @@ -17,16 +17,6 @@

-*** -**Trixi.jl at JuliaCon 2023**
-At this year's JuliaCon, we will be present with an online contribution that involves Trixi.jl: - -* [Scaling Trixi.jl to more than 10,000 cores using MPI](https://pretalx.com/juliacon2023/talk/PC8PZ8/), - 27th July 2023, 10:30–11:30 (US/Eastern), 32-G449 (Kiva) - -We are looking forward to seeing you there ♥️ -*** - **Trixi.jl** is a numerical simulation framework for hyperbolic conservation laws written in [Julia](https://julialang.org). A key objective for the framework is to be useful to both scientists and students. Therefore, next to From b9f3f3051c483e8ad09cb51857eec9bb228e267c Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Tue, 12 Sep 2023 12:50:44 +0200 Subject: [PATCH 139/163] set version to v0.5.42 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index d37c0548a6a..9f27fbb2710 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.42-pre" +version = "0.5.42" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From daf18a5352a80ca2fbb2077ace991b9e5cc33c16 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Tue, 12 Sep 2023 12:50:58 +0200 Subject: [PATCH 140/163] set development version to v0.5.43-pre --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 9f27fbb2710..06fd29ba590 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.42" +version = "0.5.43-pre" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From 3523c49120d7c282518769a5b3d40ce7c9cc5882 Mon Sep 17 00:00:00 2001 From: Daniel Doehring Date: Tue, 12 Sep 2023 15:00:58 +0200 Subject: [PATCH 141/163] AMR for 1D Parabolic Eqs (Clean branch) (#1605) * Clean branch * Un-Comment * un-comment * test coarsen * remove redundancy * Remove support for passive terms * expand resize * comments * format * Avoid code duplication * Update src/callbacks_step/amr_dg1d.jl Co-authored-by: Michael Schlottke-Lakemper * comment * comment & format * Try to increase coverage * Slightly more expressive names * Apply suggestions from code review --------- Co-authored-by: Michael Schlottke-Lakemper --- ...ixir_navierstokes_convergence_walls_amr.jl | 172 ++++++++++++++++++ src/callbacks_step/amr.jl | 158 ++++++++++++++++ src/callbacks_step/amr_dg1d.jl | 73 ++++++++ .../dgsem_tree/container_viscous_1d.jl | 58 ++++++ src/solvers/dgsem_tree/dg.jl | 3 + src/solvers/dgsem_tree/dg_1d_parabolic.jl | 14 +- test/test_parabolic_1d.jl | 41 +++++ test/test_parabolic_2d.jl | 12 +- test/test_parabolic_3d.jl | 12 +- 9 files changed, 523 insertions(+), 20 deletions(-) create mode 100644 examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls_amr.jl create mode 100644 src/solvers/dgsem_tree/container_viscous_1d.jl diff --git a/examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls_amr.jl b/examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls_amr.jl new file mode 100644 index 00000000000..1daeab04a71 --- /dev/null +++ b/examples/tree_1d_dgsem/elixir_navierstokes_convergence_walls_amr.jl @@ -0,0 +1,172 @@ +using OrdinaryDiffEq +using Trixi + 
+############################################################################### +# semidiscretization of the ideal compressible Navier-Stokes equations + +prandtl_number() = 0.72 +mu() = 0.01 + +equations = CompressibleEulerEquations1D(1.4) +equations_parabolic = CompressibleNavierStokesDiffusion1D(equations, mu=mu(), Prandtl=prandtl_number(), + gradient_variables=GradientVariablesEntropy()) + +# Create DG solver with polynomial degree = 3 and (local) Lax-Friedrichs/Rusanov flux as surface flux +solver = DGSEM(polydeg=3, surface_flux=flux_lax_friedrichs, + volume_integral=VolumeIntegralWeakForm()) + +coordinates_min = -1.0 +coordinates_max = 1.0 + +# Create a uniformly refined mesh with periodic boundaries +mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level=3, + periodicity=false, + n_cells_max=30_000) # set maximum capacity of tree data structure + +# Note: the initial condition cannot be specialized to `CompressibleNavierStokesDiffusion1D` +# since it is called by both the parabolic solver (which passes in `CompressibleNavierStokesDiffusion1D`) +# and by the initial condition (which passes in `CompressibleEulerEquations1D`). +# This convergence test setup was originally derived by Andrew Winters (@andrewwinters5000) +function initial_condition_navier_stokes_convergence_test(x, t, equations) + # Amplitude and shift + A = 0.5 + c = 2.0 + + # convenience values for trig. functions + pi_x = pi * x[1] + pi_t = pi * t + + rho = c + A * cos(pi_x) * cos(pi_t) + v1 = log(x[1] + 2.0) * (1.0 - exp(-A * (x[1] - 1.0)) ) * cos(pi_t) + p = rho^2 + + return prim2cons(SVector(rho, v1, p), equations) +end + +@inline function source_terms_navier_stokes_convergence_test(u, x, t, equations) + x = x[1] + + # TODO: parabolic + # we currently need to hardcode these parameters until we fix the "combined equation" issue + # see also https://github.com/trixi-framework/Trixi.jl/pull/1160 + inv_gamma_minus_one = inv(equations.gamma - 1) + Pr = prandtl_number() + mu_ = mu() + + # Same settings as in `initial_condition` + # Amplitude and shift + A = 0.5 + c = 2.0 + + # convenience values for trig. 
functions
+    pi_x = pi * x
+    pi_t = pi * t
+
+    # compute the manufactured solution and all necessary derivatives
+    rho = c + A * cos(pi_x) * cos(pi_t)
+    rho_t = -pi * A * cos(pi_x) * sin(pi_t)
+    rho_x = -pi * A * sin(pi_x) * cos(pi_t)
+    rho_xx = -pi * pi * A * cos(pi_x) * cos(pi_t)
+
+    v1 = log(x + 2.0) * (1.0 - exp(-A * (x - 1.0))) * cos(pi_t)
+    v1_t = -pi * log(x + 2.0) * (1.0 - exp(-A * (x - 1.0))) * sin(pi_t)
+    v1_x = (A * log(x + 2.0) * exp(-A * (x - 1.0)) + (1.0 - exp(-A * (x - 1.0))) / (x + 2.0)) * cos(pi_t)
+    v1_xx = (( 2.0 * A * exp(-A * (x - 1.0)) / (x + 2.0)
+             - A * A * log(x + 2.0) * exp(-A * (x - 1.0))
+             - (1.0 - exp(-A * (x - 1.0))) / ((x + 2.0) * (x + 2.0))) * cos(pi_t))
+
+    p = rho * rho
+    p_t = 2.0 * rho * rho_t
+    p_x = 2.0 * rho * rho_x
+    p_xx = 2.0 * rho * rho_xx + 2.0 * rho_x * rho_x
+
+    # Note this simplifies slightly compared to the multi-dimensional version,
+    # since there is only one velocity component in 1D
+    E = p * inv_gamma_minus_one + 0.5 * rho * v1^2
+    E_t = p_t * inv_gamma_minus_one + 0.5 * rho_t * v1^2 + rho * v1 * v1_t
+    E_x = p_x * inv_gamma_minus_one + 0.5 * rho_x * v1^2 + rho * v1 * v1_x
+
+    # Some convenience constants
+    T_const = equations.gamma * inv_gamma_minus_one / Pr
+    inv_rho_cubed = 1.0 / (rho^3)
+
+    # compute the source terms
+    # density equation
+    du1 = rho_t + rho_x * v1 + rho * v1_x
+
+    # x-momentum equation
+    du2 = ( rho_t * v1 + rho * v1_t
+           + p_x + rho_x * v1^2 + 2.0 * rho * v1 * v1_x
+           # stress tensor from x-direction
+           - v1_xx * mu_)
+
+    # total energy equation
+    du3 = ( E_t + v1_x * (E + p) + v1 * (E_x + p_x)
+           # stress tensor and temperature gradient terms from x-direction
+           - v1_xx * v1 * mu_
+           - v1_x * v1_x * mu_
+           - T_const * inv_rho_cubed * ( p_xx * rho * rho
+                                         - 2.0 * p_x * rho * rho_x
+                                         + 2.0 * p * rho_x * rho_x
+                                         - p * rho * rho_xx ) * mu_ )
+
+    return SVector(du1, du2, du3)
+end
+
+initial_condition = initial_condition_navier_stokes_convergence_test
+
+# BC types
+velocity_bc_left_right = NoSlip((x, t, equations) -> initial_condition_navier_stokes_convergence_test(x, t, equations)[2])
+
+heat_bc_left = Isothermal((x, t, equations) ->
+                          Trixi.temperature(initial_condition_navier_stokes_convergence_test(x, t, equations),
+                                            equations_parabolic))
+heat_bc_right = Adiabatic((x, t, equations) -> 0.0)
+
+boundary_condition_left = BoundaryConditionNavierStokesWall(velocity_bc_left_right, heat_bc_left)
+boundary_condition_right = BoundaryConditionNavierStokesWall(velocity_bc_left_right, heat_bc_right)
+
+# define inviscid boundary conditions
+boundary_conditions = (; x_neg = boundary_condition_slip_wall,
+                         x_pos = boundary_condition_slip_wall)
+
+# define viscous boundary conditions
+boundary_conditions_parabolic = (; x_neg = boundary_condition_left,
+                                   x_pos = boundary_condition_right)
+
+semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), initial_condition, solver;
+                                             boundary_conditions=(boundary_conditions, boundary_conditions_parabolic),
+                                             source_terms=source_terms_navier_stokes_convergence_test)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+ +# Create ODE problem with time span `tspan` +tspan = (0.0, 1.0) +ode = semidiscretize(semi, tspan) + +summary_callback = SummaryCallback() +alive_callback = AliveCallback(alive_interval=10) +analysis_interval = 100 +analysis_callback = AnalysisCallback(semi, interval=analysis_interval) + +amr_controller = ControllerThreeLevel(semi, + IndicatorLöhner(semi, variable=Trixi.density), + base_level=3, + med_level=4, med_threshold=0.005, + max_level=5, max_threshold=0.01) + +amr_callback = AMRCallback(semi, amr_controller, + interval=5, + adapt_initial_condition=true) + +# Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver +callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, amr_callback) + +############################################################################### +# run the simulation + +time_int_tol = 1e-8 +sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, dt = 1e-5, + ode_default_options()..., callback=callbacks) +summary_callback() # print the timer summary \ No newline at end of file diff --git a/src/callbacks_step/amr.jl b/src/callbacks_step/amr.jl index 4d80e6e1139..ba840ff9675 100644 --- a/src/callbacks_step/amr.jl +++ b/src/callbacks_step/amr.jl @@ -192,6 +192,16 @@ end amr_callback(u_ode, mesh_equations_solver_cache(semi)..., semi, t, iter; kwargs...) end +@inline function (amr_callback::AMRCallback)(u_ode::AbstractVector, + semi::SemidiscretizationHyperbolicParabolic, + t, iter; + kwargs...) + # Note that we don't `wrap_array` the vector `u_ode` to be able to `resize!` + # it when doing AMR while still dispatching on the `mesh` etc. + amr_callback(u_ode, mesh_equations_solver_cache(semi)..., semi.cache_parabolic, + semi, t, iter; kwargs...) +end + # `passive_args` is currently used for Euler with self-gravity to adapt the gravity solver # passively without querying its indicator, based on the assumption that both solvers use # the same mesh. 
That's a hack and should be improved in the future once we have more examples @@ -346,6 +356,154 @@ function (amr_callback::AMRCallback)(u_ode::AbstractVector, mesh::TreeMesh, return has_changed end +function (amr_callback::AMRCallback)(u_ode::AbstractVector, mesh::TreeMesh, + equations, dg::DG, + cache, cache_parabolic, + semi::SemidiscretizationHyperbolicParabolic, + t, iter; + only_refine = false, only_coarsen = false) + @unpack controller, adaptor = amr_callback + + u = wrap_array(u_ode, mesh, equations, dg, cache) + # Indicator kept based on hyperbolic variables + lambda = @trixi_timeit timer() "indicator" controller(u, mesh, equations, dg, cache, + t = t, iter = iter) + + if mpi_isparallel() + error("MPI has not been verified yet for parabolic AMR") + + # Collect lambda for all elements + lambda_global = Vector{eltype(lambda)}(undef, nelementsglobal(dg, cache)) + # Use parent because n_elements_by_rank is an OffsetArray + recvbuf = MPI.VBuffer(lambda_global, parent(cache.mpi_cache.n_elements_by_rank)) + MPI.Allgatherv!(lambda, recvbuf, mpi_comm()) + lambda = lambda_global + end + + leaf_cell_ids = leaf_cells(mesh.tree) + @boundscheck begin + @assert axes(lambda)==axes(leaf_cell_ids) ("Indicator (axes = $(axes(lambda))) and leaf cell (axes = $(axes(leaf_cell_ids))) arrays have different axes") + end + + @unpack to_refine, to_coarsen = amr_callback.amr_cache + empty!(to_refine) + empty!(to_coarsen) + for element in 1:length(lambda) + controller_value = lambda[element] + if controller_value > 0 + push!(to_refine, leaf_cell_ids[element]) + elseif controller_value < 0 + push!(to_coarsen, leaf_cell_ids[element]) + end + end + + @trixi_timeit timer() "refine" if !only_coarsen && !isempty(to_refine) + # refine mesh + refined_original_cells = @trixi_timeit timer() "mesh" refine!(mesh.tree, + to_refine) + + # Find all indices of elements whose cell ids are in refined_original_cells + # Note: This assumes same indices for hyperbolic and parabolic part. + elements_to_refine = findall(in(refined_original_cells), + cache.elements.cell_ids) + + # refine solver + @trixi_timeit timer() "solver" refine!(u_ode, adaptor, mesh, equations, dg, + cache, cache_parabolic, + elements_to_refine) + else + # If there is nothing to refine, create empty array for later use + refined_original_cells = Int[] + end + + @trixi_timeit timer() "coarsen" if !only_refine && !isempty(to_coarsen) + # Since the cells may have been shifted due to refinement, first we need to + # translate the old cell ids to the new cell ids + if !isempty(to_coarsen) + to_coarsen = original2refined(to_coarsen, refined_original_cells, mesh) + end + + # Next, determine the parent cells from which the fine cells are to be + # removed, since these are needed for the coarsen! function. However, since + # we only want to coarsen if *all* child cells are marked for coarsening, + # we count the coarsening indicators for each parent cell and only coarsen + # if all children are marked as such (i.e., where the count is 2^ndims). At + # the same time, check if a cell is marked for coarsening even though it is + # *not* a leaf cell -> this can only happen if it was refined due to 2:1 + # smoothing during the preceding refinement operation. 
+ parents_to_coarsen = zeros(Int, length(mesh.tree)) + for cell_id in to_coarsen + # If cell has no parent, it cannot be coarsened + if !has_parent(mesh.tree, cell_id) + continue + end + + # If cell is not leaf (anymore), it cannot be coarsened + if !is_leaf(mesh.tree, cell_id) + continue + end + + # Increase count for parent cell + parent_id = mesh.tree.parent_ids[cell_id] + parents_to_coarsen[parent_id] += 1 + end + + # Extract only those parent cells for which all children should be coarsened + to_coarsen = collect(1:length(parents_to_coarsen))[parents_to_coarsen .== 2^ndims(mesh)] + + # Finally, coarsen mesh + coarsened_original_cells = @trixi_timeit timer() "mesh" coarsen!(mesh.tree, + to_coarsen) + + # Convert coarsened parent cell ids to the list of child cell ids that have + # been removed, since this is the information that is expected by the solver + removed_child_cells = zeros(Int, + n_children_per_cell(mesh.tree) * + length(coarsened_original_cells)) + for (index, coarse_cell_id) in enumerate(coarsened_original_cells) + for child in 1:n_children_per_cell(mesh.tree) + removed_child_cells[n_children_per_cell(mesh.tree) * (index - 1) + child] = coarse_cell_id + + child + end + end + + # Find all indices of elements whose cell ids are in removed_child_cells + # Note: This assumes same indices for hyperbolic and parabolic part. + elements_to_remove = findall(in(removed_child_cells), cache.elements.cell_ids) + + # coarsen solver + @trixi_timeit timer() "solver" coarsen!(u_ode, adaptor, mesh, equations, dg, + cache, cache_parabolic, + elements_to_remove) + else + # If there is nothing to coarsen, create empty array for later use + coarsened_original_cells = Int[] + end + + # Store whether there were any cells coarsened or refined + has_changed = !isempty(refined_original_cells) || !isempty(coarsened_original_cells) + if has_changed # TODO: Taal decide, where shall we set this? 
+ # don't set it to has_changed since there can be changes from earlier calls + mesh.unsaved_changes = true + end + + # Dynamically balance computational load by first repartitioning the mesh and then redistributing the cells/elements + if has_changed && mpi_isparallel() && amr_callback.dynamic_load_balancing + error("MPI has not been verified yet for parabolic AMR") + + @trixi_timeit timer() "dynamic load balancing" begin + old_mpi_ranks_per_cell = copy(mesh.tree.mpi_ranks) + + partition!(mesh) + + rebalance_solver!(u_ode, mesh, equations, dg, cache, old_mpi_ranks_per_cell) + end + end + + # Return true if there were any cells coarsened or refined, otherwise false + return has_changed +end + # Copy controller values to quad user data storage, will be called below function copy_to_quad_iter_volume(info, user_data) info_pw = PointerWrapper(info) diff --git a/src/callbacks_step/amr_dg1d.jl b/src/callbacks_step/amr_dg1d.jl index e31a74730ea..e721ccc61cb 100644 --- a/src/callbacks_step/amr_dg1d.jl +++ b/src/callbacks_step/amr_dg1d.jl @@ -76,6 +76,44 @@ function refine!(u_ode::AbstractVector, adaptor, mesh::TreeMesh{1}, return nothing end +function refine!(u_ode::AbstractVector, adaptor, mesh::TreeMesh{1}, + equations, dg::DGSEM, cache, cache_parabolic, + elements_to_refine) + # Call `refine!` for the hyperbolic part, which does the heavy lifting of + # actually transferring the solution to the refined cells + refine!(u_ode, adaptor, mesh, equations, dg, cache, elements_to_refine) + + # The remaining function only handles the necessary adaptation of the data structures + # for the parabolic part of the semidiscretization + + # Get new list of leaf cells + leaf_cell_ids = local_leaf_cells(mesh.tree) + + @unpack elements, viscous_container = cache_parabolic + resize!(elements, length(leaf_cell_ids)) + init_elements!(elements, leaf_cell_ids, mesh, dg.basis) + + # Resize parabolic helper variables + resize!(viscous_container, equations, dg, cache) + + # re-initialize interfaces container + @unpack interfaces = cache_parabolic + resize!(interfaces, count_required_interfaces(mesh, leaf_cell_ids)) + init_interfaces!(interfaces, elements, mesh) + + # re-initialize boundaries container + @unpack boundaries = cache_parabolic + resize!(boundaries, count_required_boundaries(mesh, leaf_cell_ids)) + init_boundaries!(boundaries, elements, mesh) + + # Sanity check + if isperiodic(mesh.tree) + @assert ninterfaces(interfaces)==1 * nelements(dg, cache_parabolic) ("For 1D and periodic domains, the number of interfaces must be the same as the number of elements") + end + + return nothing +end + # TODO: Taal compare performance of different implementations # Refine solution data u for an element, using L2 projection (interpolation) function refine_element!(u::AbstractArray{<:Any, 3}, element_id, @@ -201,6 +239,41 @@ function coarsen!(u_ode::AbstractVector, adaptor, mesh::TreeMesh{1}, return nothing end +function coarsen!(u_ode::AbstractVector, adaptor, mesh::TreeMesh{1}, + equations, dg::DGSEM, cache, cache_parabolic, + elements_to_remove) + # Call `coarsen!` for the hyperbolic part, which does the heavy lifting of + # actually transferring the solution to the coarsened cells + coarsen!(u_ode, adaptor, mesh, equations, dg, cache, elements_to_remove) + + # Get new list of leaf cells + leaf_cell_ids = local_leaf_cells(mesh.tree) + + @unpack elements, viscous_container = cache_parabolic + resize!(elements, length(leaf_cell_ids)) + init_elements!(elements, leaf_cell_ids, mesh, dg.basis) + + # Resize parabolic helper 
variables + resize!(viscous_container, equations, dg, cache) + + # re-initialize interfaces container + @unpack interfaces = cache_parabolic + resize!(interfaces, count_required_interfaces(mesh, leaf_cell_ids)) + init_interfaces!(interfaces, elements, mesh) + + # re-initialize boundaries container + @unpack boundaries = cache_parabolic + resize!(boundaries, count_required_boundaries(mesh, leaf_cell_ids)) + init_boundaries!(boundaries, elements, mesh) + + # Sanity check + if isperiodic(mesh.tree) + @assert ninterfaces(interfaces)==1 * nelements(dg, cache_parabolic) ("For 1D and periodic domains, the number of interfaces must be the same as the number of elements") + end + + return nothing +end + # TODO: Taal compare performance of different implementations # Coarsen solution data u for two elements, using L2 projection function coarsen_elements!(u::AbstractArray{<:Any, 3}, element_id, diff --git a/src/solvers/dgsem_tree/container_viscous_1d.jl b/src/solvers/dgsem_tree/container_viscous_1d.jl new file mode 100644 index 00000000000..a4919f75396 --- /dev/null +++ b/src/solvers/dgsem_tree/container_viscous_1d.jl @@ -0,0 +1,58 @@ +mutable struct ViscousContainer1D{uEltype <: Real} + u_transformed::Array{uEltype, 3} + gradients::Array{uEltype, 3} + flux_viscous::Array{uEltype, 3} + + # internal `resize!`able storage + _u_transformed::Vector{uEltype} + _gradients::Vector{uEltype} + _flux_viscous::Vector{uEltype} + + function ViscousContainer1D{uEltype}(n_vars::Integer, n_nodes::Integer, + n_elements::Integer) where {uEltype <: Real} + new(Array{uEltype, 3}(undef, n_vars, n_nodes, n_elements), + Array{uEltype, 3}(undef, n_vars, n_nodes, n_elements), + Array{uEltype, 3}(undef, n_vars, n_nodes, n_elements), + Vector{uEltype}(undef, n_vars * n_nodes * n_elements), + Vector{uEltype}(undef, n_vars * n_nodes * n_elements), + Vector{uEltype}(undef, n_vars * n_nodes * n_elements)) + end +end + +function init_viscous_container(n_vars::Integer, n_nodes::Integer, + n_elements::Integer, + ::Type{uEltype}) where {uEltype <: Real} + return ViscousContainer1D{uEltype}(n_vars, n_nodes, n_elements) +end + +# Only one-dimensional `Array`s are `resize!`able in Julia. +# Hence, we use `Vector`s as internal storage and `resize!` +# them whenever needed. Then, we reuse the same memory by +# `unsafe_wrap`ping multi-dimensional `Array`s around the +# internal storage. 
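+#
+# As a minimal, hypothetical sketch of this pattern (for illustration only,
+# not code used by Trixi.jl itself):
+#
+#   _storage = Vector{Float64}(undef, 2 * 3)          # resize!-able backing memory
+#   A = unsafe_wrap(Array, pointer(_storage), (2, 3)) # 2×3 array sharing that memory
+#   resize!(_storage, 2 * 5)                          # grow the backing vector ...
+#   A = unsafe_wrap(Array, pointer(_storage), (2, 5)) # ... and re-wrap it, since
+#                                                     # `resize!` may move the data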
+function Base.resize!(viscous_container::ViscousContainer1D, equations, dg, cache) + capacity = nvariables(equations) * nnodes(dg) * nelements(dg, cache) + resize!(viscous_container._u_transformed, capacity) + resize!(viscous_container._gradients, capacity) + resize!(viscous_container._flux_viscous, capacity) + + viscous_container.u_transformed = unsafe_wrap(Array, + pointer(viscous_container._u_transformed), + (nvariables(equations), + nnodes(dg), + nelements(dg, cache))) + + viscous_container.gradients = unsafe_wrap(Array, + pointer(viscous_container._gradients), + (nvariables(equations), + nnodes(dg), + nelements(dg, cache))) + + viscous_container.flux_viscous = unsafe_wrap(Array, + pointer(viscous_container._flux_viscous), + (nvariables(equations), + nnodes(dg), + nelements(dg, cache))) + + return nothing +end diff --git a/src/solvers/dgsem_tree/dg.jl b/src/solvers/dgsem_tree/dg.jl index 6e02bc1d94a..ff37bad3b3a 100644 --- a/src/solvers/dgsem_tree/dg.jl +++ b/src/solvers/dgsem_tree/dg.jl @@ -54,6 +54,9 @@ include("containers.jl") # Dimension-agnostic parallel setup include("dg_parallel.jl") +# Helper struct for parabolic AMR +include("container_viscous_1d.jl") + # 1D DG implementation include("dg_1d.jl") include("dg_1d_parabolic.jl") diff --git a/src/solvers/dgsem_tree/dg_1d_parabolic.jl b/src/solvers/dgsem_tree/dg_1d_parabolic.jl index 7602331d7c8..97e31e0e22b 100644 --- a/src/solvers/dgsem_tree/dg_1d_parabolic.jl +++ b/src/solvers/dgsem_tree/dg_1d_parabolic.jl @@ -17,7 +17,8 @@ function rhs_parabolic!(du, u, t, mesh::TreeMesh{1}, equations_parabolic::AbstractEquationsParabolic, initial_condition, boundary_conditions_parabolic, source_terms, dg::DG, parabolic_scheme, cache, cache_parabolic) - @unpack u_transformed, gradients, flux_viscous = cache_parabolic + @unpack viscous_container = cache_parabolic + @unpack u_transformed, gradients, flux_viscous = viscous_container # Convert conservative variables to a form more suitable for viscous flux calculations @trixi_timeit timer() "transform variables" begin @@ -534,18 +535,15 @@ function create_cache_parabolic(mesh::TreeMesh{1}, elements = init_elements(leaf_cell_ids, mesh, equations_hyperbolic, dg.basis, RealT, uEltype) - n_vars = nvariables(equations_hyperbolic) - n_nodes = nnodes(elements) - n_elements = nelements(elements) - u_transformed = Array{uEltype}(undef, n_vars, n_nodes, n_elements) - gradients = similar(u_transformed) - flux_viscous = similar(u_transformed) + viscous_container = init_viscous_container(nvariables(equations_hyperbolic), + nnodes(elements), nelements(elements), + uEltype) interfaces = init_interfaces(leaf_cell_ids, mesh, elements) boundaries = init_boundaries(leaf_cell_ids, mesh, elements) - cache = (; elements, interfaces, boundaries, gradients, flux_viscous, u_transformed) + cache = (; elements, interfaces, boundaries, viscous_container) return cache end diff --git a/test/test_parabolic_1d.jl b/test/test_parabolic_1d.jl index 06a55100d62..3c2b8855ce8 100644 --- a/test/test_parabolic_1d.jl +++ b/test/test_parabolic_1d.jl @@ -20,6 +20,28 @@ isdir(outdir) && rm(outdir, recursive=true) ) end + @trixi_testset "TreeMesh1D: elixir_advection_diffusion.jl (AMR)" begin + @test_trixi_include(joinpath(examples_dir(), "tree_1d_dgsem", "elixir_advection_diffusion.jl"), + tspan=(0.0, 0.0), initial_refinement_level = 5) + tspan=(0.0, 1.0) + ode = semidiscretize(semi, tspan) + amr_controller = ControllerThreeLevel(semi, IndicatorMax(semi, variable=first), + base_level=4, + med_level=5, med_threshold=0.1, + max_level=6, 
max_threshold=0.6) + amr_callback = AMRCallback(semi, amr_controller, + interval=5, + adapt_initial_condition=true) + + # Create a CallbackSet to collect all callbacks such that they can be passed to the ODE solver + callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, amr_callback) + sol = solve(ode, KenCarp4(autodiff=false), abstol=time_abs_tol, reltol=time_int_tol, + save_everystep=false, callback=callbacks) + l2_error, linf_error = analysis_callback(sol) + @test l2_error ≈ [6.4878111416468355e-6] + @test linf_error ≈ [3.258075790424364e-5] + end + @trixi_testset "TreeMesh1D: elixir_navierstokes_convergence_periodic.jl" begin @test_trixi_include(joinpath(examples_dir(), "tree_1d_dgsem", "elixir_navierstokes_convergence_periodic.jl"), l2 = [0.0001133835907077494, 6.226282245610444e-5, 0.0002820171699999139], @@ -53,6 +75,25 @@ isdir(outdir) && rm(outdir, recursive=true) linf = [0.002754803146635787, 0.0028567714697580906, 0.012941794048176192] ) end + + @trixi_testset "TreeMesh1D: elixir_navierstokes_convergence_walls_amr.jl" begin + @test_trixi_include(joinpath(examples_dir(), "tree_1d_dgsem", "elixir_navierstokes_convergence_walls_amr.jl"), + equations_parabolic = CompressibleNavierStokesDiffusion1D(equations, mu=mu(), + Prandtl=prandtl_number()), + l2 = [2.527877257772131e-5, 2.5539911566937718e-5, 0.0001211860451244785], + linf = [0.00014663867588948776, 0.00019422448348348196, 0.0009556439394007299] + ) + end + + @trixi_testset "TreeMesh1D: elixir_navierstokes_convergence_walls_amr.jl: GradientVariablesEntropy" begin + @test_trixi_include(joinpath(examples_dir(), "tree_1d_dgsem", "elixir_navierstokes_convergence_walls_amr.jl"), + equations_parabolic = CompressibleNavierStokesDiffusion1D(equations, mu=mu(), + Prandtl=prandtl_number(), + gradient_variables = GradientVariablesEntropy()), + l2 = [2.4593699163175966e-5, 2.392863645712634e-5, 0.00011252526651714956], + linf = [0.00011850555445525046, 0.0001898777490968537, 0.0009597561467877824] + ) + end end # Clean up afterwards: delete Trixi output directory diff --git a/test/test_parabolic_2d.jl b/test/test_parabolic_2d.jl index 1564a33dc41..3fff4382cd1 100644 --- a/test/test_parabolic_2d.jl +++ b/test/test_parabolic_2d.jl @@ -143,9 +143,9 @@ isdir(outdir) && rm(outdir, recursive=true) callbacks = CallbackSet(summary_callback, alive_callback, analysis_callback) sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, ode_default_options()..., callback=callbacks) - ac_sol = analysis_callback(sol) - @test ac_sol.l2[1] ≈ 1.67452550744728e-6 - @test ac_sol.linf[1] ≈ 7.905059166368744e-6 + l2_error, linf_error = analysis_callback(sol) + @test l2_error ≈ [1.67452550744728e-6] + @test linf_error ≈ [7.905059166368744e-6] # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) @@ -229,9 +229,9 @@ isdir(outdir) && rm(outdir, recursive=true) callbacks = CallbackSet(summary_callback, alive_callback, analysis_callback) sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, dt = 1e-5, ode_default_options()..., callback=callbacks) - ac_sol = analysis_callback(sol) - @test ac_sol.l2 ≈ [0.00024296959173852447; 0.0002093263158670915; 0.0005390572390977262; 0.00026753561392341537] - @test ac_sol.linf ≈ [0.0016210102053424436; 0.002593287648655501; 0.002953907343823712; 0.002077119120180271] + l2_error, linf_error = analysis_callback(sol) + @test l2_error ≈ [0.00024296959173852447; 0.0002093263158670915; 0.0005390572390977262; 0.00026753561392341537] + 
@test linf_error ≈ [0.0016210102053424436; 0.002593287648655501; 0.002953907343823712; 0.002077119120180271] end @trixi_testset "TreeMesh2D: elixir_navierstokes_lid_driven_cavity.jl" begin diff --git a/test/test_parabolic_3d.jl b/test/test_parabolic_3d.jl index d607962afa0..ded052fb9d3 100644 --- a/test/test_parabolic_3d.jl +++ b/test/test_parabolic_3d.jl @@ -94,9 +94,9 @@ isdir(outdir) && rm(outdir, recursive=true) callbacks = CallbackSet(summary_callback, alive_callback, analysis_callback) sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, dt = 1e-5, ode_default_options()..., callback=callbacks) - ac_sol = analysis_callback(sol) - @test ac_sol.l2 ≈ [0.0003991794175622818; 0.0008853745163670504; 0.0010658655552066817; 0.0008785559918324284; 0.001403163458422815] - @test ac_sol.linf ≈ [0.0035306410538458177; 0.01505692306169911; 0.008862444161110705; 0.015065647972869856; 0.030402714743065218] + l2_error, linf_error = analysis_callback(sol) + @test l2_error ≈ [0.0003991794175622818; 0.0008853745163670504; 0.0010658655552066817; 0.0008785559918324284; 0.001403163458422815] + @test linf_error ≈ [0.0035306410538458177; 0.01505692306169911; 0.008862444161110705; 0.015065647972869856; 0.030402714743065218] end @trixi_testset "TreeMesh3D: elixir_navierstokes_taylor_green_vortex.jl" begin @@ -127,9 +127,9 @@ isdir(outdir) && rm(outdir, recursive=true) sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), dt=5e-3, save_everystep=false, callback=callbacks); - ac_sol = analysis_callback(sol) - @test ac_sol.l2 ≈ [0.0013666103707729502; 0.2313581629543744; 0.2308164306264533; 0.17460246787819503; 0.28121914446544005] - @test ac_sol.linf ≈ [0.006938093883741336; 1.028235074139312; 1.0345438209717241; 1.0821111605203542; 1.2669636522564645] + l2_error, linf_error = analysis_callback(sol) + @test l2_error ≈ [0.0013666103707729502; 0.2313581629543744; 0.2308164306264533; 0.17460246787819503; 0.28121914446544005] + @test linf_error ≈ [0.006938093883741336; 1.028235074139312; 1.0345438209717241; 1.0821111605203542; 1.2669636522564645] # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) From 27d4fd190bd7a8c76a56f9fefd062169a2682d46 Mon Sep 17 00:00:00 2001 From: Daniel Doehring Date: Tue, 12 Sep 2023 17:54:08 +0200 Subject: [PATCH 142/163] Shorten 3d parabolic test times (#1634) * Shorten 3d parabolic test times * fix typo * clear notation --- test/test_parabolic_3d.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/test_parabolic_3d.jl b/test/test_parabolic_3d.jl index ded052fb9d3..86076460294 100644 --- a/test/test_parabolic_3d.jl +++ b/test/test_parabolic_3d.jl @@ -85,7 +85,7 @@ isdir(outdir) && rm(outdir, recursive=true) num_leafs = length(LLID) @assert num_leafs % 16 == 0 Trixi.refine!(mesh.tree, LLID[1:Int(num_leafs/16)]) - tspan=(0.0, 1.0) + tspan=(0.0, 0.25) semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), initial_condition, solver; boundary_conditions=(boundary_conditions, boundary_conditions_parabolic), source_terms=source_terms_navier_stokes_convergence_test) @@ -95,8 +95,8 @@ isdir(outdir) && rm(outdir, recursive=true) sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, dt = 1e-5, ode_default_options()..., callback=callbacks) l2_error, linf_error = analysis_callback(sol) - @test l2_error ≈ [0.0003991794175622818; 0.0008853745163670504; 0.0010658655552066817; 0.0008785559918324284; 0.001403163458422815] - @test linf_error ≈ 
[0.0035306410538458177; 0.01505692306169911; 0.008862444161110705; 0.015065647972869856; 0.030402714743065218] + @test l2_error ≈ [0.0003109336253407314, 0.0006473493036803503, 0.0007705277238213672, 0.0006280517917198335, 0.000903927789884075] + @test linf_error ≈ [0.0023694155365339142, 0.010634932622402863, 0.006772070862236412, 0.010640551561726901, 0.019256819038719897] end @trixi_testset "TreeMesh3D: elixir_navierstokes_taylor_green_vortex.jl" begin @@ -114,7 +114,7 @@ isdir(outdir) && rm(outdir, recursive=true) num_leafs = length(LLID) @assert num_leafs % 32 == 0 Trixi.refine!(mesh.tree, LLID[1:Int(num_leafs/32)]) - tspan=(0.0, 10.0) + tspan=(0.0, 0.1) semi = SemidiscretizationHyperbolicParabolic(mesh, (equations, equations_parabolic), initial_condition, solver) ode = semidiscretize(semi, tspan) @@ -128,8 +128,8 @@ isdir(outdir) && rm(outdir, recursive=true) dt=5e-3, save_everystep=false, callback=callbacks); l2_error, linf_error = analysis_callback(sol) - @test l2_error ≈ [0.0013666103707729502; 0.2313581629543744; 0.2308164306264533; 0.17460246787819503; 0.28121914446544005] - @test linf_error ≈ [0.006938093883741336; 1.028235074139312; 1.0345438209717241; 1.0821111605203542; 1.2669636522564645] + @test l2_error ≈ [7.314319856736271e-5, 0.006266480163542894, 0.006266489911815533, 0.008829222305770226, 0.0032859166842329228] + @test linf_error ≈ [0.0002943968186086554, 0.013876261980614757, 0.013883619864959451, 0.025201279960491936, 0.018679364985388247] # Ensure that we do not have excessive memory allocations # (e.g., from type instabilities) From d206b766e3e0aad36a9808ed65c5549b8b284f73 Mon Sep 17 00:00:00 2001 From: ArseniyKholod <119304909+ArseniyKholod@users.noreply.github.com> Date: Tue, 12 Sep 2023 19:16:44 +0200 Subject: [PATCH 143/163] Add load_timestep! for restart setup (#1614) * add load_timestep! 
* Update save_restart.jl * Update save_restart.jl * Update src/callbacks_step/save_restart.jl Co-authored-by: Michael Schlottke-Lakemper * use new function in elixirs and docs --------- Co-authored-by: Michael Schlottke-Lakemper Co-authored-by: Hendrik Ranocha --- docs/src/restart.md | 3 +-- examples/p4est_2d_dgsem/elixir_advection_restart.jl | 3 +-- examples/p4est_3d_dgsem/elixir_advection_restart.jl | 3 +-- .../structured_2d_dgsem/elixir_advection_restart.jl | 3 +-- .../structured_3d_dgsem/elixir_advection_restart.jl | 3 +-- examples/tree_2d_dgsem/elixir_advection_restart.jl | 3 +-- examples/tree_3d_dgsem/elixir_advection_restart.jl | 3 +-- .../unstructured_2d_dgsem/elixir_euler_restart.jl | 3 +-- src/Trixi.jl | 2 +- src/callbacks_step/save_restart.jl | 12 ++++++++++++ 10 files changed, 21 insertions(+), 17 deletions(-) diff --git a/docs/src/restart.md b/docs/src/restart.md index 767269ff27d..c7cbcd11852 100644 --- a/docs/src/restart.md +++ b/docs/src/restart.md @@ -77,8 +77,7 @@ and its time step number, e.g.: ```julia integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), dt=dt, save_everystep=false, callback=callbacks); -integrator.iter = load_timestep(restart_filename) -integrator.stats.naccept = integrator.iter +load_timestep!(integrator, restart_filename) ``` Now we can compute the solution: diff --git a/examples/p4est_2d_dgsem/elixir_advection_restart.jl b/examples/p4est_2d_dgsem/elixir_advection_restart.jl index 79a35199b83..52917616a6a 100644 --- a/examples/p4est_2d_dgsem/elixir_advection_restart.jl +++ b/examples/p4est_2d_dgsem/elixir_advection_restart.jl @@ -35,8 +35,7 @@ integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), save_everystep=false, callback=callbacks); # Get the last time index and work with that. -integrator.iter = load_timestep(restart_filename) -integrator.stats.naccept = integrator.iter +load_timestep!(integrator, restart_filename) ############################################################################### diff --git a/examples/p4est_3d_dgsem/elixir_advection_restart.jl b/examples/p4est_3d_dgsem/elixir_advection_restart.jl index b27eaab62e2..26d10cf8826 100644 --- a/examples/p4est_3d_dgsem/elixir_advection_restart.jl +++ b/examples/p4est_3d_dgsem/elixir_advection_restart.jl @@ -32,8 +32,7 @@ integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), save_everystep=false, callback=callbacks); # Get the last time index and work with that. -integrator.iter = load_timestep(restart_filename) -integrator.stats.naccept = integrator.iter +load_timestep!(integrator, restart_filename) ############################################################################### diff --git a/examples/structured_2d_dgsem/elixir_advection_restart.jl b/examples/structured_2d_dgsem/elixir_advection_restart.jl index 98c44fac71a..82eaa21333a 100644 --- a/examples/structured_2d_dgsem/elixir_advection_restart.jl +++ b/examples/structured_2d_dgsem/elixir_advection_restart.jl @@ -34,8 +34,7 @@ integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), save_everystep=false, callback=callbacks); # Get the last time index and work with that. 
-integrator.iter = load_timestep(restart_filename) -integrator.stats.naccept = integrator.iter +load_timestep!(integrator, restart_filename) ############################################################################### # run the simulation diff --git a/examples/structured_3d_dgsem/elixir_advection_restart.jl b/examples/structured_3d_dgsem/elixir_advection_restart.jl index 39d28848c77..921c5310340 100644 --- a/examples/structured_3d_dgsem/elixir_advection_restart.jl +++ b/examples/structured_3d_dgsem/elixir_advection_restart.jl @@ -32,8 +32,7 @@ integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), save_everystep=false, callback=callbacks); # Get the last time index and work with that. -integrator.iter = load_timestep(restart_filename) -integrator.stats.naccept = integrator.iter +load_timestep!(integrator, restart_filename) ############################################################################### diff --git a/examples/tree_2d_dgsem/elixir_advection_restart.jl b/examples/tree_2d_dgsem/elixir_advection_restart.jl index 72efb7d0c84..771ec5aefe7 100644 --- a/examples/tree_2d_dgsem/elixir_advection_restart.jl +++ b/examples/tree_2d_dgsem/elixir_advection_restart.jl @@ -32,8 +32,7 @@ integrator = init(ode, alg, save_everystep=false, callback=callbacks) # Get the last time index and work with that. -integrator.iter = load_timestep(restart_filename) -integrator.stats.naccept = integrator.iter +load_timestep!(integrator, restart_filename) ############################################################################### # run the simulation diff --git a/examples/tree_3d_dgsem/elixir_advection_restart.jl b/examples/tree_3d_dgsem/elixir_advection_restart.jl index 3061f165874..b7835ed061f 100644 --- a/examples/tree_3d_dgsem/elixir_advection_restart.jl +++ b/examples/tree_3d_dgsem/elixir_advection_restart.jl @@ -31,8 +31,7 @@ integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), save_everystep=false, callback=callbacks); # Get the last time index and work with that. -integrator.iter = load_timestep(restart_filename) -integrator.stats.naccept = integrator.iter +load_timestep!(integrator, restart_filename) ############################################################################### diff --git a/examples/unstructured_2d_dgsem/elixir_euler_restart.jl b/examples/unstructured_2d_dgsem/elixir_euler_restart.jl index b85cc2c6d70..6653f8662d9 100644 --- a/examples/unstructured_2d_dgsem/elixir_euler_restart.jl +++ b/examples/unstructured_2d_dgsem/elixir_euler_restart.jl @@ -33,8 +33,7 @@ integrator = init(ode, CarpenterKennedy2N54(williamson_condition=false), save_everystep=false, callback=callbacks); # Get the last time index and work with that. 
-integrator.iter = load_timestep(restart_filename)
-integrator.stats.naccept = integrator.iter
+load_timestep!(integrator, restart_filename)


###############################################################################
diff --git a/src/Trixi.jl b/src/Trixi.jl
index ec4d20558e5..be43c45b93d 100644
--- a/src/Trixi.jl
+++ b/src/Trixi.jl
@@ -253,7 +253,7 @@ export SummaryCallback, SteadyStateCallback, AnalysisCallback, AliveCallback,
        GlmSpeedCallback, LBMCollisionCallback, EulerAcousticsCouplingCallback,
        TrivialCallback, AnalysisCallbackCoupled
 
-export load_mesh, load_time, load_timestep, load_dt
+export load_mesh, load_time, load_timestep, load_timestep!, load_dt
 
 export ControllerThreeLevel, ControllerThreeLevelCombined,
        IndicatorLöhner, IndicatorLoehner, IndicatorMax,
diff --git a/src/callbacks_step/save_restart.jl b/src/callbacks_step/save_restart.jl
index f567a5c7fda..06817a9b730 100644
--- a/src/callbacks_step/save_restart.jl
+++ b/src/callbacks_step/save_restart.jl
@@ -141,6 +141,18 @@ function load_timestep(restart_file::AbstractString)
     end
 end
 
+"""
+    load_timestep!(integrator, restart_file::AbstractString)
+
+Load the time step number saved in a `restart_file` and assign it to both the time step
+number and the number of accepted steps
+(`iter` and `stats.naccept` in OrdinaryDiffEq.jl, respectively) in `integrator`.
+"""
+function load_timestep!(integrator, restart_file::AbstractString)
+    integrator.iter = load_timestep(restart_file)
+    integrator.stats.naccept = integrator.iter
+end
+
 """
     load_dt(restart_file::AbstractString)
 
From bc6736183271ec191645e9a38f95790a76671d25 Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Wed, 13 Sep 2023 09:07:41 +0200
Subject: [PATCH 144/163] fix allocations of P4estMesh2D BCs (#1636)

---
 src/solvers/dgsem_p4est/dg_2d.jl |  4 ++--
 test/test_p4est_2d.jl            | 11 ++++++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl
index 97b931fa325..a665aa4b19d 100644
--- a/src/solvers/dgsem_p4est/dg_2d.jl
+++ b/src/solvers/dgsem_p4est/dg_2d.jl
@@ -275,9 +275,9 @@ function prolong2boundaries!(cache, u,
     return nothing
 end
 
-function calc_boundary_flux!(cache, t, boundary_condition, boundary_indexing,
+function calc_boundary_flux!(cache, t, boundary_condition::BC, boundary_indexing,
                              mesh::Union{P4estMesh{2}, T8codeMesh{2}},
-                             equations, surface_integral, dg::DG)
+                             equations, surface_integral, dg::DG) where {BC}
     @unpack boundaries = cache
     @unpack surface_flux_values = cache.elements
     index_range = eachnode(dg)
diff --git a/test/test_p4est_2d.jl b/test/test_p4est_2d.jl
index c4ce2619e15..31dfe1d35a5 100644
--- a/test/test_p4est_2d.jl
+++ b/test/test_p4est_2d.jl
@@ -24,7 +24,7 @@ isdir(outdir) && rm(outdir, recursive=true)
       l2   = [3.198940059144588e-5],
       linf = [0.00030636069494005547])
 
-    # Ensure that we do not have excessive memory allocations 
+    # Ensure that we do not have excessive memory allocations
     # (e.g., from type instabilities)
    let
      t = sol.t[end]
@@ -102,6 +102,15 @@ isdir(outdir) && rm(outdir, recursive=true)
      l2   = [0.020291447969983396, 0.017479614254319948, 0.011387644425613437, 0.0514420126021293],
      linf = [0.3582779022370579, 0.32073537890751663, 0.221818049107692, 0.9209559420400415],
      tspan = (0.0, 0.15))
+
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
+    let
+      t = sol.t[end]
+      u_ode = sol.u[end]
+      du_ode = similar(u_ode)
+      @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+    end
  end

  @trixi_testset 
"elixir_euler_forward_step_amr.jl" begin From 547556dafd3c84ae8ed50fc1911e924cb4237468 Mon Sep 17 00:00:00 2001 From: Daniel Doehring Date: Wed, 13 Sep 2023 10:58:33 +0200 Subject: [PATCH 145/163] Avoid slicing (#1637) Co-authored-by: Hendrik Ranocha --- examples/p4est_2d_dgsem/elixir_navierstokes_convergence.jl | 5 ++++- examples/p4est_3d_dgsem/elixir_navierstokes_convergence.jl | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/examples/p4est_2d_dgsem/elixir_navierstokes_convergence.jl b/examples/p4est_2d_dgsem/elixir_navierstokes_convergence.jl index 8111df8251a..b0c6086ad63 100644 --- a/examples/p4est_2d_dgsem/elixir_navierstokes_convergence.jl +++ b/examples/p4est_2d_dgsem/elixir_navierstokes_convergence.jl @@ -170,7 +170,10 @@ end initial_condition = initial_condition_navier_stokes_convergence_test # BC types -velocity_bc_top_bottom = NoSlip((x, t, equations) -> initial_condition_navier_stokes_convergence_test(x, t, equations)[2:3]) +velocity_bc_top_bottom = NoSlip() do x, t, equations + u = initial_condition_navier_stokes_convergence_test(x, t, equations) + return SVector(u[2], u[3]) +end heat_bc_top_bottom = Adiabatic((x, t, equations) -> 0.0) boundary_condition_top_bottom = BoundaryConditionNavierStokesWall(velocity_bc_top_bottom, heat_bc_top_bottom) diff --git a/examples/p4est_3d_dgsem/elixir_navierstokes_convergence.jl b/examples/p4est_3d_dgsem/elixir_navierstokes_convergence.jl index c426fe95f5b..0109e58dfb3 100644 --- a/examples/p4est_3d_dgsem/elixir_navierstokes_convergence.jl +++ b/examples/p4est_3d_dgsem/elixir_navierstokes_convergence.jl @@ -220,7 +220,10 @@ end initial_condition = initial_condition_navier_stokes_convergence_test # BC types -velocity_bc_top_bottom = NoSlip((x, t, equations) -> initial_condition_navier_stokes_convergence_test(x, t, equations)[2:4]) +velocity_bc_top_bottom = NoSlip() do x, t, equations + u = initial_condition_navier_stokes_convergence_test(x, t, equations) + return SVector(u[2], u[3], u[4]) +end heat_bc_top_bottom = Adiabatic((x, t, equations) -> 0.0) boundary_condition_top_bottom = BoundaryConditionNavierStokesWall(velocity_bc_top_bottom, heat_bc_top_bottom) From 32d837b0920c3cd9218f448080495c4a29d53566 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Wed, 13 Sep 2023 11:46:52 +0200 Subject: [PATCH 146/163] new tutorial on custom RHS functions and semidiscretizations (#1633) * fix list of tutorials * WIP: tutorial on semidiscretizations * WIP: tutorial on semidiscretizations * custom RHS * custom semidiscretization * update make.jl script * WIP: trying to make Literate.jl testsets safeer * fix * fix reference * comment on safe testsets * some minor fixes * the SciML ecosystem * mention package versions --- docs/literate/make.jl | 17 +- .../src/files/custom_semidiscretization.jl | 324 ++++++++++++++++++ docs/literate/src/files/index.jl | 24 +- docs/make.jl | 1 + docs/src/callbacks.md | 4 +- docs/src/overview.md | 2 +- docs/src/parallelization.md | 6 +- docs/src/performance.md | 2 +- 8 files changed, 366 insertions(+), 14 deletions(-) create mode 100644 docs/literate/src/files/custom_semidiscretization.jl diff --git a/docs/literate/make.jl b/docs/literate/make.jl index b620f85c975..a04d8a0b333 100644 --- a/docs/literate/make.jl +++ b/docs/literate/make.jl @@ -51,12 +51,25 @@ function create_tutorials(files) # Run tests on all tutorial files @testset "TrixiTutorials" begin for (i, (title, filename)) in enumerate(files) + # Evaluate each tutorial in its own module to avoid leaking of + # function/variable names, 
polluting the namespace of later tutorials + # by stuff defined in earlier tutorials. if filename isa Vector # Several files of one topic for j in eachindex(filename) - @testset "$(filename[j][2][2])" begin include(joinpath(repo_src, filename[j][2][1], filename[j][2][2])) end + mod = gensym(filename[j][2][2]) + @testset "$(filename[j][2][2])" begin + @eval module $mod + include(joinpath($repo_src, $(filename[j][2][1]), $(filename[j][2][2]))) + end + end end else # Single files - @testset "$title" begin include(joinpath(repo_src, filename)) end + mod = gensym(title) + @testset "$title" begin + @eval module $mod + include(joinpath($repo_src, $filename)) + end + end end end end diff --git a/docs/literate/src/files/custom_semidiscretization.jl b/docs/literate/src/files/custom_semidiscretization.jl new file mode 100644 index 00000000000..fd432fb0826 --- /dev/null +++ b/docs/literate/src/files/custom_semidiscretization.jl @@ -0,0 +1,324 @@ +#src # Custom semidiscretizations + +# As described in the [overview section](@ref overview-semidiscretizations), +# semidiscretizations are high-level descriptions of spatial discretizations +# in Trixi.jl. Trixi.jl's main focus is on hyperbolic conservation +# laws represented in a [`SemidiscretizationHyperbolic`](@ref). +# Hyperbolic-parabolic problems based on the advection-diffusion equation or +# the compressible Navier-Stokes equations can be represented in a +# [`SemidiscretizationHyperbolicParabolic`](@ref). This is described in the +# [basic tutorial on parabolic terms](@ref parabolic_terms) and its extension to +# [custom parabolic terms](@ref adding_new_parabolic_terms). +# In this tutorial, we will describe how these semidiscretizations work and how +# they can be used to create custom semidiscretizations involving also other tasks. + + +# ## Overview of the right-hand side evaluation + +# The semidiscretizations provided by Trixi.jl are set up to create `ODEProblem`s from the +# [SciML ecosystem for ordinary differential equations](https://diffeq.sciml.ai/latest/). +# In particular, a spatial semidiscretization can be wrapped in an ODE problem +# using [`semidiscretize`](@ref), which returns an `ODEProblem`. This `ODEProblem` +# bundles an initial condition, a right-hand side (RHS) function, the time span, +# and possible parameters. The `ODEProblem`s created by Trixi.jl use the semidiscretization +# passed to [`semidiscretize`](@ref) as a parameter. +# For a [`SemidiscretizationHyperbolic`](@ref), the `ODEProblem` wraps +# `Trixi.rhs!` as ODE RHS. +# For a [`SemidiscretizationHyperbolicParabolic`](@ref), Trixi.jl +# uses a `SplitODEProblem` combining `Trixi.rhs_parabolic!` for the +# (potentially) stiff part and `Trixi.rhs!` for the other part. + + +# ## Standard Trixi.jl setup + +# In this tutorial, we will consider the linear advection equation +# with source term +# ```math +# \partial_t u(t,x) + \partial_x u(t,x) = -\exp(-t) \sin\bigl(\pi (x - t) \bigr) +# ``` +# with periodic boundary conditions in the domain `[-1, 1]` as a +# model problem. +# The initial condition is +# ```math +# u(0,x) = \sin(\pi x). +# ``` +# The source term results in some damping and the analytical solution +# ```math +# u(t,x) = \exp(-t) \sin\bigl(\pi (x - t) \bigr). +# ``` +# First, we discretize this equation using the standard functionality +# of Trixi.jl. + +using Trixi, OrdinaryDiffEq, Plots + +# The linear scalar advection equation is already implemented in +# Trixi.jl as [`LinearScalarAdvectionEquation1D`](@ref). 
We construct
+# it with an advection velocity `1.0`.

+equations = LinearScalarAdvectionEquation1D(1.0)

+# Next, we use a standard [`DGSEM`](@ref) solver.

+solver = DGSEM(polydeg = 3)

+# We create a simple [`TreeMesh`](@ref) in 1D.

+coordinates_min = (-1.0,)
+coordinates_max = (+1.0,)
+mesh = TreeMesh(coordinates_min, coordinates_max;
+                initial_refinement_level = 4,
+                n_cells_max = 10^4,
+                periodicity = true)

+# We wrap everything in a semidiscretization and pass the source
+# terms as a standard Julia function. Please note that Trixi.jl uses
+# `SVector`s from
+# [StaticArrays.jl](https://github.com/JuliaArrays/StaticArrays.jl)
+# to store the conserved variables `u`. Thus, the return value of the
+# source terms must be wrapped in an `SVector` - even if we consider
+# just a scalar problem.

+function initial_condition(x, t, equations)
+    return SVector(exp(-t) * sinpi(x[1] - t))
+end

+function source_terms_standard(u, x, t, equations)
+    return -initial_condition(x, t, equations)
+end

+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition,
+                                    solver;
+                                    source_terms = source_terms_standard)

+# Now, we can create the `ODEProblem`, solve the resulting ODE
+# using a time integration method from
+# [OrdinaryDiffEq.jl](https://github.com/SciML/OrdinaryDiffEq.jl),
+# and visualize the numerical solution at the final time using
+# [Plots.jl](https://github.com/JuliaPlots/Plots.jl).

+tspan = (0.0, 3.0)
+ode = semidiscretize(semi, tspan)

+sol = solve(ode, RDPK3SpFSAL49(); ode_default_options()...)

+plot(sol; label = "numerical sol.", legend = :topright)

+# We can also plot the analytical solution for comparison.
+# Since Trixi.jl uses `SVector`s for the variables, we take their `first`
+# (and only) component to get the scalar value for manual plotting.

+let
+    x = range(-1.0, 1.0; length = 200)
+    plot!(x, first.(initial_condition.(x, sol.t[end], equations)),
+          label = "analytical sol.", linestyle = :dash, legend = :topright)
+end

+# We can also add the initial condition to the plot.

+plot!(sol.u[1], semi, label = "u0", linestyle = :dot, legend = :topleft)

+# You can of course also use some
+# [callbacks](https://trixi-framework.github.io/Trixi.jl/stable/callbacks/)
+# provided by Trixi.jl as usual.

+summary_callback = SummaryCallback()
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi; interval = analysis_interval)
+alive_callback = AliveCallback(; analysis_interval)
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback,
+                        alive_callback)

+sol = solve(ode, RDPK3SpFSAL49();
+            ode_default_options()..., callback = callbacks)
+summary_callback()


+# ## Using a custom ODE right-hand side function

+# Next, we will solve the same problem but use our own ODE RHS function.
+# To demonstrate this, we will artificially create a global variable
+# containing the current time of the simulation.

+const GLOBAL_TIME = Ref(0.0)

+function source_terms_custom(u, x, t, equations)
+    t = GLOBAL_TIME[]
+    return -initial_condition(x, t, equations)
+end

+# Next, we create our own RHS function to update the global time of
+# the simulation before calling the RHS function from Trixi.jl.

+function rhs_source_custom!(du_ode, u_ode, semi, t)
+    GLOBAL_TIME[] = t
+    Trixi.rhs!(du_ode, u_ode, semi, t)
+end

+# Next, we create an `ODEProblem` manually copying over the data from
+# the one we got from [`semidiscretize`](@ref) earlier. 
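+# Recall that Trixi.jl stores the semidiscretization as the parameter `p`
+# of the `ODEProblem`, so we can simply reuse `ode.p` here.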
+
+ode_source_custom = ODEProblem(rhs_source_custom!,
+                               ode.u0,
+                               ode.tspan,
+                               ode.p #= semi =#)
+sol_source_custom = solve(ode_source_custom, RDPK3SpFSAL49();
+                          ode_default_options()...)

+plot(sol_source_custom; label = "numerical sol.")
+let
+    x = range(-1.0, 1.0; length = 200)
+    plot!(x, first.(initial_condition.(x, sol_source_custom.t[end], equations)),
+          label = "analytical sol.", linestyle = :dash, legend = :topleft)
+end
+plot!(sol_source_custom.u[1], semi, label = "u0", linestyle = :dot, legend = :topleft)

+# This also works with callbacks as usual.

+summary_callback = SummaryCallback()
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi; interval = analysis_interval)
+alive_callback = AliveCallback(; analysis_interval)
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback,
+                        alive_callback)

+sol = solve(ode_source_custom, RDPK3SpFSAL49();
+            ode_default_options()..., callback = callbacks)
+summary_callback()


+# ## Setting up a custom semidiscretization

+# Using a global constant is of course not really nice from a software
+# engineering point of view. Thus, it can often be useful to collect
+# additional data in the parameters of the `ODEProblem`. Hence, it is
+# time to create our own semidiscretization. Here, we create a small
+# wrapper of a standard semidiscretization of Trixi.jl and the current
+# global time of the simulation.

+struct CustomSemidiscretization{Semi, T} <: Trixi.AbstractSemidiscretization
+    semi::Semi
+    t::T
+end

+semi_custom = CustomSemidiscretization(semi, Ref(0.0))

+# To get pretty printing in the REPL, you can consider specializing
+#
+# - `Base.show(io::IO, parameters::CustomSemidiscretization)`
+# - `Base.show(io::IO, ::MIME"text/plain", parameters::CustomSemidiscretization)`
+#
+# for your custom semidiscretization.

+# Next, we create our own source terms that use the global time stored
+# in the custom semidiscretization.

+source_terms_custom_semi = let semi_custom = semi_custom
+    function source_terms_custom_semi(u, x, t, equations)
+        t = semi_custom.t[]
+        return -initial_condition(x, t, equations)
+    end
+end

+# We also create a custom ODE RHS to update the current global time
+# stored in the custom semidiscretization. We unpack the standard
+# semidiscretization created by Trixi.jl and pass it to `Trixi.rhs!`.

+function rhs_semi_custom!(du_ode, u_ode, semi_custom, t)
+    semi_custom.t[] = t
+    Trixi.rhs!(du_ode, u_ode, semi_custom.semi, t)
+end

+# Finally, we set up an `ODEProblem` and solve it numerically.

+ode_semi_custom = ODEProblem(rhs_semi_custom!,
+                             ode.u0,
+                             ode.tspan,
+                             semi_custom)
+sol_semi_custom = solve(ode_semi_custom, RDPK3SpFSAL49();
+                        ode_default_options()...)

+# If we want to make use of additional functionality provided by
+# Trixi.jl, e.g., for plotting, we need to implement a few additional
+# specializations. In this case, we forward everything to the standard
+# semidiscretization provided by Trixi.jl wrapped in our custom
+# semidiscretization.

+Base.ndims(semi::CustomSemidiscretization) = ndims(semi.semi)
+function Trixi.mesh_equations_solver_cache(semi::CustomSemidiscretization)
+    Trixi.mesh_equations_solver_cache(semi.semi)
+end

+# Now, we can plot the numerical solution as usual. 
+
+plot(sol_semi_custom; label = "numerical sol.")
+let
+    x = range(-1.0, 1.0; length = 200)
+    plot!(x, first.(initial_condition.(x, sol_semi_custom.t[end], equations)),
+          label = "analytical sol.", linestyle = :dash, legend = :topleft)
+end
+plot!(sol_semi_custom.u[1], semi, label = "u0", linestyle = :dot, legend = :topleft)

+# This also works with many callbacks as usual. However, the
+# [`AnalysisCallback`](@ref) requires some special handling since it
+# makes use of a performance counter contained in the standard
+# semidiscretizations of Trixi.jl to report some
+# [performance metrics](@ref performance-metrics).
+# Here, we forward all accesses to the performance counter to the
+# wrapped semidiscretization.

+function Base.getproperty(semi::CustomSemidiscretization, s::Symbol)
+    if s === :performance_counter
+        wrapped_semi = getfield(semi, :semi)
+        wrapped_semi.performance_counter
+    else
+        getfield(semi, s)
+    end
+end

+# Moreover, the [`AnalysisCallback`](@ref) also performs some error
+# calculations. We also need to forward them to the wrapped
+# semidiscretization.

+function Trixi.calc_error_norms(func, u, t, analyzer,
+                                semi::CustomSemidiscretization,
+                                cache_analysis)
+    Trixi.calc_error_norms(func, u, t, analyzer,
+                           semi.semi,
+                           cache_analysis)
+end

+# Now, we can work with the callbacks used before as usual.

+summary_callback = SummaryCallback()
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi_custom;
+                                     interval = analysis_interval)
+alive_callback = AliveCallback(; analysis_interval)
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback,
+                        alive_callback)

+sol = solve(ode_semi_custom, RDPK3SpFSAL49();
+            ode_default_options()..., callback = callbacks)
+summary_callback()

+# For even more advanced usage of custom semidiscretizations, you
+# may look at the source code of the ones contained in Trixi.jl, e.g.,
+# - [`SemidiscretizationHyperbolicParabolic`](@ref)
+# - [`SemidiscretizationEulerGravity`](@ref)
+# - [`SemidiscretizationEulerAcoustics`](@ref)
+# - [`SemidiscretizationCoupled`](@ref)


+# ## Package versions

+# These results were obtained using the following versions.

+using InteractiveUtils
+versioninfo()

+using Pkg
+Pkg.status(["Trixi", "OrdinaryDiffEq", "Plots"],
+           mode=PKGMODE_MANIFEST)
diff --git a/docs/literate/src/files/index.jl b/docs/literate/src/files/index.jl
index 0c8de66bf42..d42695611f6 100644
--- a/docs/literate/src/files/index.jl
+++ b/docs/literate/src/files/index.jl
@@ -76,21 +76,30 @@
 # In this part, another physics model is implemented, the nonconservative linear advection equation.
 # We run two different simulations with different levels of refinement and compare the resulting errors.
 
-# ### [10 Adaptive mesh refinement](@ref adaptive_mesh_refinement)
+# ### [10 Parabolic terms](@ref parabolic_terms)
+#-
+# This tutorial describes how parabolic terms are implemented in Trixi.jl, e.g.,
+# to solve the advection-diffusion equation.
+
+# ### [11 Adding new parabolic terms](@ref adding_new_parabolic_terms)
+#-
+# This tutorial describes how new parabolic terms can be implemented using Trixi.jl.
+
+# ### [12 Adaptive mesh refinement](@ref adaptive_mesh_refinement)
 #-
 # Adaptive mesh refinement (AMR) helps to increase the accuracy in sensitive or turbulent regions while
 # not wasting resources for less interesting parts of the domain. This leads to much more efficient
 # simulations. 
This tutorial presents the implementation strategy of AMR in Trixi.jl, including the use of
 # different indicators and controllers.
 
-# ### [11 Structured mesh with curvilinear mapping](@ref structured_mesh_mapping)
+# ### [13 Structured mesh with curvilinear mapping](@ref structured_mesh_mapping)
 #-
 # In this tutorial, the use of Trixi.jl's structured curved mesh type [`StructuredMesh`](@ref) is explained.
 # We present the two basic options to initialize such a mesh. First, the curved domain boundaries
 # of a circular cylinder are set by explicit boundary functions. Then, a fully curved mesh is
 # defined by passing the transformation mapping.
 
-# ### [12 Unstructured meshes with HOHQMesh.jl](@ref hohqmesh_tutorial)
+# ### [14 Unstructured meshes with HOHQMesh.jl](@ref hohqmesh_tutorial)
 #-
 # The purpose of this tutorial is to demonstrate how to use the [`UnstructuredMesh2D`](@ref)
 # functionality of Trixi.jl. This begins by running and visualizing an available unstructured
 # mesh example. Then, the tutorial shows how to conduct a simulation using a custom unstructured
 # mesh that is created in the HOHQMesh generator. We also briefly introduce the HOHQMesh.jl
 # software in the Trixi.jl ecosystem, and then run a simulation using Trixi.jl on said mesh.
 # In the end, the tutorial briefly explains how to simulate an example using AMR via `P4estMesh`.
 
-# ### [13 Explicit time stepping](@ref time_stepping)
+# ### [15 Explicit time stepping](@ref time_stepping)
 #-
 # This tutorial is about time integration using [OrdinaryDiffEq.jl](https://github.com/SciML/OrdinaryDiffEq.jl).
 # It explains how to use their algorithms and presents two types of time step choices - with error-based
 # and CFL-based adaptive step size control.
 
-# ### [14 Differentiable programming](@ref differentiable_programming)
+# ### [16 Differentiable programming](@ref differentiable_programming)
 #-
 # This part deals with some basic differentiable programming topics. For example, a Jacobian, its
 # eigenvalues and a curve of total energy (through the simulation) are calculated and plotted for
 # a few semidiscretizations. Moreover, we calculate an example for propagating errors with Measurement.jl
 # at the end.
 
+# ### [17 Custom semidiscretization](@ref custom_semidiscretization)
+#-
+# This tutorial describes the [semidiscretizations](@ref overview-semidiscretizations) of Trixi.jl
+# and explains how to extend them for custom tasks.
+
 
 # ## Examples in Trixi.jl
diff --git a/docs/make.jl b/docs/make.jl
index 57629577ddb..f882fcf1219 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -68,6 +68,7 @@ files = [
     # Topic: other stuff
     "Explicit time stepping" => "time_stepping.jl",
     "Differentiable programming" => "differentiable_programming.jl",
+    "Custom semidiscretizations" => "custom_semidiscretization.jl"
 ]
 tutorials = create_tutorials(files)
 
diff --git a/docs/src/callbacks.md b/docs/src/callbacks.md
index 1d3e5e34b51..7f44dfd5925 100644
--- a/docs/src/callbacks.md
+++ b/docs/src/callbacks.md
@@ -30,7 +30,7 @@ An example elixir using AMR can be found at [`examples/tree_2d_dgsem/elixir_adve
 The [`AnalysisCallback`](@ref) can be used to analyze the numerical solution, e.g. calculate
 errors or user-specified integrals, and print the results to the screen. The results can also be
 saved in a file. An example can be found at [`examples/tree_2d_dgsem/elixir_euler_vortex.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_euler_vortex.jl).
-In [Performance metrics of the `AnalysisCallback`](@ref) you can find a detailed +In [Performance metrics of the `AnalysisCallback`](@ref performance-metrics) you can find a detailed description of the different performance metrics the `AnalysisCallback` computes. ### I/O @@ -106,7 +106,7 @@ will yield the following plot: the automated performance measurements, including an output of the recorded timers after a simulation. * The [`VisualizationCallback`](@ref) can be used for in-situ visualization. See [Visualizing results during a simulation](@ref). -* The [`TrivialCallback`](@ref) does nothing and can be used to to easily disable some callbacks +* The [`TrivialCallback`](@ref) does nothing and can be used to easily disable some callbacks via [`trixi_include`](@ref). ### Equation-specific callbacks diff --git a/docs/src/overview.md b/docs/src/overview.md index 46bc28b6025..51a6272ae8e 100644 --- a/docs/src/overview.md +++ b/docs/src/overview.md @@ -16,7 +16,7 @@ to solve a PDE numerically are the spatial semidiscretization and the time integration scheme. -## Semidiscretizations +## [Semidiscretizations](@id overview-semidiscretizations) Semidiscretizations are high-level descriptions of spatial discretizations specialized for certain PDEs. Trixi.jl's main focus is on hyperbolic conservation diff --git a/docs/src/parallelization.md b/docs/src/parallelization.md index d56777c9af4..e55471bb256 100644 --- a/docs/src/parallelization.md +++ b/docs/src/parallelization.md @@ -22,7 +22,7 @@ julia --threads=4 If both the environment variable and the command line argument are specified at the same time, the latter takes precedence. -If you use time integration methods from +If you use time integration methods from [OrdinaryDiffEq.jl](https://github.com/SciML/OrdinaryDiffEq.jl) and want to use multiple threads therein, you need to set the keyword argument `thread=OrdinaryDiffEq.True()` of the algorithms, as described in the @@ -143,7 +143,7 @@ To start Trixi.jl in parallel with MPI, there are three options: Switching between panes can be done by `Ctrl+b` followed by `o`. As of March 2022, newer versions of tmpi also support mpich, which is the default backend of MPI.jl (via MPICH_Jll.jl). To use this setup, you need to install - `mpiexecjl` as described in the + `mpiexecjl` as described in the [documentation of MPI.jl](https://juliaparallel.org/MPI.jl/v0.20/usage/#Julia-wrapper-for-mpiexec) and make it available as `mpirun`, e.g., via a symlink of the form ```bash @@ -161,7 +161,7 @@ To start Trixi.jl in parallel with MPI, there are three options: ### [Performance](@id parallel_performance) For information on how to evaluate the parallel performance of Trixi.jl, please -have a look at the [Performance metrics of the `AnalysisCallback`](@ref) +have a look at the [Performance metrics of the `AnalysisCallback`](@ref performance-metrics) section, specifically at the descriptions of the performance index (PID). diff --git a/docs/src/performance.md b/docs/src/performance.md index 428672ec75f..bbe3a3390b7 100644 --- a/docs/src/performance.md +++ b/docs/src/performance.md @@ -170,7 +170,7 @@ As a rule of thumb: - Consider using `@nospecialize` for methods like custom implementations of `Base.show`. -## Performance metrics of the `AnalysisCallback` +## [Performance metrics of the `AnalysisCallback`](@id performance-metrics) The [`AnalysisCallback`](@ref) computes two performance indicators that you can use to evaluate the serial and parallel performance of Trixi.jl. 
They represent measured run times that are normalized by the number of `rhs!` evaluations and From b942775af0677ac83d77934c2326ab2b5db5ba77 Mon Sep 17 00:00:00 2001 From: Krissh Chawla <127906314+KrisshChawla@users.noreply.github.com> Date: Wed, 13 Sep 2023 20:08:08 -0500 Subject: [PATCH 147/163] Adding quasi 1d shallow water equations (#1619) * implementation of quasi shallow water equations 1d. * added example elixer for shallow_water_quasi_1d * changed the names of Quasi1d equations * including and exported ShallowWaterEquationsQuasi1D * exporting flux_chan_etal and flux_chan_nonconservative_etal * minor comment fix * adding tests * Apply suggestions from code review * Apply suggestions from code review * Update src/equations/shallow_water_quasi_1d.jl * formatting * formatting * forgot comma * Apply suggestions from code review Co-authored-by: Hendrik Ranocha * renamed example elixir to elixir_shallow_water_quasi_1d_source_terms.jl * Apply suggestions from code review Co-authored-by: Andrew Winters * Update test_tree_1d_shallowwater.jl with renamed example elixir * comment fix * comment fix for elixir_shallow_water_quasi_1d_source_terms.jl * Added well-balancedness test for shallow_water_quasi_1d The initial condition in the elixir is intended to test a discontinuous channel width 'a(x)' and bottom topography 'b(x)' on a periodic mesh. * Added 'max_abs_speeds' function and 'lake_at_rest_error' * Updated test_tree_1d_shallowwater with quasi well-balancedness test * File name fix in test_tree_1d_shallowwater * Update examples/tree_1d_dgsem/elixir_shallowwater_quasi1d_well_balanced.jl Co-authored-by: Jesse Chan <1156048+jlchan@users.noreply.github.com> * Renamed to "elixir_shallowwater_quasi_1d_well_balanced.jl" --------- Co-authored-by: Jesse Chan Co-authored-by: Jesse Chan <1156048+jlchan@users.noreply.github.com> Co-authored-by: Hendrik Ranocha Co-authored-by: Andrew Winters --- ...xir_shallow_water_quasi_1d_source_terms.jl | 60 ++++ ...xir_shallowwater_quasi_1d_well_balanced.jl | 84 +++++ src/Trixi.jl | 2 + src/equations/equations.jl | 1 + src/equations/shallow_water_quasi_1d.jl | 323 ++++++++++++++++++ test/test_tree_1d_shallowwater.jl | 14 + 6 files changed, 484 insertions(+) create mode 100644 examples/tree_1d_dgsem/elixir_shallow_water_quasi_1d_source_terms.jl create mode 100644 examples/tree_1d_dgsem/elixir_shallowwater_quasi_1d_well_balanced.jl create mode 100644 src/equations/shallow_water_quasi_1d.jl diff --git a/examples/tree_1d_dgsem/elixir_shallow_water_quasi_1d_source_terms.jl b/examples/tree_1d_dgsem/elixir_shallow_water_quasi_1d_source_terms.jl new file mode 100644 index 00000000000..72747c669e2 --- /dev/null +++ b/examples/tree_1d_dgsem/elixir_shallow_water_quasi_1d_source_terms.jl @@ -0,0 +1,60 @@ +using OrdinaryDiffEq +using Trixi + +############################################################################### +# Semidiscretization of the quasi 1d shallow water equations +# See Chan et al. 
https://doi.org/10.48550/arXiv.2307.12089 for details
+
+equations = ShallowWaterEquationsQuasi1D(gravity_constant = 9.81)
+
+initial_condition = initial_condition_convergence_test
+
+###############################################################################
+# Get the DG approximation space
+
+volume_flux = (flux_chan_etal, flux_nonconservative_chan_etal)
+surface_flux = (FluxPlusDissipation(flux_chan_etal, DissipationLocalLaxFriedrichs()),
+                flux_nonconservative_chan_etal)
+solver = DGSEM(polydeg = 3, surface_flux = surface_flux,
+               volume_integral = VolumeIntegralFluxDifferencing(volume_flux))
+
+###############################################################################
+# Get the TreeMesh and setup a periodic mesh
+
+coordinates_min = 0.0
+coordinates_max = sqrt(2.0)
+mesh = TreeMesh(coordinates_min, coordinates_max,
+                initial_refinement_level = 3,
+                n_cells_max = 10_000,
+                periodicity = true)
+
+# create the semidiscretization object
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver,
+                                    source_terms = source_terms_convergence_test)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0, 1.0)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 500
+analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval = analysis_interval)
+
+save_solution = SaveSolutionCallback(interval = 200,
+                                     save_initial_solution = true,
+                                     save_final_solution = true)
+
+callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, save_solution)
+
+###############################################################################
+# run the simulation
+
+# use a Runge-Kutta method with automatic (error based) time step size control
+sol = solve(ode, RDPK3SpFSAL49(); abstol = 1.0e-8, reltol = 1.0e-8,
+            ode_default_options()..., callback = callbacks);
+summary_callback() # print the timer summary
diff --git a/examples/tree_1d_dgsem/elixir_shallowwater_quasi_1d_well_balanced.jl b/examples/tree_1d_dgsem/elixir_shallowwater_quasi_1d_well_balanced.jl
new file mode 100644
index 00000000000..d9f1a52b500
--- /dev/null
+++ b/examples/tree_1d_dgsem/elixir_shallowwater_quasi_1d_well_balanced.jl
@@ -0,0 +1,84 @@
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the shallow water equations with a discontinuous
+# bottom topography function and channel width function
+
+equations = ShallowWaterEquationsQuasi1D(gravity_constant = 9.81, H0 = 2.0)
+
+# Setup a truly discontinuous bottom topography function and channel width for
+# this academic test case of well-balancedness. The errors from the analysis
+# callback are not important but the error for this lake-at-rest test case
+# `∑|H0-(h+b)|` should be around machine roundoff.
+# Works as intended for TreeMesh1D with `initial_refinement_level=3`. If the mesh
+# refinement level is changed, the initial condition below may need to be changed
+# as well to ensure that the discontinuities lie on an element interface.
+function initial_condition_discontinuous_well_balancedness(x, t,
+                                                           equations::ShallowWaterEquationsQuasi1D)
+    H = equations.H0
+    v = 0.0
+
+    # for a periodic domain, this choice of `b` and `a` mimics a
+    # discontinuity across the periodic boundary. 
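+    # Concretely, `b` jumps from 1 to 0 and `a` jumps from 3 to 1 when
+    # crossing from x = 1 to x = -1.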
+ b = 0.5 * (x[1] + 1) + a = 2 + x[1] + + return prim2cons(SVector(H, v, b, a), equations) +end + +initial_condition = initial_condition_discontinuous_well_balancedness + +############################################################################### +# Get the DG approximation space + +volume_flux = (flux_chan_etal, flux_nonconservative_chan_etal) +surface_flux = volume_flux +solver = DGSEM(polydeg = 4, surface_flux = surface_flux, + volume_integral = VolumeIntegralFluxDifferencing(volume_flux)) + +############################################################################### +# Get the TreeMesh and setup a periodic mesh + +coordinates_min = -1.0 +coordinates_max = 1.0 +mesh = TreeMesh(coordinates_min, coordinates_max, + initial_refinement_level = 3, + n_cells_max = 10_000) + +# Create the semi discretization object +semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) + +############################################################################### +# ODE solver + +tspan = (0.0, 100.0) +ode = semidiscretize(semi, tspan) + +############################################################################### +# Callbacks + +summary_callback = SummaryCallback() + +analysis_interval = 1000 +analysis_callback = AnalysisCallback(semi, interval = analysis_interval, + extra_analysis_integrals = (lake_at_rest_error,)) + +alive_callback = AliveCallback(analysis_interval = analysis_interval) + +save_solution = SaveSolutionCallback(interval = 1000, + save_initial_solution = true, + save_final_solution = true) + +stepsize_callback = StepsizeCallback(cfl = 3.0) + +callbacks = CallbackSet(summary_callback, analysis_callback, alive_callback, save_solution, + stepsize_callback) + +############################################################################### +# run the simulation + +sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false), + dt = 1.0, # solve needs some value here but it will be overwritten by the stepsize_callback + save_everystep = false, callback = callbacks); +summary_callback() # print the timer summary diff --git a/src/Trixi.jl b/src/Trixi.jl index be43c45b93d..c883c3bf19f 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -149,6 +149,7 @@ export AcousticPerturbationEquations2D, LatticeBoltzmannEquations2D, LatticeBoltzmannEquations3D, ShallowWaterEquations1D, ShallowWaterEquations2D, ShallowWaterTwoLayerEquations1D, ShallowWaterTwoLayerEquations2D, + ShallowWaterEquationsQuasi1D, LinearizedEulerEquations2D export LaplaceDiffusion1D, LaplaceDiffusion2D, @@ -164,6 +165,7 @@ export flux, flux_central, flux_lax_friedrichs, flux_hll, flux_hllc, flux_hlle, flux_kennedy_gruber, flux_shima_etal, flux_ec, flux_fjordholm_etal, flux_nonconservative_fjordholm_etal, flux_es_fjordholm_etal, flux_wintermeyer_etal, flux_nonconservative_wintermeyer_etal, + flux_chan_etal, flux_nonconservative_chan_etal, hydrostatic_reconstruction_audusse_etal, flux_nonconservative_audusse_etal, # TODO: TrixiShallowWater: move anything with "chen_noelle" to new file hydrostatic_reconstruction_chen_noelle, flux_nonconservative_chen_noelle, diff --git a/src/equations/equations.jl b/src/equations/equations.jl index 570a25cece9..9bae563d85f 100644 --- a/src/equations/equations.jl +++ b/src/equations/equations.jl @@ -356,6 +356,7 @@ include("shallow_water_1d.jl") include("shallow_water_2d.jl") include("shallow_water_two_layer_1d.jl") include("shallow_water_two_layer_2d.jl") +include("shallow_water_quasi_1d.jl") # CompressibleEulerEquations abstract type 
AbstractCompressibleEulerEquations{NDIMS, NVARS} <: diff --git a/src/equations/shallow_water_quasi_1d.jl b/src/equations/shallow_water_quasi_1d.jl new file mode 100644 index 00000000000..217a764e173 --- /dev/null +++ b/src/equations/shallow_water_quasi_1d.jl @@ -0,0 +1,323 @@ +# By default, Julia/LLVM does not use fused multiply-add operations (FMAs). +# Since these FMAs can increase the performance of many numerical algorithms, +# we need to opt-in explicitly. +# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details. +@muladd begin +#! format: noindent + +@doc raw""" + ShallowWaterEquationsQuasi1D(; gravity, H0 = 0, threshold_limiter = nothing threshold_wet = nothing) + +The quasi-1D shallow water equations (SWE). The equations are given by +```math +\begin{aligned} + \frac{\partial}{\partial t}(a h) + \frac{\partial}{\partial x}(a h v) &= 0 \\ + \frac{\partial}{\partial t}(a h v) + \frac{\partial}{\partial x}(a h v^2) + + g a h \frac{\partial}{\partial x}(h + b) &= 0 +\end{aligned} +``` +The unknown quantities of the Quasi-1D SWE are the water height ``h`` and the scaled velocity ``v``. +The gravitational constant is denoted by `g`, the (possibly) variable bottom topography function ``b(x)``, and (possibly) variable channel width ``a(x)``. The water height ``h`` is measured from the bottom topography ``b``, therefore one also defines the total water height as ``H = h + b``. + +The additional quantity ``H_0`` is also available to store a reference value for the total water height that +is useful to set initial conditions or test the "lake-at-rest" well-balancedness. + +Also, there are two thresholds which prevent numerical problems as well as instabilities. Both of them do not +have to be passed, as default values are defined within the struct. The first one, `threshold_limiter`, is +used in [`PositivityPreservingLimiterShallowWater`](@ref) on the water height, as a (small) shift on the initial +condition and cutoff before the next time step. The second one, `threshold_wet`, is applied on the water height to +define when the flow is "wet" before calculating the numerical flux. + +The bottom topography function ``b(x)`` and channel width ``a(x)`` are set inside the initial condition routine +for a particular problem setup. To test the conservative form of the SWE one can set the bottom topography +variable `b` to zero and ``a`` to one. + +In addition to the unknowns, Trixi.jl currently stores the bottom topography and channel width values at the approximation points +despite being fixed in time. This is done for convenience of computing the bottom topography gradients +on the fly during the approximation as well as computing auxiliary quantities like the total water height ``H`` +or the entropy variables. +This affects the implementation and use of these equations in various ways: +* The flux values corresponding to the bottom topography and channel width must be zero. +* The bottom topography and channel width values must be included when defining initial conditions, boundary conditions or + source terms. +* [`AnalysisCallback`](@ref) analyzes this variable. +* Trixi.jl's visualization tools will visualize the bottom topography and channel width by default. 
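+
+Further details on the quasi-1D SWE and the entropy stable fluxes implemented
+here are available in the paper:
+- Jesse Chan, Khemraj Shukla, Xinhui Wu, Ruofeng Liu, Prani Nalluri (2023)
+  High order entropy stable schemes for the quasi-one-dimensional
+  shallow water and compressible Euler equations
+  [DOI: 10.48550/arXiv.2307.12089](https://doi.org/10.48550/arXiv.2307.12089)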
+""" +struct ShallowWaterEquationsQuasi1D{RealT <: Real} <: + AbstractShallowWaterEquations{1, 4} + gravity::RealT # gravitational constant + H0::RealT # constant "lake-at-rest" total water height + # `threshold_limiter` used in `PositivityPreservingLimiterShallowWater` on water height, + # as a (small) shift on the initial condition and cutoff before the next time step. + # Default is 500*eps() which in double precision is ≈1e-13. + threshold_limiter::RealT + # `threshold_wet` applied on water height to define when the flow is "wet" + # before calculating the numerical flux. + # Default is 5*eps() which in double precision is ≈1e-15. + threshold_wet::RealT +end + +# Allow for flexibility to set the gravitational constant within an elixir depending on the +# application where `gravity_constant=1.0` or `gravity_constant=9.81` are common values. +# The reference total water height H0 defaults to 0.0 but is used for the "lake-at-rest" +# well-balancedness test cases. +# Strict default values for thresholds that performed well in many numerical experiments +function ShallowWaterEquationsQuasi1D(; gravity_constant, H0 = zero(gravity_constant), + threshold_limiter = nothing, + threshold_wet = nothing) + T = promote_type(typeof(gravity_constant), typeof(H0)) + if threshold_limiter === nothing + threshold_limiter = 500 * eps(T) + end + if threshold_wet === nothing + threshold_wet = 5 * eps(T) + end + ShallowWaterEquationsQuasi1D(gravity_constant, H0, threshold_limiter, threshold_wet) +end + +have_nonconservative_terms(::ShallowWaterEquationsQuasi1D) = True() +function varnames(::typeof(cons2cons), ::ShallowWaterEquationsQuasi1D) + ("a_h", "a_h_v", "b", "a") +end +# Note, we use the total water height, H = h + b, as the first primitive variable for easier +# visualization and setting initial conditions +varnames(::typeof(cons2prim), ::ShallowWaterEquationsQuasi1D) = ("H", "v", "b", "a") + +# Set initial conditions at physical location `x` for time `t` +""" + initial_condition_convergence_test(x, t, equations::ShallowWaterEquationsQuasi1D) + +A smooth initial condition used for convergence tests in combination with +[`source_terms_convergence_test`](@ref) +(and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). +""" +function initial_condition_convergence_test(x, t, + equations::ShallowWaterEquationsQuasi1D) + # generates a manufactured solution. + # some constants are chosen such that the function is periodic on the domain [0,sqrt(2)] + Omega = sqrt(2) * pi + H = 2.0 + 0.5 * sin(Omega * x[1] - t) + v = 0.25 + b = 0.2 - 0.05 * sin(Omega * x[1]) + a = 1 + 0.1 * cos(Omega * x[1]) + return prim2cons(SVector(H, v, b, a), equations) +end + +""" + source_terms_convergence_test(u, x, t, equations::ShallowWaterEquationsQuasi1D) + +Source terms used for convergence tests in combination with +[`initial_condition_convergence_test`](@ref) +(and [`BoundaryConditionDirichlet(initial_condition_convergence_test)`](@ref) in non-periodic domains). + +This manufactured solution source term is specifically designed for the bottom topography function +`b(x) = 0.2 - 0.05 * sin(sqrt(2) * pi *x[1])` and channel width 'a(x)= 1 + 0.1 * cos(sqrt(2) * pi * x[1])' +as defined in [`initial_condition_convergence_test`](@ref). +""" +@inline function source_terms_convergence_test(u, x, t, + equations::ShallowWaterEquationsQuasi1D) + # Same settings as in `initial_condition_convergence_test`. 
Some derivative simplify because + # this manufactured solution velocity is taken to be constant + Omega = sqrt(2) * pi + H = 2.0 + 0.5 * sin(Omega * x[1] - t) + H_x = 0.5 * cos(Omega * x[1] - t) * Omega + H_t = -0.5 * cos(Omega * x[1] - t) + + v = 0.25 + + b = 0.2 - 0.05 * sin(Omega * x[1]) + b_x = -0.05 * cos(Omega * x[1]) * Omega + + a = 1 + 0.1 * cos(Omega * x[1]) + a_x = -0.1 * sin(Omega * x[1]) * Omega + + du1 = a * H_t + v * (a_x * (H - b) + a * (H_x - b_x)) + du2 = v * du1 + a * (equations.gravity * (H - b) * H_x) + + return SVector(du1, du2, 0.0, 0.0) +end + +# Calculate 1D flux for a single point +# Note, the bottom topography and channel width have no flux +@inline function flux(u, orientation::Integer, equations::ShallowWaterEquationsQuasi1D) + a_h, a_h_v, _, a = u + h = waterheight(u, equations) + v = velocity(u, equations) + + p = 0.5 * a * equations.gravity * h^2 + + f1 = a_h_v + f2 = a_h_v * v + p + + return SVector(f1, f2, zero(eltype(u)), zero(eltype(u))) +end + +""" + flux_nonconservative_chan_etal(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquationsQuasi1D) + +Non-symmetric two-point volume flux discretizing the nonconservative (source) term +that contains the gradient of the bottom topography [`ShallowWaterEquationsQuasi1D`](@ref) +and the channel width. + +Further details are available in the paper: +- Jesse Chan, Khemraj Shukla, Xinhui Wu, Ruofeng Liu, Prani Nalluri (2023) + High order entropy stable schemes for the quasi-one-dimensional + shallow water and compressible Euler equations + [DOI: 10.48550/arXiv.2307.12089](https://doi.org/10.48550/arXiv.2307.12089) +""" +@inline function flux_nonconservative_chan_etal(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquationsQuasi1D) + a_h_ll, _, b_ll, a_ll = u_ll + a_h_rr, _, b_rr, a_rr = u_rr + + h_ll = waterheight(u_ll, equations) + h_rr = waterheight(u_rr, equations) + + z = zero(eltype(u_ll)) + + return SVector(z, equations.gravity * a_ll * h_ll * (h_rr + b_rr), z, z) +end + +""" + flux_chan_etal(u_ll, u_rr, orientation, + equations::ShallowWaterEquationsQuasi1D) + +Total energy conservative (mathematical entropy for quasi 1D shallow water equations) split form. +When the bottom topography is nonzero this scheme will be well-balanced when used as a `volume_flux`. +The `surface_flux` should still use, e.g., [`FluxPlusDissipation(flux_chan_etal, DissipationLocalLaxFriedrichs())`](@ref). 
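+
+In terms of arithmetic averages `{⋅}` of the left and right states, the
+implementation below computes `f1 = {a h v}` and `f2 = f1 * {v}`, with zero
+entries for the unchanged bottom topography and channel width.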
+ +Further details are available in the paper: +- Jesse Chan, Khemraj Shukla, Xinhui Wu, Ruofeng Liu, Prani Nalluri (2023) + High order entropy stable schemes for the quasi-one-dimensional + shallow water and compressible Euler equations + [DOI: 10.48550/arXiv.2307.12089](https://doi.org/10.48550/arXiv.2307.12089) +""" +@inline function flux_chan_etal(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquationsQuasi1D) + a_h_ll, a_h_v_ll, _, _ = u_ll + a_h_rr, a_h_v_rr, _, _ = u_rr + + v_ll = velocity(u_ll, equations) + v_rr = velocity(u_rr, equations) + + f1 = 0.5 * (a_h_v_ll + a_h_v_rr) + f2 = f1 * 0.5 * (v_ll + v_rr) + + return SVector(f1, f2, zero(eltype(u_ll)), zero(eltype(u_ll))) +end + +# Calculate maximum wave speed for local Lax-Friedrichs-type dissipation as the +# maximum velocity magnitude plus the maximum speed of sound +@inline function max_abs_speed_naive(u_ll, u_rr, orientation::Integer, + equations::ShallowWaterEquationsQuasi1D) + # Get the velocity quantities + v_ll = velocity(u_ll, equations) + v_rr = velocity(u_rr, equations) + + # Calculate the wave celerity on the left and right + h_ll = waterheight(u_ll, equations) + h_rr = waterheight(u_rr, equations) + c_ll = sqrt(equations.gravity * h_ll) + c_rr = sqrt(equations.gravity * h_rr) + + return max(abs(v_ll), abs(v_rr)) + max(c_ll, c_rr) +end + +# Specialized `DissipationLocalLaxFriedrichs` to avoid spurious dissipation in the bottom topography +# and channel width +@inline function (dissipation::DissipationLocalLaxFriedrichs)(u_ll, u_rr, + orientation_or_normal_direction, + equations::ShallowWaterEquationsQuasi1D) + λ = dissipation.max_abs_speed(u_ll, u_rr, orientation_or_normal_direction, + equations) + diss = -0.5 * λ * (u_rr - u_ll) + return SVector(diss[1], diss[2], zero(eltype(u_ll)), zero(eltype(u_ll))) +end + +@inline function max_abs_speeds(u, equations::ShallowWaterEquationsQuasi1D) + h = waterheight(u, equations) + v = velocity(u, equations) + + c = equations.gravity * sqrt(h) + return (abs(v) + c,) +end + +# Helper function to extract the velocity vector from the conservative variables +@inline function velocity(u, equations::ShallowWaterEquationsQuasi1D) + a_h, a_h_v, _, _ = u + + v = a_h_v / a_h + + return v +end + +# Convert conservative variables to primitive +@inline function cons2prim(u, equations::ShallowWaterEquationsQuasi1D) + a_h, _, b, a = u + h = a_h / a + H = h + b + v = velocity(u, equations) + return SVector(H, v, b, a) +end + +# Convert conservative variables to entropy variables +# Note, only the first two are the entropy variables, the third and fourth entries still +# just carry the bottom topography and channel width values for convenience +@inline function cons2entropy(u, equations::ShallowWaterEquationsQuasi1D) + a_h, a_h_v, b, a = u + h = waterheight(u, equations) + v = velocity(u, equations) + #entropy variables are the same as ones in standard shallow water equations + w1 = equations.gravity * (h + b) - 0.5 * v^2 + w2 = v + + return SVector(w1, w2, b, a) +end + +# Convert primitive to conservative variables +@inline function prim2cons(prim, equations::ShallowWaterEquationsQuasi1D) + H, v, b, a = prim + + a_h = a * (H - b) + a_h_v = a_h * v + return SVector(a_h, a_h_v, b, a) +end + +@inline function waterheight(u, equations::ShallowWaterEquationsQuasi1D) + return u[1] / u[4] +end + +# Entropy function for the shallow water equations is the total energy +@inline function entropy(cons, equations::ShallowWaterEquationsQuasi1D) + a = cons[4] + return a * energy_total(cons, 
equations) +end + +# Calculate total energy for a conservative state `cons` +@inline function energy_total(cons, equations::ShallowWaterEquationsQuasi1D) + a_h, a_h_v, b, a = cons + e = (a_h_v^2) / (2 * a * a_h) + 0.5 * equations.gravity * (a_h^2 / a) + + equations.gravity * a_h * b + return e +end + +# Calculate the error for the "lake-at-rest" test case where H = h+b should +# be a constant value over time. Note, assumes there is a single reference +# water height `H0` with which to compare. +# +# TODO: TrixiShallowWater: where should `threshold_limiter` live? May need +# to modify or have different versions of the `lake_at_rest_error` function +@inline function lake_at_rest_error(u, equations::ShallowWaterEquationsQuasi1D) + _, _, b, _ = u + h = waterheight(u, equations) + + # For well-balancedness testing with possible wet/dry regions the reference + # water height `H0` accounts for the possibility that the bottom topography + # can emerge out of the water as well as for the threshold offset to avoid + # division by a "hard" zero water heights as well. + H0_wet_dry = max(equations.H0, b + equations.threshold_limiter) + + return abs(H0_wet_dry - (h + b)) +end +end # @muladd diff --git a/test/test_tree_1d_shallowwater.jl b/test/test_tree_1d_shallowwater.jl index 1e5aeac1786..09fb2d9e432 100644 --- a/test/test_tree_1d_shallowwater.jl +++ b/test/test_tree_1d_shallowwater.jl @@ -112,6 +112,20 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_1d_dgsem") linf = [0.00041080213807871235, 0.00014823261488938177, 2.220446049250313e-16], tspan = (0.0, 0.05)) end + + @trixi_testset "elixir_shallow_water_quasi_1d_source_terms.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallow_water_quasi_1d_source_terms.jl"), + l2 = [6.37048760275098e-5, 0.0002745658116815704, 4.436491725647962e-6, 8.872983451152218e-6], + linf = [0.00026747526881631956, 0.0012106730729152249, 9.098379777500165e-6, 1.8196759554278685e-5], + tspan = (0.0, 0.05)) + end + + @trixi_testset "elixir_shallowwater_quasi_1d_well_balanced.jl" begin + @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_shallowwater_quasi_1d_well_balanced.jl"), + l2 = [1.4250229186905198e-14, 2.495109919406496e-12, 7.408599286788738e-17, 2.7205812409138776e-16], + linf = [5.284661597215745e-14, 2.74056233065078e-12, 2.220446049250313e-16, 8.881784197001252e-16], + tspan = (0.0, 100.0)) + end end end # module From f7b09734e1a86077a35c9cab0b9e97e68aaeb232 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Thu, 14 Sep 2023 06:41:14 +0200 Subject: [PATCH 148/163] add package versions to tutorials (#1638) --- docs/literate/src/files/DGMulti_1.jl | 12 ++++++++++++ docs/literate/src/files/DGMulti_2.jl | 12 ++++++++++++ docs/literate/src/files/DGSEM_FluxDiff.jl | 12 ++++++++++++ .../literate/src/files/adaptive_mesh_refinement.jl | 12 ++++++++++++ .../src/files/adding_new_parabolic_terms.jl | 12 ++++++++++++ .../src/files/adding_new_scalar_equations.jl | 12 ++++++++++++ .../src/files/adding_nonconservative_equation.jl | 12 ++++++++++++ .../src/files/differentiable_programming.jl | 12 ++++++++++++ docs/literate/src/files/hohqmesh_tutorial.jl | 12 ++++++++++++ docs/literate/src/files/non_periodic_boundaries.jl | 12 ++++++++++++ docs/literate/src/files/parabolic_terms.jl | 11 +++++++++++ .../src/files/scalar_linear_advection_1d.jl | 12 ++++++++++++ docs/literate/src/files/shock_capturing.jl | 12 ++++++++++++ docs/literate/src/files/structured_mesh_mapping.jl | 12 ++++++++++++ docs/literate/src/files/time_stepping.jl | 14 +++++++++++++- 
docs/literate/src/files/upwind_fdsbp.jl | 12 ++++++++++++ 16 files changed, 192 insertions(+), 1 deletion(-) diff --git a/docs/literate/src/files/DGMulti_1.jl b/docs/literate/src/files/DGMulti_1.jl index 0d78e79907c..5ef577e8eeb 100644 --- a/docs/literate/src/files/DGMulti_1.jl +++ b/docs/literate/src/files/DGMulti_1.jl @@ -194,3 +194,15 @@ plot(pd["rho"]) plot!(getmesh(pd)) # For more information, please have a look in the [StartUpDG.jl documentation](https://jlchan.github.io/StartUpDG.jl/stable/). + + +# ## Package versions + +# These results were obtained using the following versions. + +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "StartUpDG", "OrdinaryDiffEq", "Plots"], + mode=PKGMODE_MANIFEST) diff --git a/docs/literate/src/files/DGMulti_2.jl b/docs/literate/src/files/DGMulti_2.jl index 92dce43cdab..06248562343 100644 --- a/docs/literate/src/files/DGMulti_2.jl +++ b/docs/literate/src/files/DGMulti_2.jl @@ -38,3 +38,15 @@ D = couple_continuously(legendre_derivative_operator(xmin=0.0, xmax=1.0, N=4), # For more information and other SBP operators, see the documentations of [StartUpDG.jl](https://jlchan.github.io/StartUpDG.jl/dev/) # and [SummationByPartsOperators.jl](https://ranocha.de/SummationByPartsOperators.jl/stable/). + + +# ## Package versions + +# These results were obtained using the following versions. + +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "StartUpDG", "SummationByPartsOperators"], + mode=PKGMODE_MANIFEST) diff --git a/docs/literate/src/files/DGSEM_FluxDiff.jl b/docs/literate/src/files/DGSEM_FluxDiff.jl index 5ec156ebbe3..a5769900269 100644 --- a/docs/literate/src/files/DGSEM_FluxDiff.jl +++ b/docs/literate/src/files/DGSEM_FluxDiff.jl @@ -236,3 +236,15 @@ plot(sol) # [`flux_chandrashekar`](@ref), [`flux_kennedy_gruber`](@ref). # As surface flux you can use all volume fluxes and additionally for instance [`flux_lax_friedrichs`](@ref), # [`flux_hll`](@ref), [`flux_hllc`](@ref). + + +# ## Package versions + +# These results were obtained using the following versions. + +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "OrdinaryDiffEq", "Plots"], + mode=PKGMODE_MANIFEST) diff --git a/docs/literate/src/files/adaptive_mesh_refinement.jl b/docs/literate/src/files/adaptive_mesh_refinement.jl index d6150e887a8..46af8f79523 100644 --- a/docs/literate/src/files/adaptive_mesh_refinement.jl +++ b/docs/literate/src/files/adaptive_mesh_refinement.jl @@ -202,3 +202,15 @@ plot!(getmesh(pd)) # Source: Trixi.jl's YouTube channel [`Trixi Framework`](https://www.youtube.com/channel/UCpd92vU2HjjTPup-AIN0pkg) # For more information, please have a look at the respective links. + + +# ## Package versions + +# These results were obtained using the following versions. + +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "OrdinaryDiffEq", "Plots"], + mode=PKGMODE_MANIFEST) diff --git a/docs/literate/src/files/adding_new_parabolic_terms.jl b/docs/literate/src/files/adding_new_parabolic_terms.jl index 882f73f66ff..f5c2b815f33 100644 --- a/docs/literate/src/files/adding_new_parabolic_terms.jl +++ b/docs/literate/src/files/adding_new_parabolic_terms.jl @@ -158,3 +158,15 @@ sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, using Plots plot(sol) + +# ## Package versions + +# These results were obtained using the following versions. 
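+#
+# (`versioninfo()` reports the Julia version and platform details, while
+# `Pkg.status` with `mode=PKGMODE_MANIFEST` prints the versions resolved in
+# the active manifest, including indirect dependencies.)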
+ +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "OrdinaryDiffEq", "Plots"], + mode=PKGMODE_MANIFEST) + diff --git a/docs/literate/src/files/adding_new_scalar_equations.jl b/docs/literate/src/files/adding_new_scalar_equations.jl index fec7bcf667a..a65b4de7f1a 100644 --- a/docs/literate/src/files/adding_new_scalar_equations.jl +++ b/docs/literate/src/files/adding_new_scalar_equations.jl @@ -211,3 +211,15 @@ semi = remake(semi, solver=DGSEM(3, flux_godunov, VolumeIntegralFluxDifferencing ode = semidiscretize(semi, (0.0, 0.5)) sol = solve(ode, SSPRK43(); ode_default_options()...) plot(sol) + + +# ## Package versions + +# These results were obtained using the following versions. + +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "OrdinaryDiffEq", "Plots"], + mode=PKGMODE_MANIFEST) diff --git a/docs/literate/src/files/adding_nonconservative_equation.jl b/docs/literate/src/files/adding_nonconservative_equation.jl index 110fa486070..b40e21fb11a 100644 --- a/docs/literate/src/files/adding_nonconservative_equation.jl +++ b/docs/literate/src/files/adding_nonconservative_equation.jl @@ -288,3 +288,15 @@ sol = solve(ode, Tsit5(), abstol=1.0e-6, reltol=1.0e-6, ## Plot the numerical solution at the final time using Plots: plot plot(sol); + + +# ## Package versions + +# These results were obtained using the following versions. + +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "OrdinaryDiffEq", "Plots"], + mode=PKGMODE_MANIFEST) diff --git a/docs/literate/src/files/differentiable_programming.jl b/docs/literate/src/files/differentiable_programming.jl index 5c5a7cd7440..33427803afc 100644 --- a/docs/literate/src/files/differentiable_programming.jl +++ b/docs/literate/src/files/differentiable_programming.jl @@ -446,3 +446,15 @@ scatter(real.(λ), imag.(λ)) λ = eigvals(Matrix(A)) relative_maximum = maximum(real, λ) / maximum(abs, λ) @test relative_maximum < 1.0e-15 #src + + +# ## Package versions + +# These results were obtained using the following versions. + +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "OrdinaryDiffEq", "Plots", "ForwardDiff"], + mode=PKGMODE_MANIFEST) diff --git a/docs/literate/src/files/hohqmesh_tutorial.jl b/docs/literate/src/files/hohqmesh_tutorial.jl index 87076108d91..b19d363c4bf 100644 --- a/docs/literate/src/files/hohqmesh_tutorial.jl +++ b/docs/literate/src/files/hohqmesh_tutorial.jl @@ -566,3 +566,15 @@ mesh = UnstructuredMesh2D(mesh_file); # for details. # ![simulation_straight_sides_p4est_amr](https://user-images.githubusercontent.com/74359358/168049930-8abce6ac-cd47-4d04-b40b-0fa459bbd98d.png) + + +# ## Package versions + +# These results were obtained using the following versions. + +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "OrdinaryDiffEq", "Plots", "Trixi2Vtk", "HOHQMesh"], + mode=PKGMODE_MANIFEST) diff --git a/docs/literate/src/files/non_periodic_boundaries.jl b/docs/literate/src/files/non_periodic_boundaries.jl index 54da88a64aa..7ed6324ff99 100644 --- a/docs/literate/src/files/non_periodic_boundaries.jl +++ b/docs/literate/src/files/non_periodic_boundaries.jl @@ -155,3 +155,15 @@ end #
# ``` # Source: [`Video`](https://www.youtube.com/watch?v=w0A9X38cSe4) on Trixi.jl's YouTube channel [`Trixi Framework`](https://www.youtube.com/watch?v=WElqqdMhY4A) + + +# ## Package versions + +# These results were obtained using the following versions. + +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "OrdinaryDiffEq", "Plots"], + mode=PKGMODE_MANIFEST) diff --git a/docs/literate/src/files/parabolic_terms.jl b/docs/literate/src/files/parabolic_terms.jl index bac0098f8e9..d0a355bbc19 100644 --- a/docs/literate/src/files/parabolic_terms.jl +++ b/docs/literate/src/files/parabolic_terms.jl @@ -86,3 +86,14 @@ sol = solve(ode, RDPK3SpFSAL49(); abstol=time_int_tol, reltol=time_int_tol, using Plots plot(sol) + +# ## Package versions + +# These results were obtained using the following versions. + +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "OrdinaryDiffEq", "Plots"], + mode=PKGMODE_MANIFEST) diff --git a/docs/literate/src/files/scalar_linear_advection_1d.jl b/docs/literate/src/files/scalar_linear_advection_1d.jl index 42c831c98ba..77ba7b087cc 100644 --- a/docs/literate/src/files/scalar_linear_advection_1d.jl +++ b/docs/literate/src/files/scalar_linear_advection_1d.jl @@ -511,3 +511,15 @@ sol_trixi = solve(ode_trixi, RDPK3SpFSAL49(); abstol=1.0e-6, reltol=1.0e-6, ode plot!(sol_trixi, label="solution at t=$(tspan[2]) with Trixi.jl", legend=:topleft, linestyle=:dash, lw=2) @test maximum(abs.(vec(u0) - sol_trixi.u[end])) ≈ maximum(abs.(u0 - sol.u[end])) #src + + +# ## Package versions + +# These results were obtained using the following versions. + +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "OrdinaryDiffEq", "Plots"], + mode=PKGMODE_MANIFEST) diff --git a/docs/literate/src/files/shock_capturing.jl b/docs/literate/src/files/shock_capturing.jl index afa34cbf06a..dd6698c2a86 100644 --- a/docs/literate/src/files/shock_capturing.jl +++ b/docs/literate/src/files/shock_capturing.jl @@ -224,3 +224,15 @@ sol = solve(ode, CarpenterKennedy2N54(stage_limiter!, williamson_condition=false using Plots plot(sol) + + +# ## Package versions + +# These results were obtained using the following versions. + +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "OrdinaryDiffEq", "Plots"], + mode=PKGMODE_MANIFEST) diff --git a/docs/literate/src/files/structured_mesh_mapping.jl b/docs/literate/src/files/structured_mesh_mapping.jl index 0ae9cf723f8..c8da30bc2bf 100644 --- a/docs/literate/src/files/structured_mesh_mapping.jl +++ b/docs/literate/src/files/structured_mesh_mapping.jl @@ -201,3 +201,15 @@ plot!(getmesh(pd)) # unstructured mesh type [`UnstructuredMesh2D`] and its use of the # [High-Order Hex-Quad Mesh (HOHQMesh) generator](https://github.com/trixi-framework/HOHQMesh), # created and developed by David Kopriva. + + +# ## Package versions + +# These results were obtained using the following versions. + +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "OrdinaryDiffEq", "Plots"], + mode=PKGMODE_MANIFEST) diff --git a/docs/literate/src/files/time_stepping.jl b/docs/literate/src/files/time_stepping.jl index d400c4a94be..de7a2a83a41 100644 --- a/docs/literate/src/files/time_stepping.jl +++ b/docs/literate/src/files/time_stepping.jl @@ -49,7 +49,7 @@ # ```math # \Delta t_n = \text{CFL} * \min_i \frac{\Delta x_i}{\lambda_{\max}(u_i^n)} # ``` -# We compute $\Delta x_i$ by scaling the element size by a factor of $1/(N+1)$, cf. 
+# We compute $\Delta x_i$ by scaling the element size by a factor of $1/(N+1)$, cf. # [Gassner and Kopriva (2011)](https://doi.org/10.1137/100807211), Section 5. # Trixi.jl provides such a CFL-based step size control. It is implemented as the callback @@ -73,3 +73,15 @@ # You can find simple examples with a CFL-based step size control for instance in the elixirs # [`elixir_advection_basic.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_advection_basic.jl) # or [`elixir_euler_source_terms.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_dgsem/elixir_euler_source_terms.jl). + + +# ## Package versions + +# These results were obtained using the following versions. + +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "OrdinaryDiffEq"], + mode=PKGMODE_MANIFEST) diff --git a/docs/literate/src/files/upwind_fdsbp.jl b/docs/literate/src/files/upwind_fdsbp.jl index 36ca1b57404..6d3379fa30d 100644 --- a/docs/literate/src/files/upwind_fdsbp.jl +++ b/docs/literate/src/files/upwind_fdsbp.jl @@ -62,3 +62,15 @@ Matrix(D_upw.plus) # flux vector splitting, e.g., # - [`elixir_euler_vortex.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_2d_fdsbp/elixir_euler_vortex.jl) # - [`elixir_euler_taylor_green_vortex.jl`](https://github.com/trixi-framework/Trixi.jl/blob/main/examples/tree_3d_fdsbp/elixir_euler_taylor_green_vortex.jl) + + +# ## Package versions + +# These results were obtained using the following versions. + +using InteractiveUtils +versioninfo() + +using Pkg +Pkg.status(["Trixi", "SummationByPartsOperators"], + mode=PKGMODE_MANIFEST) From 15543f28b7070dbc403857a047d5dc2ca2d0c1c3 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Thu, 14 Sep 2023 06:45:19 +0200 Subject: [PATCH 149/163] set version to v0.5.43 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 06fd29ba590..943d0d48005 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "] -version = "0.5.43-pre" +version = "0.5.43" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From 92dedde81e12f8ac2259785a9eab40f475d82251 Mon Sep 17 00:00:00 2001 From: Hendrik Ranocha Date: Thu, 14 Sep 2023 06:45:34 +0200 Subject: [PATCH 150/163] set development version to v0.5.44-pre --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 943d0d48005..d134a8e548b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Trixi" uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb" authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. 
Winters ", "Jesse Chan "] -version = "0.5.43" +version = "0.5.44-pre" [deps] CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2" From 6069149fef0590c824ddee54b6d12d7531b6c790 Mon Sep 17 00:00:00 2001 From: ArseniyKholod <119304909+ArseniyKholod@users.noreply.github.com> Date: Fri, 15 Sep 2023 08:10:03 +0200 Subject: [PATCH 151/163] Handles restarting problem of adaptive time integration methods (#1565) * Create test.jl * Delete test.jl * loadcallback * adding_parallel_support * formatting * minimize dependencies * combine loadrestart and saverestart * fix * Update test_threaded.jl * fix * test fix * fix * MODULE add * fix * runtime macros * Update test_threaded.jl * handle MPI issues * enable PIDController test * Update test_mpi_tree.jl * fix * add asserts * Update save_restart.jl * add IController tests * enable HDF5 parallel * fix shot * fix shot 2 * fix shot 3 * fix shot 4 * fix shot 5 * fix shot 6 * fix shot 7 * fix shot 8 * fix shot 9 * fix shot 10 * fix shot 11 * fix shot 12 * fix shot 13 * fix shot 14 * fix shot 15 * fix shot 16 * fix shot 17 * fix shot 18 * enable additional configuration only in mpi test on linux * enable environment * test coverage issue * disable mpi macOs CI because of failure * disable new configurations to test coverage * disable PID and I test to test coverage issue * enable old coverage all * undo last commit and enable coverage on windows * enable new tests, mpi macOs and HDF5 parallel * fix * enable coverage on threads * test HDF5 parallel * test HDF5 parallel 2 * test HDF5 parallel 3 * fix * Update save_restart_dg.jl * test HDF5 parallel 4 * test HDF5 parallel 5 * Update configure_packages.jl * delete unnecessary changes * Update save_restart_dg.jl * Update save_restart_dg.jl * remove dependency on OrdinaryDiffEq * format * discard unrelated changes * delete barrier * delete eval() * comments & delete mpi_parallel * format * Update runtests.jl * Update runtests.jl * simplify tests * test failing MPI on windiws and macOs * test with RDPK3SpFSAL49 * test with RDPK3SpFSAL35 * change tests * fix and new test * Update test_tree_2d_euler.jl * fix and delete unnecessary test * add printing format * add docstrings * Update src/callbacks_step/save_restart.jl Co-authored-by: Michael Schlottke-Lakemper * fix * formatting * Update src/callbacks_step/save_restart_dg.jl Co-authored-by: Michael Schlottke-Lakemper * Update src/callbacks_step/save_restart_dg.jl Co-authored-by: Michael Schlottke-Lakemper * Update src/callbacks_step/save_restart.jl Co-authored-by: Michael Schlottke-Lakemper * Update src/callbacks_step/save_restart.jl Co-authored-by: Michael Schlottke-Lakemper * Update src/callbacks_step/save_restart.jl Co-authored-by: Michael Schlottke-Lakemper * suggested changes * new test * fix * fix * Update test_tree_2d_advection.jl * fix * fix error mpi on windows * rerun * Update src/callbacks_step/save_restart.jl Co-authored-by: Hendrik Ranocha * Add comments * format --------- Co-authored-by: Michael Schlottke-Lakemper Co-authored-by: Hendrik Ranocha --- .../elixir_advection_extended.jl | 7 ++-- .../tree_2d_dgsem/elixir_advection_restart.jl | 19 +++++---- src/Trixi.jl | 3 +- src/callbacks_step/save_restart.jl | 36 +++++++++++++++++ src/callbacks_step/save_restart_dg.jl | 24 ++++++++++++ test/test_mpi_tree.jl | 20 ++++++++-- test/test_threaded.jl | 39 ++++++++++++------- test/test_tree_2d_advection.jl | 20 ++++++++-- 8 files changed, 135 insertions(+), 33 deletions(-) diff --git a/examples/tree_2d_dgsem/elixir_advection_extended.jl 
b/examples/tree_2d_dgsem/elixir_advection_extended.jl index 8c837957ffd..278dc85386d 100644 --- a/examples/tree_2d_dgsem/elixir_advection_extended.jl +++ b/examples/tree_2d_dgsem/elixir_advection_extended.jl @@ -54,7 +54,7 @@ analysis_callback = AnalysisCallback(semi, interval=analysis_interval, alive_callback = AliveCallback(analysis_interval=analysis_interval) # The SaveRestartCallback allows to save a file from which a Trixi.jl simulation can be restarted -save_restart = SaveRestartCallback(interval=100, +save_restart = SaveRestartCallback(interval=40, save_final_restart=true) # The SaveSolutionCallback allows to save the solution to a file in regular intervals @@ -77,9 +77,10 @@ callbacks = CallbackSet(summary_callback, # run the simulation # OrdinaryDiffEq's `solve` method evolves the solution in time and executes the passed callbacks -sol = solve(ode, CarpenterKennedy2N54(williamson_condition=false), +alg = CarpenterKennedy2N54(williamson_condition=false) +sol = solve(ode, alg, dt=1.0, # solve needs some value here but it will be overwritten by the stepsize_callback - save_everystep=false, callback=callbacks); + save_everystep=false, callback=callbacks; ode_default_options()...); # Print the timer summary summary_callback() diff --git a/examples/tree_2d_dgsem/elixir_advection_restart.jl b/examples/tree_2d_dgsem/elixir_advection_restart.jl index 771ec5aefe7..b63a8d1f7bc 100644 --- a/examples/tree_2d_dgsem/elixir_advection_restart.jl +++ b/examples/tree_2d_dgsem/elixir_advection_restart.jl @@ -3,9 +3,10 @@ using OrdinaryDiffEq using Trixi ############################################################################### -# create a restart file - -trixi_include(@__MODULE__, joinpath(@__DIR__, "elixir_advection_extended.jl")) +# Define time integration algorithm +alg = CarpenterKennedy2N54(williamson_condition=false) +# Create a restart file +trixi_include(@__MODULE__, joinpath(@__DIR__, "elixir_advection_extended.jl"), alg = alg, tspan = (0.0, 10.0)) ############################################################################### @@ -14,22 +15,26 @@ trixi_include(@__MODULE__, joinpath(@__DIR__, "elixir_advection_extended.jl")) # Note: If you get a restart file from somewhere else, you need to provide # appropriate setups in the elixir loading a restart file -restart_filename = joinpath("out", "restart_000018.h5") +restart_filename = joinpath("out", "restart_000040.h5") mesh = load_mesh(restart_filename) semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver) -tspan = (load_time(restart_filename), 2.0) +tspan = (load_time(restart_filename), 10.0) dt = load_dt(restart_filename) ode = semidiscretize(semi, tspan, restart_filename); # Do not overwrite the initial snapshot written by elixir_advection_extended.jl. save_solution.condition.save_initial_solution = false -alg = CarpenterKennedy2N54(williamson_condition=false) integrator = init(ode, alg, dt=dt, # solve needs some value here but it will be overwritten by the stepsize_callback - save_everystep=false, callback=callbacks) + save_everystep=false, callback=callbacks; ode_default_options()...) + +# Load saved context for adaptive time integrator +if integrator.opts.adaptive + load_adaptive_time_integrator!(integrator, restart_filename) +end # Get the last time index and work with that. 
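# (`load_timestep!` reads the time step number stored in the restart file and
#  assigns it to the integrator, so that step and output-file numbering
#  continue seamlessly after the restart.)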
load_timestep!(integrator, restart_filename) diff --git a/src/Trixi.jl b/src/Trixi.jl index c883c3bf19f..b65d03e7975 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -255,7 +255,8 @@ export SummaryCallback, SteadyStateCallback, AnalysisCallback, AliveCallback, GlmSpeedCallback, LBMCollisionCallback, EulerAcousticsCouplingCallback, TrivialCallback, AnalysisCallbackCoupled -export load_mesh, load_time, load_timestep, load_timestep!, load_dt +export load_mesh, load_time, load_timestep, load_timestep!, load_dt, + load_adaptive_time_integrator! export ControllerThreeLevel, ControllerThreeLevelCombined, IndicatorLöhner, IndicatorLoehner, IndicatorMax, diff --git a/src/callbacks_step/save_restart.jl b/src/callbacks_step/save_restart.jl index 06817a9b730..0d174d85805 100644 --- a/src/callbacks_step/save_restart.jl +++ b/src/callbacks_step/save_restart.jl @@ -105,6 +105,11 @@ function (restart_callback::SaveRestartCallback)(integrator) end save_restart_file(u_ode, t, dt, iter, semi, restart_callback) + # If using an adaptive time stepping scheme, store controller values for restart + if integrator.opts.adaptive + save_adaptive_time_integrator(integrator, integrator.opts.controller, + restart_callback) + end end # avoid re-evaluating possible FSAL stages @@ -168,5 +173,36 @@ function load_restart_file(semi::AbstractSemidiscretization, restart_file) load_restart_file(mesh_equations_solver_cache(semi)..., restart_file) end +""" + load_adaptive_time_integrator!(integrator, restart_file::AbstractString) + +Load the context information for time integrators with error-based step size control +saved in a `restart_file`. +""" +function load_adaptive_time_integrator!(integrator, restart_file::AbstractString) + controller = integrator.opts.controller + # Read context information for controller + h5open(restart_file, "r") do file + # Ensure that the necessary information was saved + if !("time_integrator_qold" in keys(attributes(file))) || + !("time_integrator_dtpropose" in keys(attributes(file))) || + (hasproperty(controller, :err) && + !("time_integrator_controller_err" in keys(attributes(file)))) + error("Missing data in restart file: check the consistency of adaptive time controller with initial setup!") + end + # Load data that is required both for PIController and PIDController + integrator.qold = read(attributes(file)["time_integrator_qold"]) + integrator.dtpropose = read(attributes(file)["time_integrator_dtpropose"]) + # Accept step to use dtpropose already in the first step + integrator.accept_step = true + # Reevaluate integrator.fsal_first on the first step + integrator.reeval_fsal = true + # Load additional parameters for PIDController + if hasproperty(controller, :err) # Distinguish PIDController from PIController + controller.err[:] = read(attributes(file)["time_integrator_controller_err"]) + end + end +end + include("save_restart_dg.jl") end # @muladd diff --git a/src/callbacks_step/save_restart_dg.jl b/src/callbacks_step/save_restart_dg.jl index 8db6db2d2b8..cddeef77bb2 100644 --- a/src/callbacks_step/save_restart_dg.jl +++ b/src/callbacks_step/save_restart_dg.jl @@ -327,4 +327,28 @@ function load_restart_file_on_root(mesh::Union{ParallelTreeMesh, ParallelP4estMe return u_ode end + +# Store controller values for an adaptive time stepping scheme +function save_adaptive_time_integrator(integrator, + controller, restart_callback) + # Save only on root + if mpi_isroot() + @unpack output_directory = restart_callback + timestep = integrator.stats.naccept + + # Filename based on current time step + 
filename = joinpath(output_directory, @sprintf("restart_%06d.h5", timestep)) + + # Open file (preserve existing content) + h5open(filename, "r+") do file + # Add context information as attributes both for PIController and PIDController + attributes(file)["time_integrator_qold"] = integrator.qold + attributes(file)["time_integrator_dtpropose"] = integrator.dtpropose + # For PIDController is necessary to save additional parameters + if hasproperty(controller, :err) # Distinguish PIDController from PIController + attributes(file)["time_integrator_controller_err"] = controller.err + end + end + end +end end # @muladd diff --git a/test/test_mpi_tree.jl b/test/test_mpi_tree.jl index 8403fcf1b04..8f08a9d72e7 100644 --- a/test/test_mpi_tree.jl +++ b/test/test_mpi_tree.jl @@ -23,10 +23,22 @@ CI_ON_WINDOWS = (get(ENV, "GITHUB_ACTIONS", false) == "true") && Sys.iswindows() end @trixi_testset "elixir_advection_restart.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), - # Expected errors are exactly the same as in the serial test! - l2 = [7.81674284320524e-6], - linf = [6.314906965243505e-5]) + using OrdinaryDiffEq: RDPK3SpFSAL49 + Trixi.mpi_isroot() && println("═"^100) + Trixi.mpi_isroot() && println(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl")) + trixi_include(@__MODULE__, joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), + alg = RDPK3SpFSAL49(), tspan = (0.0, 10.0)) + l2_expected, linf_expected = analysis_callback(sol) + + Trixi.mpi_isroot() && println("═"^100) + Trixi.mpi_isroot() && println(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl")) + # Errors are exactly the same as in the elixir_advection_extended.jl + trixi_include(@__MODULE__, joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), + alg = RDPK3SpFSAL49()) + l2_actual, linf_actual = analysis_callback(sol) + + Trixi.mpi_isroot() && @test l2_actual == l2_expected + Trixi.mpi_isroot() && @test linf_actual == linf_expected end @trixi_testset "elixir_advection_mortar.jl" begin diff --git a/test/test_threaded.jl b/test/test_threaded.jl index 9b30836d0ed..2337d73f30a 100644 --- a/test/test_threaded.jl +++ b/test/test_threaded.jl @@ -12,27 +12,38 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true) @testset "Threaded tests" begin @testset "TreeMesh" begin @trixi_testset "elixir_advection_restart.jl" begin - @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_restart.jl"), - # Expected errors are exactly the same as in the serial test! 
- l2 = [7.81674284320524e-6], - linf = [6.314906965243505e-5]) + elixir = joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_extended.jl") + Trixi.mpi_isroot() && println("═"^100) + Trixi.mpi_isroot() && println(elixir) + trixi_include(@__MODULE__, elixir, tspan = (0.0, 10.0)) + l2_expected, linf_expected = analysis_callback(sol) + + elixir = joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_restart.jl") + Trixi.mpi_isroot() && println("═"^100) + Trixi.mpi_isroot() && println(elixir) + # Errors are exactly the same as in the elixir_advection_extended.jl + trixi_include(@__MODULE__, elixir) + l2_actual, linf_actual = analysis_callback(sol) + + Trixi.mpi_isroot() && @test l2_actual == l2_expected + Trixi.mpi_isroot() && @test linf_actual == linf_expected - # Ensure that we do not have excessive memory allocations - # (e.g., from type instabilities) - let - t = sol.t[end] - u_ode = sol.u[end] - du_ode = similar(u_ode) - @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 - end + # Ensure that we do not have excessive memory allocations + # (e.g., from type instabilities) + let + t = sol.t[end] + u_ode = sol.u[end] + du_ode = similar(u_ode) + @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000 + end end @trixi_testset "elixir_advection_restart.jl with threaded time integration" begin @test_trixi_include(joinpath(examples_dir(), "tree_2d_dgsem", "elixir_advection_restart.jl"), alg = CarpenterKennedy2N54(williamson_condition = false, thread = OrdinaryDiffEq.True()), # Expected errors are exactly the same as in the serial test! - l2 = [7.81674284320524e-6], - linf = [6.314906965243505e-5]) + l2 = [8.005068880114254e-6], + linf = [6.39093577996519e-5]) end @trixi_testset "elixir_advection_amr_refine_twice.jl" begin diff --git a/test/test_tree_2d_advection.jl b/test/test_tree_2d_advection.jl index 973d0caf88b..36cb1e882cc 100644 --- a/test/test_tree_2d_advection.jl +++ b/test/test_tree_2d_advection.jl @@ -25,10 +25,22 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_2d_dgsem") end @trixi_testset "elixir_advection_restart.jl" begin - @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), - # Expected errors are exactly the same as in the parallel test! 
- l2 = [7.81674284320524e-6], - linf = [6.314906965243505e-5]) + using OrdinaryDiffEq: SSPRK43 + println("═"^100) + println(joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl")) + trixi_include(@__MODULE__, joinpath(EXAMPLES_DIR, "elixir_advection_extended.jl"), + alg = SSPRK43(), tspan = (0.0, 10.0)) + l2_expected, linf_expected = analysis_callback(sol) + + println("═"^100) + println(joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl")) + # Errors are exactly the same as in the elixir_advection_extended.jl + trixi_include(@__MODULE__, joinpath(EXAMPLES_DIR, "elixir_advection_restart.jl"), + alg = SSPRK43()) + l2_actual, linf_actual = analysis_callback(sol) + + @test l2_actual == l2_expected + @test linf_actual == linf_expected end @trixi_testset "elixir_advection_mortar.jl" begin From 73384acbf45cf10710cfc817bc91a1812a0db1fd Mon Sep 17 00:00:00 2001 From: Benjamin Bolm <74359358+bennibolm@users.noreply.github.com> Date: Sat, 16 Sep 2023 16:16:12 +0200 Subject: [PATCH 152/163] Assure conservation for SSP scheme (#1640) * Add denominator variable for SSP scheme * Fix format * Implement suggestions --- src/time_integration/methods_SSP.jl | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/time_integration/methods_SSP.jl b/src/time_integration/methods_SSP.jl index 8ecad69748b..a0ed889968a 100644 --- a/src/time_integration/methods_SSP.jl +++ b/src/time_integration/methods_SSP.jl @@ -24,14 +24,16 @@ The third-order SSP Runge-Kutta method of Shu and Osher. This is an experimental feature and may change in future releases. """ struct SimpleSSPRK33{StageCallbacks} <: SimpleAlgorithmSSP - a::SVector{3, Float64} - b::SVector{3, Float64} + numerator_a::SVector{3, Float64} + numerator_b::SVector{3, Float64} + denominator::SVector{3, Float64} c::SVector{3, Float64} stage_callbacks::StageCallbacks function SimpleSSPRK33(; stage_callbacks = ()) - a = SVector(0.0, 3 / 4, 1 / 3) - b = SVector(1.0, 1 / 4, 2 / 3) + numerator_a = SVector(0.0, 3.0, 1.0) # a = numerator_a / denominator + numerator_b = SVector(1.0, 1.0, 2.0) # b = numerator_b / denominator + denominator = SVector(1.0, 4.0, 3.0) c = SVector(0.0, 1.0, 1 / 2) # Butcher tableau @@ -42,7 +44,8 @@ struct SimpleSSPRK33{StageCallbacks} <: SimpleAlgorithmSSP # -------------------- # b | 1/6 1/6 2/3 - new{typeof(stage_callbacks)}(a, b, c, stage_callbacks) + new{typeof(stage_callbacks)}(numerator_a, numerator_b, denominator, c, + stage_callbacks) end end @@ -166,7 +169,9 @@ function solve!(integrator::SimpleIntegratorSSP) end # perform convex combination - @. integrator.u = alg.a[stage] * integrator.r0 + alg.b[stage] * integrator.u + @. 
+        integrator.u = (alg.numerator_a[stage] * integrator.r0 +
+                        alg.numerator_b[stage] * integrator.u) /
+                       alg.denominator[stage]
     end
     integrator.iter += 1

From a64004d98c7a1b0c4894663c176f21c2178a3630 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Tue, 19 Sep 2023 11:49:30 +0200
Subject: [PATCH 153/163] CompatHelper: bump compat for Documenter to 1 for
 package docs, (keep existing compat) (#1641)

* CompatHelper: bump compat for Documenter to 1 for package docs, (keep existing compat)

* remove strict since it is removed and active by default

* allow only v1 of Documenter.jl

* ignore size threshold for API reference of Trixi.jl

* try to fix size_threshold_ignore

---------

Co-authored-by: CompatHelper Julia
Co-authored-by: Hendrik Ranocha
Co-authored-by: Hendrik Ranocha
---
 docs/Project.toml |  2 +-
 docs/make.jl      | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/Project.toml b/docs/Project.toml
index 9fc974d6f38..ffa86e0b9f7 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -13,7 +13,7 @@ Trixi2Vtk = "bc1476a1-1ca6-4cc3-950b-c312b255ff95"
 
 [compat]
 CairoMakie = "0.6, 0.7, 0.8, 0.9, 0.10"
-Documenter = "0.27"
+Documenter = "1"
 ForwardDiff = "0.10"
 HOHQMesh = "0.1, 0.2"
 LaTeXStrings = "1.2"
diff --git a/docs/make.jl b/docs/make.jl
index f882fcf1219..df8ac04be12 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -77,7 +77,7 @@ makedocs(
     # Specify modules for which docstrings should be shown
     modules = [Trixi, Trixi2Vtk],
     # Set sitename to Trixi.jl
-    sitename="Trixi.jl",
+    sitename = "Trixi.jl",
     # Provide additional formatting options
     format = Documenter.HTML(
         # Disable pretty URLs during manual testing
@@ -85,7 +85,8 @@
         # Explicitly add favicon as asset
         assets = ["assets/favicon.ico"],
         # Set canonical URL to GitHub pages URL
-        canonical = "https://trixi-framework.github.io/Trixi.jl/stable"
+        canonical = "https://trixi-framework.github.io/Trixi.jl/stable",
+        size_threshold_ignore = ["reference-trixi.md"]
     ),
     # Explicitly specify documentation structure
     pages = [
        "Authors" => "authors.md",
        "Contributing" => "contributing.md",
        "Code of Conduct" => "code_of_conduct.md",
-        "License" => "license.md"
-    ],
-    strict = true # to make the GitHub action fail when doctests fail, see https://github.com/neuropsychology/Psycho.jl/issues/34
+        "License" => "license.md",
+    ]
 )

 deploydocs(

From 7e228985a993e6e50249e8a13d9116db9f14e3bb Mon Sep 17 00:00:00 2001
From: Daniel Doehring
Date: Wed, 20 Sep 2023 08:03:13 +0200
Subject: [PATCH 154/163] Increase type stab, avoid allocs (#1642)

* Increase type stab, avoid allocs

* format

* test seems no longer broken

* only essentials

Co-authored-by: Hendrik Ranocha
---
 src/solvers/dgsem_unstructured/dg_2d.jl | 4 ++--
 test/test_threaded.jl                   | 6 +-----
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl b/src/solvers/dgsem_unstructured/dg_2d.jl
index 7b8dafdddd2..b12a96c4c31 100644
--- a/src/solvers/dgsem_unstructured/dg_2d.jl
+++ b/src/solvers/dgsem_unstructured/dg_2d.jl
@@ -357,9 +357,9 @@ function calc_boundary_flux_by_type!(cache, t, BCs::Tuple{}, BC_indices::Tuple{}
     nothing
 end
 
-function calc_boundary_flux!(cache, t, boundary_condition, boundary_indexing,
+function calc_boundary_flux!(cache, t, boundary_condition::BC, boundary_indexing,
                              mesh::UnstructuredMesh2D, equations,
-                             surface_integral, dg::DG)
+                             surface_integral, dg::DG) where {BC}
     @unpack surface_flux_values = cache.elements
     @unpack element_id, element_side_id = cache.boundaries
 
diff --git a/test/test_threaded.jl b/test/test_threaded.jl
index 2337d73f30a..b13b5d0f5fc 100644
--- a/test/test_threaded.jl
+++ b/test/test_threaded.jl
@@ -312,11 +312,7 @@ Trixi.mpi_isroot() && isdir(outdir) && rm(outdir, recursive=true)
       t = sol.t[end]
       u_ode = sol.u[end]
       du_ode = similar(u_ode)
-      if (Threads.nthreads() < 2) || (VERSION < v"1.9")
-        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000
-      else
-        @test_broken (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000
-      end
+      @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 5000
     end
   end

From 09441e1c0553cfc1e88f582354457233d4d872b2 Mon Sep 17 00:00:00 2001
From: Michael Schlottke-Lakemper
Date: Wed, 20 Sep 2023 09:15:27 +0200
Subject: [PATCH 155/163] Add Aqua.jl testing (#1628)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add Aqua.jl testing

* [deps] before [compat]

* Skip ambiguities (too many false positives) and account for @jlchan being a 🏴‍☠️

* Bump Flux minimum version

* Adapt test values

* Add back pre-v0.14 version for Flux.jl to satisfy Julia v1.8

* Add Aqua badge

* Explain piracy exceptions

---------

Co-authored-by: Hendrik Ranocha
---
 README.md                  |  1 +
 docs/src/index.md          |  1 +
 test/Project.toml          | 20 +++++++++++---------
 test/runtests.jl           |  1 +
 test/test_aqua.jl          | 18 ++++++++++++++++++
 test/test_tree_2d_euler.jl |  2 +-
 6 files changed, 33 insertions(+), 10 deletions(-)
 create mode 100644 test/test_aqua.jl

diff --git a/README.md b/README.md
index c177ad2347f..673708d8b89 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,7 @@
 [![Build Status](https://github.com/trixi-framework/Trixi.jl/workflows/CI/badge.svg)](https://github.com/trixi-framework/Trixi.jl/actions?query=workflow%3ACI)
 [![Codecov](https://codecov.io/gh/trixi-framework/Trixi.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/trixi-framework/Trixi.jl)
 [![Coveralls](https://coveralls.io/repos/github/trixi-framework/Trixi.jl/badge.svg?branch=main)](https://coveralls.io/github/trixi-framework/Trixi.jl?branch=main)
+[![Aqua QA](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl)
 [![License: MIT](https://img.shields.io/badge/License-MIT-success.svg)](https://opensource.org/licenses/MIT)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3996439.svg)](https://doi.org/10.5281/zenodo.3996439)
 [![Downloads](https://shields.io/endpoint?url=https://pkgs.genieframework.com/api/v1/badge/Trixi)](https://pkgs.genieframework.com?packages=Trixi)
diff --git a/docs/src/index.md b/docs/src/index.md
index bb2afd1019f..9ffaee26c40 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -7,6 +7,7 @@
 [![Build Status](https://github.com/trixi-framework/Trixi.jl/workflows/CI/badge.svg)](https://github.com/trixi-framework/Trixi.jl/actions?query=workflow%3ACI)
 [![Codecov](https://codecov.io/gh/trixi-framework/Trixi.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/trixi-framework/Trixi.jl)
 [![Coveralls](https://coveralls.io/repos/github/trixi-framework/Trixi.jl/badge.svg?branch=main)](https://coveralls.io/github/trixi-framework/Trixi.jl?branch=main)
+[![Aqua QA](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl)
 [![License: MIT](https://img.shields.io/badge/License-MIT-success.svg)](https://opensource.org/licenses/MIT)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3996439.svg)](https://doi.org/10.5281/zenodo.3996439)
diff --git a/test/Project.toml b/test/Project.toml
index 7115a19b441..c45be49a5d0 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,13 +1,5 @@
-[compat]
-BSON = "0.3.3"
-CairoMakie = "0.6, 0.7, 0.8, 0.9, 0.10"
-Flux = "0.13 - 0.13.12" # TODO: Return to "0.13" once https://github.com/FluxML/Flux.jl/issues/2204 is resolved
-ForwardDiff = "0.10"
-MPI = "0.20"
-OrdinaryDiffEq = "6.49.1"
-Plots = "1.16"
-
 [deps]
+Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
 CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
@@ -21,6 +13,16 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
+[compat]
+Aqua = "0.7"
+BSON = "0.3.3"
+CairoMakie = "0.6, 0.7, 0.8, 0.9, 0.10"
+Flux = "0.13.15, 0.14"
+ForwardDiff = "0.10"
+MPI = "0.20"
+OrdinaryDiffEq = "6.49.1"
+Plots = "1.16"
+
 [preferences.OrdinaryDiffEq]
 PrecompileAutoSpecialize = false
 PrecompileAutoSwitch = false
diff --git a/test/runtests.jl b/test/runtests.jl
index f1adbaaf1df..7e195fe7402 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -109,6 +109,7 @@ const TRIXI_NTHREADS = clamp(Sys.CPU_THREADS, 2, 3)
 
   @time if TRIXI_TEST == "all" || TRIXI_TEST == "misc_part2"
     include("test_special_elixirs.jl")
+    include("test_aqua.jl")
   end
 
   @time if TRIXI_TEST == "all" || TRIXI_TEST == "performance_specializations_part1"
diff --git a/test/test_aqua.jl b/test/test_aqua.jl
new file mode 100644
index 00000000000..f7ab4f545d0
--- /dev/null
+++ b/test/test_aqua.jl
@@ -0,0 +1,18 @@
+module TestAqua
+
+using Aqua
+using Test
+using Trixi
+
+include("test_trixi.jl")
+
+@timed_testset "Aqua.jl" begin
+    Aqua.test_all(Trixi,
+                  ambiguities = false,
+                  # exceptions necessary for adding a new method `StartUpDG.estimate_h`
+                  # in src/solvers/dgmulti/sbp.jl
+                  piracy = (treat_as_own = [Trixi.StartUpDG.RefElemData,
+                                            Trixi.StartUpDG.MeshData],))
+end
+
+end #module
diff --git a/test/test_tree_2d_euler.jl b/test/test_tree_2d_euler.jl
index e1e3ad32e7d..1b8a261a60d 100644
--- a/test/test_tree_2d_euler.jl
+++ b/test/test_tree_2d_euler.jl
@@ -140,7 +140,7 @@ EXAMPLES_DIR = pkgdir(Trixi, "examples", "tree_2d_dgsem")
   @trixi_testset "elixir_euler_sedov_blast_wave_neuralnetwork_perssonperaire.jl" begin
     @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov_blast_wave_neuralnetwork_perssonperaire.jl"),
       l2 = [0.0845430093623868, 0.09271459184623232, 0.09271459184623232, 0.4377291875101709],
-      linf = [1.3608553480069898, 1.6822884847136004, 1.6822884847135997, 4.220147414536653],
+      linf = [1.3608553480069898, 1.6822884847136004, 1.6822884847135997, 4.2201475428867035],
       maxiters = 30,
       coverage_override = (maxiters=6,))
   end

From ea4e2cd0863893c16ff5cb2b0091e76c5a9b9b4e Mon Sep 17 00:00:00 2001
From: Daniel Doehring
Date: Wed, 20 Sep 2023 16:15:47 +0200
Subject: [PATCH 156/163] @muladd for Navier Stokes (#1644)

* introduce muladd, update AMR tests

* format
---
 .../compressible_navier_stokes_1d.jl | 38 +++++++++----
 .../compressible_navier_stokes_2d.jl | 56 +++++++++++++------
 .../compressible_navier_stokes_3d.jl | 47 +++++++++++-----
 test/test_parabolic_1d.jl            |  8 +--
 4 files changed, 106 insertions(+), 43 deletions(-)

diff --git a/src/equations/compressible_navier_stokes_1d.jl b/src/equations/compressible_navier_stokes_1d.jl
index dca846cac1e..74d672ce7ae 100644
--- a/src/equations/compressible_navier_stokes_1d.jl
+++ b/src/equations/compressible_navier_stokes_1d.jl
@@ -1,3 +1,10 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
 @doc raw"""
     CompressibleNavierStokesDiffusion1D(equations; mu, Pr,
                                         gradient_variables=GradientVariablesPrimitive())
@@ -77,7 +84,8 @@ w_2 = \frac{\rho v1}{p},\, w_3 = -\frac{\rho}{p}
 This code is experimental and may be changed or removed in any future release.
 """
 struct CompressibleNavierStokesDiffusion1D{GradientVariables, RealT <: Real,
-                                           E <: AbstractCompressibleEulerEquations{1}} <:
+                                           E <: AbstractCompressibleEulerEquations{1}
+                                           } <:
        AbstractCompressibleNavierStokesDiffusion{1, 3}
     # TODO: parabolic
     # 1) For now save gamma and inv(gamma-1) again, but could potentially reuse them from the Euler equations
@@ -109,7 +117,8 @@ function CompressibleNavierStokesDiffusion1D(equations::CompressibleEulerEquatio
     CompressibleNavierStokesDiffusion1D{typeof(gradient_variables), typeof(gamma),
                                         typeof(equations)}(gamma, inv_gamma_minus_one,
                                                            μ, Pr, kappa,
-                                                           equations, gradient_variables)
+                                                           equations,
+                                                           gradient_variables)
 end
 
 # TODO: parabolic
@@ -263,7 +272,8 @@ end
                               u_inner,
                               orientation::Integer,
                               direction,
-                              x, t,
+                              x,
+                              t,
                               operator_type::Gradient,
                               equations::CompressibleNavierStokesDiffusion1D{
                                                                              GradientVariablesPrimitive
@@ -278,7 +288,8 @@ end
                               u_inner,
                               orientation::Integer,
                               direction,
-                              x, t,
+                              x,
+                              t,
                               operator_type::Divergence,
                               equations::CompressibleNavierStokesDiffusion1D{
                                                                              GradientVariablesPrimitive
@@ -299,7 +310,8 @@ end
                               u_inner,
                               orientation::Integer,
                               direction,
-                              x, t,
+                              x,
+                              t,
                               operator_type::Gradient,
                               equations::CompressibleNavierStokesDiffusion1D{
                                                                              GradientVariablesPrimitive
@@ -316,7 +328,8 @@ end
                              u_inner,
                              orientation::Integer,
                              direction,
-                             x, t,
+                             x,
+                             t,
                              operator_type::Divergence,
                              equations::CompressibleNavierStokesDiffusion1D{
                                                                             GradientVariablesPrimitive
@@ -337,7 +350,8 @@ end
                              w_inner,
                              orientation::Integer,
                              direction,
-                             x, t,
+                             x,
+                             t,
                              operator_type::Gradient,
                              equations::CompressibleNavierStokesDiffusion1D{
                                                                             GradientVariablesEntropy
@@ -354,7 +368,8 @@ end
                              w_inner,
                              orientation::Integer,
                              direction,
-                             x, t,
+                             x,
+                             t,
                              operator_type::Divergence,
                              equations::CompressibleNavierStokesDiffusion1D{
                                                                             GradientVariablesEntropy
@@ -374,7 +389,8 @@ end
                              w_inner,
                              orientation::Integer,
                              direction,
-                             x, t,
+                             x,
+                             t,
                              operator_type::Gradient,
                              equations::CompressibleNavierStokesDiffusion1D{
                                                                             GradientVariablesEntropy
@@ -394,10 +410,12 @@ end
                              w_inner,
                              orientation::Integer,
                              direction,
-                             x, t,
+                             x,
+                             t,
                              operator_type::Divergence,
                              equations::CompressibleNavierStokesDiffusion1D{
                                                                             GradientVariablesEntropy
                                                                             })
     return SVector(flux_inner[1], flux_inner[2], flux_inner[3])
 end
+end # @muladd
diff --git a/src/equations/compressible_navier_stokes_2d.jl b/src/equations/compressible_navier_stokes_2d.jl
index f762fe5d5ee..b10ffa3b9d3 100644
--- a/src/equations/compressible_navier_stokes_2d.jl
+++ b/src/equations/compressible_navier_stokes_2d.jl
@@ -1,3 +1,10 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
 @doc raw"""
     CompressibleNavierStokesDiffusion2D(equations; mu, Pr,
                                         gradient_variables=GradientVariablesPrimitive())
@@ -77,7 +84,8 @@ w_2 = \frac{\rho v_1}{p},\, w_3 = \frac{\rho v_2}{p},\, w_4 = -\frac{\rho}{p}
 This code is experimental and may be changed or removed in any future release.
 """
 struct CompressibleNavierStokesDiffusion2D{GradientVariables, RealT <: Real,
-                                           E <: AbstractCompressibleEulerEquations{2}} <:
+                                           E <: AbstractCompressibleEulerEquations{2}
+                                           } <:
       AbstractCompressibleNavierStokesDiffusion{2, 4}
    # TODO: parabolic
    # 1) For now save gamma and inv(gamma-1) again, but could potentially reuse them from the Euler equations
@@ -109,7 +117,8 @@ function CompressibleNavierStokesDiffusion2D(equations::CompressibleEulerEquatio
     CompressibleNavierStokesDiffusion2D{typeof(gradient_variables), typeof(gamma),
                                         typeof(equations)}(gamma, inv_gamma_minus_one,
                                                            μ, Pr, kappa,
-                                                           equations, gradient_variables)
+                                                           equations,
+                                                           gradient_variables)
 end
 
 # TODO: parabolic
@@ -301,12 +310,14 @@ end
                   <:Adiabatic})(flux_inner,
                                 u_inner,
                                 normal::AbstractVector,
-                                x, t,
+                                x,
+                                t,
                                 operator_type::Gradient,
                                 equations::CompressibleNavierStokesDiffusion2D{
                                                                                GradientVariablesPrimitive
                                                                                })
-    v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t,
+    v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x,
+                                                                                    t,
                                                                                     equations)
     return SVector(u_inner[1], v1, v2, u_inner[4])
 end
@@ -315,7 +326,8 @@ end
                   <:Adiabatic})(flux_inner,
                                 u_inner,
                                 normal::AbstractVector,
-                                x, t,
+                                x,
+                                t,
                                 operator_type::Divergence,
                                 equations::CompressibleNavierStokesDiffusion2D{
                                                                                GradientVariablesPrimitive
@@ -324,7 +336,8 @@ end
     normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x,
                                                                                                            t,
                                                                                                            equations)
-    v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t,
+    v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x,
+                                                                                    t,
                                                                                     equations)
     _, tau_1n, tau_2n, _ = flux_inner # extract fluxes for 2nd and 3rd equations
     normal_energy_flux = v1 * tau_1n + v2 * tau_2n + normal_heat_flux
@@ -335,12 +348,14 @@ end
                   <:Isothermal})(flux_inner,
                                  u_inner,
                                  normal::AbstractVector,
-                                 x, t,
+                                 x,
+                                 t,
                                  operator_type::Gradient,
                                  equations::CompressibleNavierStokesDiffusion2D{
                                                                                 GradientVariablesPrimitive
                                                                                 })
-    v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t,
+    v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x,
+                                                                                    t,
                                                                                     equations)
     T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t,
                                                                                 equations)
@@ -351,7 +366,8 @@ end
                   <:Isothermal})(flux_inner,
                                  u_inner,
                                  normal::AbstractVector,
-                                 x, t,
+                                 x,
+                                 t,
                                  operator_type::Divergence,
                                  equations::CompressibleNavierStokesDiffusion2D{
                                                                                 GradientVariablesPrimitive
@@ -371,12 +387,14 @@ end
                   <:Adiabatic})(flux_inner,
                                 w_inner,
                                 normal::AbstractVector,
-                                x, t,
+                                x,
+                                t,
                                 operator_type::Gradient,
                                 equations::CompressibleNavierStokesDiffusion2D{
                                                                                GradientVariablesEntropy
                                                                                })
-    v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t,
+    v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x,
+                                                                                    t,
                                                                                     equations)
     negative_rho_inv_p = w_inner[4] # w_4 = -rho / p
     return SVector(w_inner[1], -v1 * negative_rho_inv_p, -v2 * negative_rho_inv_p,
@@ -388,7 +406,8 @@ end
                   <:Adiabatic})(flux_inner,
                                 w_inner,
                                 normal::AbstractVector,
-                                x, t,
+                                x,
+                                t,
                                 operator_type::Divergence,
                                 equations::CompressibleNavierStokesDiffusion2D{
                                                                                GradientVariablesEntropy
@@ -396,7 +415,8 @@ end
     normal_heat_flux = boundary_condition.boundary_condition_heat_flux.boundary_value_normal_flux_function(x,
                                                                                                            t,
                                                                                                            equations)
-    v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t,
+    v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x,
+                                                                                    t,
                                                                                     equations)
     _, tau_1n, tau_2n, _ = flux_inner # extract fluxes for 2nd and 3rd equations
     normal_energy_flux = v1 * tau_1n + v2 * tau_2n + normal_heat_flux
@@ -407,12 +427,14 @@ end
                   <:Isothermal})(flux_inner,
                                  w_inner,
                                  normal::AbstractVector,
-                                 x, t,
+                                 x,
+                                 t,
                                  operator_type::Gradient,
                                  equations::CompressibleNavierStokesDiffusion2D{
                                                                                 GradientVariablesEntropy
                                                                                 })
-    v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x, t,
+    v1, v2 = boundary_condition.boundary_condition_velocity.boundary_value_function(x,
+                                                                                    t,
                                                                                     equations)
     T = boundary_condition.boundary_condition_heat_flux.boundary_value_function(x, t,
                                                                                 equations)
@@ -426,10 +448,12 @@ end
                   <:Isothermal})(flux_inner,
                                  w_inner,
                                  normal::AbstractVector,
-                                 x, t,
+                                 x,
+                                 t,
                                  operator_type::Divergence,
                                  equations::CompressibleNavierStokesDiffusion2D{
                                                                                 GradientVariablesEntropy
                                                                                 })
     return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4])
 end
+end # @muladd
diff --git a/src/equations/compressible_navier_stokes_3d.jl b/src/equations/compressible_navier_stokes_3d.jl
index 166b53bf615..de2cad99ea8 100644
--- a/src/equations/compressible_navier_stokes_3d.jl
+++ b/src/equations/compressible_navier_stokes_3d.jl
@@ -1,3 +1,10 @@
+# By default, Julia/LLVM does not use fused multiply-add operations (FMAs).
+# Since these FMAs can increase the performance of many numerical algorithms,
+# we need to opt-in explicitly.
+# See https://ranocha.de/blog/Optimizing_EC_Trixi for further details.
+@muladd begin
+#! format: noindent
+
 @doc raw"""
     CompressibleNavierStokesDiffusion3D(equations; mu, Pr,
                                         gradient_variables=GradientVariablesPrimitive())
@@ -77,7 +84,8 @@ w_2 = \frac{\rho v_1}{p},\, w_3 = \frac{\rho v_2}{p},\, w_4 = \frac{\rho v_3}{p}
 This code is experimental and may be changed or removed in any future release.
 """
 struct CompressibleNavierStokesDiffusion3D{GradientVariables, RealT <: Real,
-                                           E <: AbstractCompressibleEulerEquations{3}} <:
+                                           E <: AbstractCompressibleEulerEquations{3}
+                                           } <:
       AbstractCompressibleNavierStokesDiffusion{3, 5}
    # TODO: parabolic
    # 1) For now save gamma and inv(gamma-1) again, but could potentially reuse them from the Euler equations
@@ -109,7 +117,8 @@ function CompressibleNavierStokesDiffusion3D(equations::CompressibleEulerEquatio
     CompressibleNavierStokesDiffusion3D{typeof(gradient_variables), typeof(gamma),
                                         typeof(equations)}(gamma, inv_gamma_minus_one,
                                                            μ, Pr, kappa,
-                                                           equations, gradient_variables)
+                                                           equations,
+                                                           gradient_variables)
 end
 
 # TODO: parabolic
@@ -319,9 +328,12 @@ end
 @inline function vorticity(u, gradients, equations::CompressibleNavierStokesDiffusion3D)
     # Ensure that we have velocity `gradients` by way of the `convert_gradient_variables` function.
-    _, dv1dx, dv2dx, dv3dx, _ = convert_derivative_to_primitive(u, gradients[1], equations)
-    _, dv1dy, dv2dy, dv3dy, _ = convert_derivative_to_primitive(u, gradients[2], equations)
-    _, dv1dz, dv2dz, dv3dz, _ = convert_derivative_to_primitive(u, gradients[3], equations)
+    _, dv1dx, dv2dx, dv3dx, _ = convert_derivative_to_primitive(u, gradients[1],
+                                                                equations)
+    _, dv1dy, dv2dy, dv3dy, _ = convert_derivative_to_primitive(u, gradients[2],
+                                                                equations)
+    _, dv1dz, dv2dz, dv3dz, _ = convert_derivative_to_primitive(u, gradients[3],
+                                                                equations)
 
     return SVector(dv3dy - dv2dz, dv1dz - dv3dx, dv2dx - dv1dy)
 end
@@ -330,7 +342,8 @@ end
                   <:Adiabatic})(flux_inner,
                                 u_inner,
                                 normal::AbstractVector,
-                                x, t,
+                                x,
+                                t,
                                 operator_type::Gradient,
                                 equations::CompressibleNavierStokesDiffusion3D{
                                                                                GradientVariablesPrimitive
@@ -345,7 +358,8 @@ end
                   <:Adiabatic})(flux_inner,
                                 u_inner,
                                 normal::AbstractVector,
-                                x, t,
+                                x,
+                                t,
                                 operator_type::Divergence,
                                 equations::CompressibleNavierStokesDiffusion3D{
                                                                                GradientVariablesPrimitive
@@ -367,7 +381,8 @@ end
                   <:Isothermal})(flux_inner,
                                  u_inner,
                                  normal::AbstractVector,
-                                 x, t,
+                                 x,
+                                 t,
                                  operator_type::Gradient,
                                  equations::CompressibleNavierStokesDiffusion3D{
                                                                                 GradientVariablesPrimitive
@@ -384,7 +399,8 @@ end
                   <:Isothermal})(flux_inner,
                                  u_inner,
                                  normal::AbstractVector,
-                                 x, t,
+                                 x,
+                                 t,
                                  operator_type::Divergence,
                                  equations::CompressibleNavierStokesDiffusion3D{
                                                                                 GradientVariablesPrimitive
@@ -404,7 +420,8 @@ end
                   <:Adiabatic})(flux_inner,
                                 w_inner,
                                 normal::AbstractVector,
-                                x, t,
+                                x,
+                                t,
                                 operator_type::Gradient,
                                 equations::CompressibleNavierStokesDiffusion3D{
                                                                                GradientVariablesEntropy
@@ -422,7 +439,8 @@ end
                   <:Adiabatic})(flux_inner,
                                 w_inner,
                                 normal::AbstractVector,
-                                x, t,
+                                x,
+                                t,
                                 operator_type::Divergence,
                                 equations::CompressibleNavierStokesDiffusion3D{
                                                                                GradientVariablesEntropy
@@ -443,7 +461,8 @@ end
                   <:Isothermal})(flux_inner,
                                  w_inner,
                                  normal::AbstractVector,
-                                 x, t,
+                                 x,
+                                 t,
                                  operator_type::Gradient,
                                  equations::CompressibleNavierStokesDiffusion3D{
                                                                                 GradientVariablesEntropy
@@ -463,7 +482,8 @@ end
                   <:Isothermal})(flux_inner,
                                  w_inner,
                                  normal::AbstractVector,
-                                 x, t,
+                                 x,
+                                 t,
                                  operator_type::Divergence,
                                  equations::CompressibleNavierStokesDiffusion3D{
                                                                                 GradientVariablesEntropy
@@ -471,3 +491,4 @@ end
     return SVector(flux_inner[1], flux_inner[2], flux_inner[3], flux_inner[4],
                    flux_inner[5])
 end
+end # @muladd
diff --git a/test/test_parabolic_1d.jl b/test/test_parabolic_1d.jl
index 3c2b8855ce8..f00138c698c 100644
--- a/test/test_parabolic_1d.jl
+++ b/test/test_parabolic_1d.jl
@@ -80,8 +80,8 @@ isdir(outdir) && rm(outdir, recursive=true)
     @test_trixi_include(joinpath(examples_dir(), "tree_1d_dgsem", "elixir_navierstokes_convergence_walls_amr.jl"),
       equations_parabolic = CompressibleNavierStokesDiffusion1D(equations, mu=mu(),
                                                                 Prandtl=prandtl_number()),
-      l2 = [2.527877257772131e-5, 2.5539911566937718e-5, 0.0001211860451244785],
-      linf = [0.00014663867588948776, 0.00019422448348348196, 0.0009556439394007299]
+      l2 = [2.5278824700860636e-5, 2.5540078777006958e-5, 0.00012118655083858043],
+      linf = [0.0001466387075579334, 0.00019422427462629705, 0.0009556446847707178]
     )
   end
 
@@ -90,8 +90,8 @@ isdir(outdir) && rm(outdir, recursive=true)
       equations_parabolic = CompressibleNavierStokesDiffusion1D(equations, mu=mu(),
                                                                 Prandtl=prandtl_number(),
                                                                 gradient_variables = GradientVariablesEntropy()),
-      l2 = [2.4593699163175966e-5, 2.392863645712634e-5, 0.00011252526651714956],
-      linf = [0.00011850555445525046, 0.0001898777490968537, 0.0009597561467877824]
+      l2 = [2.459359632523962e-5, 2.3928390718460263e-5, 0.00011252414117082376],
+      linf = [0.0001185052018830568, 0.00018987717854305393, 0.0009597503607920999]
     )
   end
 end

From 3bda0519dbd084937cd05eb339a4d35b1a6f4d9f Mon Sep 17 00:00:00 2001
From: Daniel Doehring
Date: Fri, 22 Sep 2023 13:14:40 +0200
Subject: [PATCH 157/163] Print leaf cells for tree meshes (#1645)

---
 src/meshes/tree_mesh.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/meshes/tree_mesh.jl b/src/meshes/tree_mesh.jl
index 93ba982bce9..05699d17d16 100644
--- a/src/meshes/tree_mesh.jl
+++ b/src/meshes/tree_mesh.jl
@@ -199,6 +199,7 @@ function Base.show(io::IO, ::MIME"text/plain",
             "length" => mesh.tree.length_level_0,
             "periodicity" => mesh.tree.periodicity,
             "current #cells" => mesh.tree.length,
+            "#leaf-cells" => count_leaf_cells(mesh.tree),
             "maximum #cells" => mesh.tree.capacity,
         ]
         summary_box(io, "TreeMesh{" * string(NDIMS) * ", " * string(TreeType) * "}",

From 5b203620b2b2b5eb009c2076a43d99b42f37f852 Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Fri, 22 Sep 2023 14:40:55 +0200
Subject: [PATCH 158/163] set version to v0.5.44

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index d134a8e548b..fe062b1afaf 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "]
-version = "0.5.44-pre"
+version = "0.5.44"
 
 [deps]
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"

From 0b4bf985846388b8cfee8dc4a2ce462b20b5d533 Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Fri, 22 Sep 2023 14:41:17 +0200
Subject: [PATCH 159/163] set development version to v0.5.45-pre

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index fe062b1afaf..69b2e872b6f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Trixi"
 uuid = "a7f1ee26-1774-49b1-8366-f1abc58fbfcb"
 authors = ["Michael Schlottke-Lakemper ", "Gregor Gassner ", "Hendrik Ranocha ", "Andrew R. Winters ", "Jesse Chan "]
-version = "0.5.44"
+version = "0.5.45-pre"
 
 [deps]
 CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"

From dc9b89fc48e2239dae1108d0719496d956ec19b3 Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha
Date: Tue, 26 Sep 2023 09:04:46 +0200
Subject: [PATCH 160/163] bump compat for SciMLBase.jl to v2 (#1647)

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 69b2e872b6f..d318389a6d2 100644
--- a/Project.toml
+++ b/Project.toml
@@ -71,7 +71,7 @@
 PrecompileTools = "1.1"
 RecipesBase = "1.1"
 Reexport = "1.0"
 Requires = "1.1"
-SciMLBase = "1.90"
+SciMLBase = "1.90, 2"
 Setfield = "0.8, 1"
 SimpleUnPack = "1.1"
 StartUpDG = "0.17"

From e45700d8453e848e069739ea3dee51d721185f71 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 1 Oct 2023 20:48:04 +0200
Subject: [PATCH 161/163] Bump crate-ci/typos from 1.16.9 to 1.16.15 (#1652)

Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.16.9 to 1.16.15.
- [Release notes](https://github.com/crate-ci/typos/releases)
- [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md)
- [Commits](https://github.com/crate-ci/typos/compare/v1.16.9...v1.16.15)

---
updated-dependencies:
- dependency-name: crate-ci/typos
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/SpellCheck.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml
index a06121e7ca1..e608dc8d7c1 100644
--- a/.github/workflows/SpellCheck.yml
+++ b/.github/workflows/SpellCheck.yml
@@ -10,4 +10,4 @@ jobs:
     - name: Checkout Actions Repository
       uses: actions/checkout@v3
     - name: Check spelling
-      uses: crate-ci/typos@v1.16.9
+      uses: crate-ci/typos@v1.16.15

From 0cf3e6768684f20e025411e5a10e4c6f41c928c6 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sun, 1 Oct 2023 22:17:39 +0200
Subject: [PATCH 162/163] Bump actions/checkout from 3 to 4 (#1653)

Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v3...v4)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Michael Schlottke-Lakemper
---
 .github/workflows/CacheNotebooks.yml    | 2 +-
 .github/workflows/DocPreviewCleanup.yml | 2 +-
 .github/workflows/Documenter.yml        | 2 +-
 .github/workflows/FormatCheck.yml       | 2 +-
 .github/workflows/Invalidations.yml     | 4 ++--
 .github/workflows/ReviewChecklist.yml   | 2 +-
 .github/workflows/SpellCheck.yml        | 2 +-
 .github/workflows/benchmark.yml         | 2 +-
 .github/workflows/ci.yml                | 4 ++--
 9 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/CacheNotebooks.yml b/.github/workflows/CacheNotebooks.yml
index c8599d13f26..f89560d8158 100644
--- a/.github/workflows/CacheNotebooks.yml
+++ b/.github/workflows/CacheNotebooks.yml
@@ -11,7 +11,7 @@ jobs:
     steps:
       # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
       - name: Checkout caching branch
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           ref: tutorial_notebooks

diff --git a/.github/workflows/DocPreviewCleanup.yml b/.github/workflows/DocPreviewCleanup.yml
index 66d0b342b2e..0850369c9cc 100644
--- a/.github/workflows/DocPreviewCleanup.yml
+++ b/.github/workflows/DocPreviewCleanup.yml
@@ -12,7 +12,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout gh-pages branch
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           ref: gh-pages

diff --git a/.github/workflows/Documenter.yml b/.github/workflows/Documenter.yml
index 6b557960c89..129c41a3b5c 100644
--- a/.github/workflows/Documenter.yml
+++ b/.github/workflows/Documenter.yml
@@ -33,7 +33,7 @@ jobs:
   build:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@v1
         with:
           version: '1.9'
diff --git a/.github/workflows/FormatCheck.yml b/.github/workflows/FormatCheck.yml
index 628d938dd76..ce46360b832 100644
--- a/.github/workflows/FormatCheck.yml
+++ b/.github/workflows/FormatCheck.yml
@@ -20,7 +20,7 @@ jobs:
         with:
           version: ${{ matrix.julia-version }}
 
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Install JuliaFormatter and format
         # This will use the latest version by default but you can set the version like so:
         #
diff --git a/.github/workflows/Invalidations.yml b/.github/workflows/Invalidations.yml
index ba81f83e0ad..18048d26be8 100644
--- a/.github/workflows/Invalidations.yml
+++ b/.github/workflows/Invalidations.yml
@@ -19,12 +19,12 @@ jobs:
       - uses: julia-actions/setup-julia@v1
         with:
           version: '1'
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-invalidations@v1
         id: invs_pr
 
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           ref: ${{ github.event.repository.default_branch }}
       - uses: julia-actions/julia-buildpkg@v1
diff --git a/.github/workflows/ReviewChecklist.yml b/.github/workflows/ReviewChecklist.yml
index 959a04752d7..d8854411804 100644
--- a/.github/workflows/ReviewChecklist.yml
+++ b/.github/workflows/ReviewChecklist.yml
@@ -12,7 +12,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Add review checklist
         uses: trixi-framework/add-pr-review-checklist@v1
         with:
diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml
index e608dc8d7c1..eae6d8e0be9 100644
--- a/.github/workflows/SpellCheck.yml
+++ b/.github/workflows/SpellCheck.yml
@@ -8,6 +8,6 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - name: Checkout Actions Repository
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
    - name: Check spelling
      uses: crate-ci/typos@v1.16.15
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index c5c95558c8c..2ea30d6fddb 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -15,7 +15,7 @@ jobs:
         arch:
           - x64
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
         with:
           fetch-depth: 0
      - run: |
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4790f93d913..cf8107736e9 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -101,7 +101,7 @@ jobs:
             arch: x64
             trixi_test: threaded
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: julia-actions/setup-julia@v1
         with:
           version: ${{ matrix.version }}
@@ -175,7 +175,7 @@ jobs:
       # Instead, we use the more tedious approach described above.
       # At first, we check out the repository and download all artifacts
       # (and list files for debugging).
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: actions/download-artifact@v3
       - run: ls -R
       # Next, we merge the individual coverage files and upload

From 9e41775351ddf1d92b32bbd6c685536ee004466c Mon Sep 17 00:00:00 2001
From: Jesse Chan <1156048+jlchan@users.noreply.github.com>
Date: Mon, 2 Oct 2023 01:16:37 -0500
Subject: [PATCH 163/163] Add updates and "parabolic terms" row to overview.md
 (#1651)

* update overview.md

* add note on parabolic terms
---
 docs/src/overview.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/src/overview.md b/docs/src/overview.md
index 51a6272ae8e..9cd11a5df93 100644
--- a/docs/src/overview.md
+++ b/docs/src/overview.md
@@ -55,14 +55,15 @@ different features on different mesh types.
 | Element type              | line, square, cube | line, quadᵃ, hexᵃ | quadᵃ     | quadᵃ, hexᵃ | simplex, quadᵃ, hexᵃ |
 | Adaptive mesh refinement  | ✅                  | ❌                 | ❌         | ✅           | ❌                    | [`AMRCallback`](@ref)
 | Solver type               | [`DGSEM`](@ref)    | [`DGSEM`](@ref)   | [`DGSEM`](@ref) | [`DGSEM`](@ref) | [`DGMulti`](@ref) |
-| Domain                    | hypercube          | mapped hypercube  | arbitrary | arbitrary   | arbitraryᵇ           |
+| Domain                    | hypercube          | mapped hypercube  | arbitrary | arbitrary   | arbitrary            |
 | Weak form                 | ✅                  | ✅                 | ✅         | ✅           | ✅                    | [`VolumeIntegralWeakForm`](@ref)
 | Flux differencing         | ✅                  | ✅                 | ✅         | ✅           | ✅                    | [`VolumeIntegralFluxDifferencing`](@ref)
 | Shock capturing           | ✅                  | ✅                 | ✅         | ✅           | ❌                    | [`VolumeIntegralShockCapturingHG`](@ref)
 | Nonconservative equations | ✅                  | ✅                 | ✅         | ✅           | ✅                    | e.g., GLM MHD or shallow water equations
+| Parabolic termsᵇ          | ✅                  | ✅                 | ❌         | ✅           | ✅                    | e.g., [`CompressibleNavierStokesDiffusion2D`](@ref)
 
 ᵃ: quad = quadrilateral, hex = hexahedron
-ᵇ: curved meshes supported for `SBP` and `GaussSBP` approximation types for `VolumeIntegralFluxDifferencing` solvers on quadrilateral and hexahedral `DGMultiMesh`es (non-conservative terms not yet supported)
+ᵇ: Parabolic terms do not currently support adaptivity.
 
 ## Time integration methods