From 863795efc58dd9dd4d87c462565bad7a06d860cb Mon Sep 17 00:00:00 2001
From: Daniel Doehring <daniel.doehring@rwth-aachen.de>
Date: Sun, 1 Dec 2024 17:10:20 +0100
Subject: [PATCH 1/8] More efficient PERK implementation (#2180)

* More efficient PERK implementation

* fmt

* test vals

* minor changes

* consistency

* change memory layout

* Time also PERK3

* remove unnecessary stuff

* test resize perk3

* make algorithms immutable
---
 .../elixir_burgers_perk3.jl                   |   4 +-
 .../tree_1d_dgsem/elixir_advection_perk2.jl   |   2 +-
 ext/TrixiConvexECOSExt.jl                     |   2 +-
 ext/TrixiNLsolveExt.jl                        |   2 +-
 .../methods_PERK2.jl                          |  75 ++++-------
 .../methods_PERK3.jl                          | 122 ++++++++----------
 .../paired_explicit_runge_kutta.jl            |  32 ++++-
 test/test_structured_1d.jl                    |  13 +-
 test/test_unit.jl                             |   8 +-
 9 files changed, 124 insertions(+), 136 deletions(-)

diff --git a/examples/structured_1d_dgsem/elixir_burgers_perk3.jl b/examples/structured_1d_dgsem/elixir_burgers_perk3.jl
index bf91fde74ea..5ff3aff3678 100644
--- a/examples/structured_1d_dgsem/elixir_burgers_perk3.jl
+++ b/examples/structured_1d_dgsem/elixir_burgers_perk3.jl
@@ -1,9 +1,9 @@
 # Convex and ECOS are imported because they are used for finding the optimal time step and optimal 
-# monomial coefficients in the stability polynomial of P-ERK time integrators.
+# monomial coefficients in the stability polynomial of PERK time integrators.
 using Convex, ECOS
 
 # NLsolve is imported to solve the system of nonlinear equations to find the coefficients
-# in the Butcher tableau in the third order P-ERK time integrator.
+# in the Butcher tableau in the third order PERK time integrator.
 using NLsolve
 
 using OrdinaryDiffEq
diff --git a/examples/tree_1d_dgsem/elixir_advection_perk2.jl b/examples/tree_1d_dgsem/elixir_advection_perk2.jl
index 69b587b42ec..86b173d6564 100644
--- a/examples/tree_1d_dgsem/elixir_advection_perk2.jl
+++ b/examples/tree_1d_dgsem/elixir_advection_perk2.jl
@@ -1,6 +1,6 @@
 
 # Convex and ECOS are imported because they are used for finding the optimal time step and optimal 
-# monomial coefficients in the stability polynomial of P-ERK time integrators.
+# monomial coefficients in the stability polynomial of PERK time integrators.
 using Convex, ECOS
 
 using OrdinaryDiffEq
diff --git a/ext/TrixiConvexECOSExt.jl b/ext/TrixiConvexECOSExt.jl
index 9b897436dbb..a83ac0a524f 100644
--- a/ext/TrixiConvexECOSExt.jl
+++ b/ext/TrixiConvexECOSExt.jl
@@ -1,7 +1,7 @@
 # Package extension for adding Convex-based features to Trixi.jl
 module TrixiConvexECOSExt
 
-# Required for coefficient optimization in P-ERK scheme integrators
+# Required for coefficient optimization in PERK scheme integrators
 if isdefined(Base, :get_extension)
     using Convex: MOI, solve!, Variable, minimize, evaluate
     using ECOS: Optimizer
diff --git a/ext/TrixiNLsolveExt.jl b/ext/TrixiNLsolveExt.jl
index fa188d04c71..2e6bca0ba7b 100644
--- a/ext/TrixiNLsolveExt.jl
+++ b/ext/TrixiNLsolveExt.jl
@@ -2,7 +2,7 @@
 module TrixiNLsolveExt
 
 # Required for finding coefficients in Butcher tableau in the third order of 
-# P-ERK scheme integrators
+# PERK scheme integrators
 if isdefined(Base, :get_extension)
     using NLsolve: nlsolve
 else
diff --git a/src/time_integration/paired_explicit_runge_kutta/methods_PERK2.jl b/src/time_integration/paired_explicit_runge_kutta/methods_PERK2.jl
index 2451680a505..37d994dc9bf 100644
--- a/src/time_integration/paired_explicit_runge_kutta/methods_PERK2.jl
+++ b/src/time_integration/paired_explicit_runge_kutta/methods_PERK2.jl
@@ -34,8 +34,8 @@ function compute_PairedExplicitRK2_butcher_tableau(num_stages, eig_vals, tspan,
     # - 2 Since first entry of A is always zero (explicit method) and second is given by c_2 (consistency)
     coeffs_max = num_stages - 2
 
-    a_matrix = zeros(coeffs_max, 2)
-    a_matrix[:, 1] = c[3:end]
+    a_matrix = zeros(2, coeffs_max)
+    a_matrix[1, :] = c[3:end]
 
     consistency_order = 2
 
@@ -56,8 +56,8 @@ function compute_PairedExplicitRK2_butcher_tableau(num_stages, eig_vals, tspan,
         num_monomial_coeffs = length(monomial_coeffs)
         @assert num_monomial_coeffs == coeffs_max
         A = compute_a_coeffs(num_stages, stage_scaling_factors, monomial_coeffs)
-        a_matrix[:, 1] -= A
-        a_matrix[:, 2] = A
+        a_matrix[1, :] -= A
+        a_matrix[2, :] = A
     end
 
     return a_matrix, c, dt_opt
@@ -78,8 +78,8 @@ function compute_PairedExplicitRK2_butcher_tableau(num_stages,
     # - 2 Since first entry of A is always zero (explicit method) and second is given by c_2 (consistency)
     coeffs_max = num_stages - 2
 
-    a_matrix = zeros(coeffs_max, 2)
-    a_matrix[:, 1] = c[3:end]
+    a_matrix = zeros(2, coeffs_max)
+    a_matrix[1, :] = c[3:end]
 
     path_monomial_coeffs = joinpath(base_path_monomial_coeffs,
                                     "gamma_" * string(num_stages) * ".txt")
@@ -91,8 +91,8 @@ function compute_PairedExplicitRK2_butcher_tableau(num_stages,
     @assert num_monomial_coeffs == coeffs_max
     A = compute_a_coeffs(num_stages, stage_scaling_factors, monomial_coeffs)
 
-    a_matrix[:, 1] -= A
-    a_matrix[:, 2] = A
+    a_matrix[1, :] -= A
+    a_matrix[2, :] = A
 
     return a_matrix, c
 end
@@ -131,16 +131,17 @@ optimized for a certain simulation setup (PDE, IC & BC, Riemann Solver, DG Solve
 
 Note: To use this integrator, the user must import the `Convex` and `ECOS` packages.
 """
-mutable struct PairedExplicitRK2 <: AbstractPairedExplicitRKSingle
-    const num_stages::Int
+struct PairedExplicitRK2 <: AbstractPairedExplicitRKSingle
+    num_stages::Int
 
     a_matrix::Matrix{Float64}
     c::Vector{Float64}
     b1::Float64
     bS::Float64
     cS::Float64
+
     dt_opt::Union{Float64, Nothing}
-end # struct PairedExplicitRK2
+end
 
 # Constructor that reads the coefficients from a file
 function PairedExplicitRK2(num_stages, base_path_monomial_coeffs::AbstractString,
@@ -200,9 +201,8 @@ mutable struct PairedExplicitRK2Integrator{RealT <: Real, uType, Params, Sol, F,
     finalstep::Bool # added for convenience
     dtchangeable::Bool
     force_stepfail::Bool
-    # PairedExplicitRK2 stages:
+    # Additional PERK register
     k1::uType
-    k_higher::uType
 end
 
 function init(ode::ODEProblem, alg::PairedExplicitRK2;
@@ -211,9 +211,7 @@ function init(ode::ODEProblem, alg::PairedExplicitRK2;
     du = zero(u0)
     u_tmp = zero(u0)
 
-    # PairedExplicitRK2 stages
-    k1 = zero(u0)
-    k_higher = zero(u0)
+    k1 = zero(u0) # Additional PERK register
 
     t0 = first(ode.tspan)
     tdir = sign(ode.tspan[end] - ode.tspan[1])
@@ -226,7 +224,7 @@ function init(ode::ODEProblem, alg::PairedExplicitRK2;
                                                                      ode.tspan;
                                                                      kwargs...),
                                              false, true, false,
-                                             k1, k_higher)
+                                             k1)
 
     # initialize callbacks
     if callback isa CallbackSet
@@ -262,48 +260,21 @@ function step!(integrator::PairedExplicitRK2Integrator)
     end
 
     @trixi_timeit timer() "Paired Explicit Runge-Kutta ODE integration step" begin
-        # k1
-        integrator.f(integrator.du, integrator.u, prob.p, integrator.t)
-        @threaded for i in eachindex(integrator.du)
-            integrator.k1[i] = integrator.du[i] * integrator.dt
-        end
-
-        # Construct current state
-        @threaded for i in eachindex(integrator.u)
-            integrator.u_tmp[i] = integrator.u[i] + alg.c[2] * integrator.k1[i]
-        end
-        # k2
-        integrator.f(integrator.du, integrator.u_tmp, prob.p,
-                     integrator.t + alg.c[2] * integrator.dt)
-
-        @threaded for i in eachindex(integrator.du)
-            integrator.k_higher[i] = integrator.du[i] * integrator.dt
-        end
+        # First and second stage are identical across all single/standalone PERK methods
+        PERK_k1!(integrator, prob.p)
+        PERK_k2!(integrator, prob.p, alg)
 
         # Higher stages
         for stage in 3:(alg.num_stages)
-            # Construct current state
-            @threaded for i in eachindex(integrator.u)
-                integrator.u_tmp[i] = integrator.u[i] +
-                                      alg.a_matrix[stage - 2, 1] *
-                                      integrator.k1[i] +
-                                      alg.a_matrix[stage - 2, 2] *
-                                      integrator.k_higher[i]
-            end
-
-            integrator.f(integrator.du, integrator.u_tmp, prob.p,
-                         integrator.t + alg.c[stage] * integrator.dt)
-
-            @threaded for i in eachindex(integrator.du)
-                integrator.k_higher[i] = integrator.du[i] * integrator.dt
-            end
+            PERK_ki!(integrator, prob.p, alg, stage)
         end
 
         @threaded for i in eachindex(integrator.u)
-            integrator.u[i] += alg.b1 * integrator.k1[i] +
-                               alg.bS * integrator.k_higher[i]
+            integrator.u[i] += integrator.dt *
+                               (alg.b1 * integrator.k1[i] +
+                                alg.bS * integrator.du[i])
         end
-    end # PairedExplicitRK2 step
+    end
 
     integrator.iter += 1
     integrator.t += integrator.dt
diff --git a/src/time_integration/paired_explicit_runge_kutta/methods_PERK3.jl b/src/time_integration/paired_explicit_runge_kutta/methods_PERK3.jl
index 02bc3eeba36..5be1d498d59 100644
--- a/src/time_integration/paired_explicit_runge_kutta/methods_PERK3.jl
+++ b/src/time_integration/paired_explicit_runge_kutta/methods_PERK3.jl
@@ -59,11 +59,11 @@ function compute_PairedExplicitRK3_butcher_tableau(num_stages, tspan,
                                                     monomial_coeffs, c;
                                                     verbose)
     end
-    # Fill A-matrix in P-ERK style
-    a_matrix = zeros(num_stages - 2, 2)
-    a_matrix[:, 1] = c[3:end]
-    a_matrix[:, 1] -= a_unknown
-    a_matrix[:, 2] = a_unknown
+    # Fill A-matrix in PERK style
+    a_matrix = zeros(2, num_stages - 2)
+    a_matrix[1, :] = c[3:end]
+    a_matrix[1, :] -= a_unknown
+    a_matrix[2, :] = a_unknown
 
     return a_matrix, c, dt_opt
 end
@@ -80,8 +80,8 @@ function compute_PairedExplicitRK3_butcher_tableau(num_stages,
     # - 2 Since First entry of A is always zero (explicit method) and second is given by c_2 (consistency)
     a_coeffs_max = num_stages - 2
 
-    a_matrix = zeros(a_coeffs_max, 2)
-    a_matrix[:, 1] = c[3:end]
+    a_matrix = zeros(2, a_coeffs_max)
+    a_matrix[1, :] = c[3:end]
 
     path_a_coeffs = joinpath(base_path_a_coeffs,
                              "a_" * string(num_stages) * ".txt")
@@ -91,9 +91,9 @@ function compute_PairedExplicitRK3_butcher_tableau(num_stages,
     num_a_coeffs = size(a_coeffs, 1)
 
     @assert num_a_coeffs == a_coeffs_max
-    # Fill A-matrix in P-ERK style
-    a_matrix[:, 1] -= a_coeffs
-    a_matrix[:, 2] = a_coeffs
+    # Fill A-matrix in PERK style
+    a_matrix[1, :] -= a_coeffs
+    a_matrix[2, :] = a_coeffs
 
     return a_matrix, c
 end
@@ -107,7 +107,7 @@ end
                       verbose = false, cS2 = 1.0f0)
 
     Parameters:
-    - `num_stages` (`Int`): Number of stages in the paired explicit Runge-Kutta (P-ERK) method.
+    - `num_stages` (`Int`): Number of stages in the paired explicit Runge-Kutta (PERK) method.
     - `base_path_a_coeffs` (`AbstractString`): Path to a file containing some coefficients in the A-matrix in 
       the Butcher tableau of the Runge Kutta method.
       The matrix should be stored in a text file at `joinpath(base_path_a_coeffs, "a_$(num_stages).txt")` and separated by line breaks.
@@ -122,7 +122,7 @@ end
       s is the number of stages, default is 1.0f0.
 
 The following structures and methods provide an implementation of
-the third-order paired explicit Runge-Kutta (P-ERK) method
+the third-order paired explicit Runge-Kutta (PERK) method
 optimized for a certain simulation setup (PDE, IC & BC, Riemann Solver, DG Solver).
 The original paper is
 - Nasab, Vermeire (2022)
@@ -135,13 +135,14 @@ Multirate Time-Integration based on Dynamic ODE Partitioning through Adaptively
 
 Note: To use this integrator, the user must import the `Convex`, `ECOS`, and `NLsolve` packages.
 """
-mutable struct PairedExplicitRK3 <: AbstractPairedExplicitRKSingle
-    const num_stages::Int # S
+struct PairedExplicitRK3 <: AbstractPairedExplicitRKSingle
+    num_stages::Int # S
 
     a_matrix::Matrix{Float64}
     c::Vector{Float64}
+
     dt_opt::Union{Float64, Nothing}
-end # struct PairedExplicitRK3
+end
 
 # Constructor for previously computed A Coeffs
 function PairedExplicitRK3(num_stages, base_path_a_coeffs::AbstractString,
@@ -195,9 +196,9 @@ mutable struct PairedExplicitRK3Integrator{RealT <: Real, uType, Params, Sol, F,
     finalstep::Bool # added for convenience
     dtchangeable::Bool
     force_stepfail::Bool
-    # PairedExplicitRK stages:
+    # Additional PERK3 registers
     k1::uType
-    k_higher::uType
+    kS1::uType
 end
 
 function init(ode::ODEProblem, alg::PairedExplicitRK3;
@@ -206,9 +207,9 @@ function init(ode::ODEProblem, alg::PairedExplicitRK3;
     du = zero(u0)
     u_tmp = zero(u0)
 
-    # PairedExplicitRK stages
+    # Additional PERK3 registers
     k1 = zero(u0)
-    k_higher = zero(u0)
+    kS1 = zero(u0)
 
     t0 = first(ode.tspan)
     tdir = sign(ode.tspan[end] - ode.tspan[1])
@@ -221,7 +222,7 @@ function init(ode::ODEProblem, alg::PairedExplicitRK3;
                                                                      ode.tspan;
                                                                      kwargs...),
                                              false, true, false,
-                                             k1, k_higher)
+                                             k1, kS1)
 
     # initialize callbacks
     if callback isa CallbackSet
@@ -258,72 +259,42 @@ function step!(integrator::PairedExplicitRK3Integrator)
     end
 
     @trixi_timeit timer() "Paired Explicit Runge-Kutta ODE integration step" begin
-        # k1
-        integrator.f(integrator.du, integrator.u, prob.p, integrator.t)
-        @threaded for i in eachindex(integrator.du)
-            integrator.k1[i] = integrator.du[i] * integrator.dt
-        end
-
-        # Construct current state
-        @threaded for i in eachindex(integrator.du)
-            integrator.u_tmp[i] = integrator.u[i] + alg.c[2] * integrator.k1[i]
-        end
-        # k2
-        integrator.f(integrator.du, integrator.u_tmp, prob.p,
-                     integrator.t + alg.c[2] * integrator.dt)
-
-        @threaded for i in eachindex(integrator.du)
-            integrator.k_higher[i] = integrator.du[i] * integrator.dt
-        end
+        # First and second stage are identical across all single/standalone PERK methods
+        PERK_k1!(integrator, prob.p)
+        PERK_k2!(integrator, prob.p, alg)
 
-        # Higher stages
         for stage in 3:(alg.num_stages - 1)
-            # Construct current state
-            @threaded for i in eachindex(integrator.du)
-                integrator.u_tmp[i] = integrator.u[i] +
-                                      alg.a_matrix[stage - 2, 1] *
-                                      integrator.k1[i] +
-                                      alg.a_matrix[stage - 2, 2] *
-                                      integrator.k_higher[i]
-            end
-
-            integrator.f(integrator.du, integrator.u_tmp, prob.p,
-                         integrator.t + alg.c[stage] * integrator.dt)
-
-            @threaded for i in eachindex(integrator.du)
-                integrator.k_higher[i] = integrator.du[i] * integrator.dt
-            end
+            PERK_ki!(integrator, prob.p, alg, stage)
         end
 
-        # Last stage
-        @threaded for i in eachindex(integrator.du)
-            integrator.u_tmp[i] = integrator.u[i] +
-                                  alg.a_matrix[alg.num_stages - 2, 1] *
-                                  integrator.k1[i] +
-                                  alg.a_matrix[alg.num_stages - 2, 2] *
-                                  integrator.k_higher[i]
+        # We need to store `du` of the S-1 stage in `kS1` for the final update:
+        @threaded for i in eachindex(integrator.u)
+            integrator.kS1[i] = integrator.du[i]
         end
 
-        integrator.f(integrator.du, integrator.u_tmp, prob.p,
-                     integrator.t + alg.c[alg.num_stages] * integrator.dt)
+        PERK_ki!(integrator, prob.p, alg, alg.num_stages)
 
         @threaded for i in eachindex(integrator.u)
             # "Own" PairedExplicitRK based on SSPRK33.
-            # Note that 'k_higher' carries the values of K_{S-1}
+            # Note that 'kS1' carries the values of K_{S-1}
             # and that we construct 'K_S' "in-place" from 'integrator.du'
-            integrator.u[i] += (integrator.k1[i] + integrator.k_higher[i] +
-                                4.0 * integrator.du[i] * integrator.dt) / 6.0
+            integrator.u[i] += integrator.dt *
+                               (integrator.k1[i] + integrator.kS1[i] +
+                                4.0 * integrator.du[i]) / 6.0
         end
-    end # PairedExplicitRK step timer
+    end
 
     integrator.iter += 1
     integrator.t += integrator.dt
 
-    # handle callbacks
-    if callbacks isa CallbackSet
-        for cb in callbacks.discrete_callbacks
-            if cb.condition(integrator.u, integrator.t, integrator)
-                cb.affect!(integrator)
+    @trixi_timeit timer() "Step-Callbacks" begin
+        # handle callbacks
+        if callbacks isa CallbackSet
+            foreach(callbacks.discrete_callbacks) do cb
+                if cb.condition(integrator.u, integrator.t, integrator)
+                    cb.affect!(integrator)
+                end
+                return nothing
             end
         end
     end
@@ -334,4 +305,13 @@ function step!(integrator::PairedExplicitRK3Integrator)
         terminate!(integrator)
     end
 end
+
+function Base.resize!(integrator::PairedExplicitRK3Integrator, new_size)
+    resize!(integrator.u, new_size)
+    resize!(integrator.du, new_size)
+    resize!(integrator.u_tmp, new_size)
+
+    resize!(integrator.k1, new_size)
+    resize!(integrator.kS1, new_size)
+end
 end # @muladd
diff --git a/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl b/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl
index c606326738f..87f067fc39d 100644
--- a/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl
+++ b/src/time_integration/paired_explicit_runge_kutta/paired_explicit_runge_kutta.jl
@@ -108,13 +108,42 @@ function solve!(integrator::AbstractPairedExplicitRKIntegrator)
 
     @trixi_timeit timer() "main loop" while !integrator.finalstep
         step!(integrator)
-    end # "main loop" timer
+    end
 
     return TimeIntegratorSolution((first(prob.tspan), integrator.t),
                                   (prob.u0, integrator.u),
                                   integrator.sol.prob)
 end
 
+# Function that computes the first stage of a general PERK method
+@inline function PERK_k1!(integrator::AbstractPairedExplicitRKIntegrator, p)
+    integrator.f(integrator.k1, integrator.u, p, integrator.t)
+end
+
+@inline function PERK_k2!(integrator::AbstractPairedExplicitRKSingleIntegrator, p, alg)
+    @threaded for i in eachindex(integrator.du)
+        integrator.u_tmp[i] = integrator.u[i] +
+                              alg.c[2] * integrator.dt * integrator.k1[i]
+    end
+
+    integrator.f(integrator.du, integrator.u_tmp, p,
+                 integrator.t + alg.c[2] * integrator.dt)
+end
+
+@inline function PERK_ki!(integrator::AbstractPairedExplicitRKSingleIntegrator, p, alg,
+                          stage)
+    # Construct current state
+    @threaded for i in eachindex(integrator.u)
+        integrator.u_tmp[i] = integrator.u[i] +
+                              integrator.dt *
+                              (alg.a_matrix[1, stage - 2] * integrator.k1[i] +
+                               alg.a_matrix[2, stage - 2] * integrator.du[i])
+    end
+
+    integrator.f(integrator.du, integrator.u_tmp, p,
+                 integrator.t + alg.c[stage] * integrator.dt)
+end
+
 # used for AMR (Adaptive Mesh Refinement)
 function Base.resize!(integrator::AbstractPairedExplicitRKIntegrator, new_size)
     resize!(integrator.u, new_size)
@@ -122,7 +151,6 @@ function Base.resize!(integrator::AbstractPairedExplicitRKIntegrator, new_size)
     resize!(integrator.u_tmp, new_size)
 
     resize!(integrator.k1, new_size)
-    resize!(integrator.k_higher, new_size)
 end
 
 # get a cache where the RHS can be stored
diff --git a/test/test_structured_1d.jl b/test/test_structured_1d.jl
index 3061257d508..ccb0c8cacac 100644
--- a/test/test_structured_1d.jl
+++ b/test/test_structured_1d.jl
@@ -72,6 +72,15 @@ end
         du_ode = similar(u_ode)
         @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 8000
     end
+
+    # Test `resize!`
+    integrator = init(ode, ode_algorithm, dt = 42.0, callback = callbacks)
+    resize!(integrator, 42)
+    @test length(integrator.u) == 42
+    @test length(integrator.du) == 42
+    @test length(integrator.u_tmp) == 42
+    @test length(integrator.k1) == 42
+    @test length(integrator.kS1) == 42
 end
 
 # Testing the third-order paired explicit Runge-Kutta (PERK) method without stepsize callback
@@ -82,8 +91,8 @@ end
                         save_solution=SaveSolutionCallback(dt = 0.1 + 1.0e-8), # Adding a small epsilon to avoid floating-point precision issues
                         callbacks=CallbackSet(summary_callback, save_solution,
                                               analysis_callback, alive_callback),
-                        l2=[5.726144786001842e-7],
-                        linf=[3.430730019182704e-6])
+                        l2=[5.726144824784944e-7],
+                        linf=[3.43073006914274e-6])
     # Ensure that we do not have excessive memory allocations
     # (e.g., from type instabilities)
     let
diff --git a/test/test_unit.jl b/test/test_unit.jl
index 641c7244efe..cd219050555 100644
--- a/test/test_unit.jl
+++ b/test/test_unit.jl
@@ -1700,7 +1700,7 @@ end
 
     ode_algorithm = Trixi.PairedExplicitRK2(6, path_coeff_file)
 
-    @test isapprox(ode_algorithm.a_matrix,
+    @test isapprox(transpose(ode_algorithm.a_matrix),
                    [0.12405417889682908 0.07594582110317093
                     0.16178873711001726 0.13821126288998273
                     0.16692313960864164 0.2330768603913584
@@ -1713,7 +1713,7 @@ end
     tspan = (0.0, 1.0)
     ode_algorithm = Trixi.PairedExplicitRK2(12, tspan, vec(eig_vals))
 
-    @test isapprox(ode_algorithm.a_matrix,
+    @test isapprox(transpose(ode_algorithm.a_matrix),
                    [0.06453812656711647 0.02637096434197444
                     0.09470601372274887 0.041657622640887494
                     0.12332877820069793 0.058489403617483886
@@ -1733,7 +1733,7 @@ end
 
     ode_algorithm = Trixi.PairedExplicitRK3(8, path_coeff_file)
 
-    @test isapprox(ode_algorithm.a_matrix,
+    @test isapprox(transpose(ode_algorithm.a_matrix),
                    [0.33551678438002486 0.06448322158043965
                     0.49653494442225443 0.10346507941960345
                     0.6496890912144586 0.15031092070647037
@@ -1748,7 +1748,7 @@ end
     tspan = (0.0, 1.0)
     ode_algorithm = Trixi.PairedExplicitRK3(13, tspan, vec(eig_vals))
 
-    @test isapprox(ode_algorithm.a_matrix,
+    @test isapprox(transpose(ode_algorithm.a_matrix),
                    [0.19121164778938382 0.008788355190848427
                     0.28723462747227385 0.012765384448655121
                     0.38017717196008227 0.019822834000382223

From 7580e9ae3b3538b8535344ec54c5229f7af31f46 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 2 Dec 2024 06:20:46 +0100
Subject: [PATCH 2/8] Bump crate-ci/typos from 1.27.0 to 1.28.1 (#2186)

Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.27.0 to 1.28.1.
- [Release notes](https://github.com/crate-ci/typos/releases)
- [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md)
- [Commits](https://github.com/crate-ci/typos/compare/v1.27.0...v1.28.1)

---
updated-dependencies:
- dependency-name: crate-ci/typos
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 .github/workflows/SpellCheck.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/SpellCheck.yml b/.github/workflows/SpellCheck.yml
index a40e187e82a..3d557915755 100644
--- a/.github/workflows/SpellCheck.yml
+++ b/.github/workflows/SpellCheck.yml
@@ -10,4 +10,4 @@ jobs:
       - name: Checkout Actions Repository
         uses: actions/checkout@v4
       - name: Check spelling
-        uses: crate-ci/typos@v1.27.0
+        uses: crate-ci/typos@v1.28.1

From 50d30e9ef818052518fd48a065e60d509482e49f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 2 Dec 2024 12:38:11 +0100
Subject: [PATCH 3/8] Bump codecov/codecov-action from 4 to 5 (#2185)

* Bump codecov/codecov-action from 4 to 5

Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 4 to 5.
- [Release notes](https://github.com/codecov/codecov-action/releases)
- [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md)
- [Commits](https://github.com/codecov/codecov-action/compare/v4...v5)

---
updated-dependencies:
- dependency-name: codecov/codecov-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update .github/workflows/ci.yml

Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com>

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Michael Schlottke-Lakemper <michael@sloede.com>
Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com>
---
 .github/workflows/ci.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0c636ee8b0b..020dd72d0fe 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -133,9 +133,9 @@ jobs:
       - uses: julia-actions/julia-processcoverage@v1
         with:
           directories: src,examples,ext
-      - uses: codecov/codecov-action@v4
+      - uses: codecov/codecov-action@v5
         with:
-          file: ./lcov.info
+          files: ./lcov.info
           flags: unittests
           name: codecov-umbrella
           fail_ci_if_error: true

From 5db379bb21f2603391e6a68332c84d051ab09e9a Mon Sep 17 00:00:00 2001
From: Daniel Doehring <daniel.doehring@rwth-aachen.de>
Date: Mon, 2 Dec 2024 20:31:31 +0100
Subject: [PATCH 4/8] Arbitrary Precision LGL Basis (#2128)

* test higher precision

* try to get higher order convergence

* type stability

* fmt

* conservation

* correct

* working version

* changes

* rm ODE

* rm unused

* Apply suggestions from code review

Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com>

* RealT for TreeMesh

* undo tree

* Revert "undo tree"

This reverts commit 8103d0a333c7e78339d4bbbdf88e35eedaec1491.

* ones, zeros

* Apply suggestions from code review

Co-authored-by: Michael Schlottke-Lakemper <michael@sloede.com>
Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com>

* reprod test vals f32

* LV suggestion

* typo

* shorten

* data safety

* typesafety

* fix

* docstring

* comments

* Type general projection

* error

* syntax

* bring back conv

* tolerance always dynamic

* convert pi

* tighter tolerances

* warning only

* update some float test vals

* remove unused/NON_TESTED

* restructure arguments

* docstrings

* remove random 1000

* old behaviour

* compliance with deprecated dgsem

* Avoid breaking change

* avoid ambiguity

* pass datatype through

* do not cast to float

* remvoe comment

* more type generality

* do not mix up things

* move example

* tests

* News etc

* fmt

* update test vals to different basis

* try to disable warnings

* fmt

* CI hazzle

* fun with Ci

* more warnings

* Compat entries

* Tolerances for quad precision types

* Comment

* Apply suggestions from code review

Co-authored-by: Hendrik Ranocha <ranocha@users.noreply.github.com>

* comment

* shorter code for analysis

* Apply suggestions from code review

Co-authored-by: Michael Schlottke-Lakemper <michael@sloede.com>

---------

Co-authored-by: Joshua Lampert <51029046+JoshuaLampert@users.noreply.github.com>
Co-authored-by: Michael Schlottke-Lakemper <michael@sloede.com>
Co-authored-by: Hendrik Ranocha <ranocha@users.noreply.github.com>
---
 NEWS.md                                       |   2 +
 README.md                                     |   1 +
 docs/src/index.md                             |   1 +
 .../elixir_advection_float128.jl              |  60 +++++++
 .../elixir_advection_doublefloat.jl           |  63 ++++++++
 src/auxiliary/precompile.jl                   |   6 +-
 src/auxiliary/special_elixirs.jl              |  16 +-
 src/callbacks_step/analysis_dg1d.jl           |   9 +-
 src/callbacks_step/analysis_dg2d.jl           |   9 +-
 src/callbacks_step/analysis_dg2d_parallel.jl  |   9 +-
 src/callbacks_step/analysis_dg3d.jl           |  11 +-
 src/callbacks_step/analysis_dg3d_parallel.jl  |   9 +-
 src/solvers/dgsem/basis_lobatto_legendre.jl   | 153 +++++++++---------
 src/solvers/dgsem/l2projection.jl             |  28 ++--
 test/Project.toml                             |   4 +
 test/test_p4est_2d.jl                         |   4 +-
 test/test_structured_1d.jl                    |  14 ++
 test/test_tree_1d_advection.jl                |  14 ++
 test/test_tree_2d_mhd.jl                      |  34 ++--
 test/test_trixi.jl                            |  12 +-
 test/test_unit.jl                             |   6 +
 test/test_unstructured_2d.jl                  |   4 +-
 22 files changed, 330 insertions(+), 139 deletions(-)
 create mode 100644 examples/structured_1d_dgsem/elixir_advection_float128.jl
 create mode 100644 examples/tree_1d_dgsem/elixir_advection_doublefloat.jl

diff --git a/NEWS.md b/NEWS.md
index 2f71c45e57e..c187f229519 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -11,6 +11,8 @@ for human readability.
 - New time integrator `PairedExplicitRK3`, implementing the third-order paired explicit Runge-Kutta
   method with [Convex.jl](https://github.com/jump-dev/Convex.jl), [ECOS.jl](https://github.com/jump-dev/ECOS.jl),
   and [NLsolve.jl](https://github.com/JuliaNLSolvers/NLsolve.jl) ([#2008])
+- `LobattoLegendreBasis` and related datastructures made fully floating-type general,
+  enabling calculations with higher than double (`Float64`) precision ([#2128])
 
 ## Changes when updating to v0.9 from v0.8.x
 
diff --git a/README.md b/README.md
index 70c28799f31..5b58612bb09 100644
--- a/README.md
+++ b/README.md
@@ -33,6 +33,7 @@ installation and postprocessing procedures. Its features include:
   * Hierarchical quadtree/octree grid with adaptive mesh refinement
   * Forests of quadtrees/octrees with [p4est](https://github.com/cburstedde/p4est) via [P4est.jl](https://github.com/trixi-framework/P4est.jl)
 * High-order accuracy in space and time
+  * Arbitrary floating-point precision
 * Discontinuous Galerkin methods
   * Kinetic energy-preserving and entropy-stable methods based on flux differencing
   * Entropy-stable shock capturing
diff --git a/docs/src/index.md b/docs/src/index.md
index 88752cc7f11..40191e97b9a 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -26,6 +26,7 @@ installation and postprocessing procedures. Its features include:
   * Hierarchical quadtree/octree grid with adaptive mesh refinement
   * Forests of quadtrees/octrees with [p4est](https://github.com/cburstedde/p4est) via [P4est.jl](https://github.com/trixi-framework/P4est.jl)
 * High-order accuracy in space and time
+  * Arbitrary floating-point precision
 * Discontinuous Galerkin methods
   * Kinetic energy-preserving and entropy-stable methods based on flux differencing
   * Entropy-stable shock capturing
diff --git a/examples/structured_1d_dgsem/elixir_advection_float128.jl b/examples/structured_1d_dgsem/elixir_advection_float128.jl
new file mode 100644
index 00000000000..7f72eba58e3
--- /dev/null
+++ b/examples/structured_1d_dgsem/elixir_advection_float128.jl
@@ -0,0 +1,60 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+using Quadmath
+
+###############################################################################
+# semidiscretization of the linear advection equation
+
+# See https://github.com/JuliaMath/Quadmath.jl
+RealT = Float128
+
+advection_velocity = 4 / 3 # Does not need to be in higher precision
+equations = LinearScalarAdvectionEquation1D(advection_velocity)
+
+solver = DGSEM(RealT = RealT, polydeg = 13, surface_flux = flux_lax_friedrichs)
+
+# CARE: Important to use higher precision datatype for coordinates
+# as these are used for type promotion of the mesh (points etc.)
+coordinates_min = (-one(RealT),)
+coordinates_max = (one(RealT),)
+cells_per_dimension = (1,)
+
+# `StructuredMesh` infers datatype from coordinates
+mesh = StructuredMesh(cells_per_dimension, coordinates_min, coordinates_max)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
+                                    solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+# CARE: Important to use higher precision datatype in specification of final time
+tspan = (zero(RealT), one(RealT))
+
+ode = semidiscretize(semi, tspan);
+
+summary_callback = SummaryCallback()
+
+analysis_callback = AnalysisCallback(semi, interval = 100,
+                                     extra_analysis_errors = (:conservation_error,))
+
+# cfl does not need to be in higher precision
+stepsize_callback = StepsizeCallback(cfl = 0.25)
+
+callbacks = CallbackSet(summary_callback,
+                        stepsize_callback,
+                        analysis_callback)
+
+###############################################################################
+# run the simulation
+
+sol = solve(ode, Feagin14(),
+            # Turn off adaptivity to avoid setting very small tolerances
+            adaptive = false,
+            dt = 42, # `dt` does not need to be in higher precision
+            save_everystep = false, callback = callbacks);
+
+# Print the timer summary
+summary_callback()
diff --git a/examples/tree_1d_dgsem/elixir_advection_doublefloat.jl b/examples/tree_1d_dgsem/elixir_advection_doublefloat.jl
new file mode 100644
index 00000000000..8405d0125d0
--- /dev/null
+++ b/examples/tree_1d_dgsem/elixir_advection_doublefloat.jl
@@ -0,0 +1,63 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+using DoubleFloats
+
+###############################################################################
+# semidiscretization of the linear advection equation
+
+# See https://github.com/JuliaMath/DoubleFloats.jl
+RealT = Double64
+
+advection_velocity = 4 / 3 # Does not need to be in higher precision
+equations = LinearScalarAdvectionEquation1D(advection_velocity)
+
+solver = DGSEM(RealT = RealT, polydeg = 7, surface_flux = flux_lax_friedrichs)
+
+# CARE: Important to use higher precision datatype for coordinates
+# as these are used for type promotion of the mesh (points etc.)
+coordinates_min = -one(RealT) # minimum coordinate
+coordinates_max = one(RealT) # maximum coordinate
+
+# For `TreeMesh` the datatype has to be specified explicitly,
+# i.e., is not inferred from the coordinates.
+mesh = TreeMesh(coordinates_min, coordinates_max,
+                initial_refinement_level = 3,
+                n_cells_max = 30_000,
+                RealT = RealT)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition_convergence_test,
+                                    solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+# CARE: Important to use higher precision datatype in specification of final time
+tspan = (zero(RealT), one(RealT))
+
+ode = semidiscretize(semi, tspan);
+
+summary_callback = SummaryCallback()
+
+analysis_callback = AnalysisCallback(semi, interval = 100,
+                                     extra_analysis_errors = (:conservation_error,))
+
+# cfl does not need to be in higher precision
+stepsize_callback = StepsizeCallback(cfl = 1.4)
+
+callbacks = CallbackSet(summary_callback,
+                        stepsize_callback,
+                        analysis_callback)
+
+###############################################################################
+# run the simulation
+
+sol = solve(ode, DP8(),
+            # Turn off adaptivity to avoid setting very small tolerances
+            adaptive = false,
+            dt = 42, # `dt` does not need to be in higher precision
+            save_everystep = false, callback = callbacks);
+
+# Print the timer summary
+summary_callback()
diff --git a/src/auxiliary/precompile.jl b/src/auxiliary/precompile.jl
index dfda8ece687..5a1de036808 100644
--- a/src/auxiliary/precompile.jl
+++ b/src/auxiliary/precompile.jl
@@ -329,8 +329,10 @@ function _precompile_manual_()
     @assert Base.precompile(Tuple{typeof(Trixi.gauss_nodes_weights), Int})
     @assert Base.precompile(Tuple{typeof(Trixi.calc_forward_upper), Int})
     @assert Base.precompile(Tuple{typeof(Trixi.calc_forward_lower), Int})
-    @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_upper), Int, Val{:gauss}})
-    @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_lower), Int, Val{:gauss}})
+    @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_upper), Int,
+                                  Val{:gauss}})
+    @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_lower), Int,
+                                  Val{:gauss}})
     @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_upper), Int,
                                   Val{:gauss_lobatto}})
     @assert Base.precompile(Tuple{typeof(Trixi.calc_reverse_lower), Int,
diff --git a/src/auxiliary/special_elixirs.jl b/src/auxiliary/special_elixirs.jl
index d71a27aa96a..3e960945e7a 100644
--- a/src/auxiliary/special_elixirs.jl
+++ b/src/auxiliary/special_elixirs.jl
@@ -6,10 +6,11 @@
 #! format: noindent
 
 """
-    convergence_test([mod::Module=Main,] elixir::AbstractString, iterations; kwargs...)
+    convergence_test([mod::Module=Main,] elixir::AbstractString, iterations, RealT = Float64; kwargs...)
 
 Run `iterations` Trixi.jl simulations using the setup given in `elixir` and compute
 the experimental order of convergence (EOC) in the ``L^2`` and ``L^\\infty`` norm.
+Use `RealT` as the data type to represent the errors.
 In each iteration, the resolution of the respective mesh will be doubled.
 Additional keyword arguments `kwargs...` and the optional module `mod` are passed directly
 to [`trixi_include`](@ref).
@@ -18,12 +19,14 @@ This function assumes that the spatial resolution is set via the keywords
 `initial_refinement_level` (an integer) or `cells_per_dimension` (a tuple of
 integers, one per spatial dimension).
 """
-function convergence_test(mod::Module, elixir::AbstractString, iterations; kwargs...)
+function convergence_test(mod::Module, elixir::AbstractString, iterations,
+                          RealT = Float64;
+                          kwargs...)
     @assert(iterations>1,
             "Number of iterations must be bigger than 1 for a convergence analysis")
 
     # Types of errors to be calculated
-    errors = Dict(:l2 => Float64[], :linf => Float64[])
+    errors = Dict(:l2 => RealT[], :linf => RealT[])
 
     initial_resolution = extract_initial_resolution(elixir, kwargs)
 
@@ -105,7 +108,7 @@ function analyze_convergence(errors, iterations,
         println("")
 
         # Print mean EOCs
-        mean_values = zeros(nvariables)
+        mean_values = zeros(eltype(errors[:l2]), nvariables)
         for v in 1:nvariables
             mean_values[v] = sum(eocs[kind][:, v]) ./ length(eocs[kind][:, v])
             @printf("%-10s", "mean")
@@ -119,8 +122,9 @@ function analyze_convergence(errors, iterations,
     return eoc_mean_values
 end
 
-function convergence_test(elixir::AbstractString, iterations; kwargs...)
-    convergence_test(Main, elixir::AbstractString, iterations; kwargs...)
+function convergence_test(elixir::AbstractString, iterations, RealT = Float64;
+                          kwargs...)
+    convergence_test(Main, elixir::AbstractString, iterations, RealT; kwargs...)
 end
 
 # Helper methods used in the functions defined above
diff --git a/src/callbacks_step/analysis_dg1d.jl b/src/callbacks_step/analysis_dg1d.jl
index d2613c325be..b4d49a39d5a 100644
--- a/src/callbacks_step/analysis_dg1d.jl
+++ b/src/callbacks_step/analysis_dg1d.jl
@@ -61,16 +61,17 @@ function calc_error_norms(func, u, t, analyzer,
                                        inv.(view(inverse_jacobian, :, element)))
 
         # Calculate errors at each analysis node
-        @. jacobian_local = abs(jacobian_local)
-
         for i in eachnode(analyzer)
             u_exact = initial_condition(get_node_coords(x_local, equations, dg, i), t,
                                         equations)
             diff = func(u_exact, equations) -
                    func(get_node_vars(u_local, equations, dg, i), equations)
-            l2_error += diff .^ 2 * (weights[i] * jacobian_local[i])
+            # We take absolute value as we need the Jacobian here for the volume calculation
+            abs_jacobian_local_i = abs(jacobian_local[i])
+
+            l2_error += diff .^ 2 * (weights[i] * abs_jacobian_local_i)
             linf_error = @. max(linf_error, abs(diff))
-            total_volume += weights[i] * jacobian_local[i]
+            total_volume += weights[i] * abs_jacobian_local_i
         end
     end
 
diff --git a/src/callbacks_step/analysis_dg2d.jl b/src/callbacks_step/analysis_dg2d.jl
index e089133fa17..e33533738ec 100644
--- a/src/callbacks_step/analysis_dg2d.jl
+++ b/src/callbacks_step/analysis_dg2d.jl
@@ -161,16 +161,17 @@ function calc_error_norms(func, u, t, analyzer,
                                        jacobian_tmp1)
 
         # Calculate errors at each analysis node
-        @. jacobian_local = abs(jacobian_local)
-
         for j in eachnode(analyzer), i in eachnode(analyzer)
             u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j),
                                         t, equations)
             diff = func(u_exact, equations) -
                    func(get_node_vars(u_local, equations, dg, i, j), equations)
-            l2_error += diff .^ 2 * (weights[i] * weights[j] * jacobian_local[i, j])
+            # We take absolute value as we need the Jacobian here for the volume calculation
+            abs_jacobian_local_ij = abs(jacobian_local[i, j])
+
+            l2_error += diff .^ 2 * (weights[i] * weights[j] * abs_jacobian_local_ij)
             linf_error = @. max(linf_error, abs(diff))
-            total_volume += weights[i] * weights[j] * jacobian_local[i, j]
+            total_volume += weights[i] * weights[j] * abs_jacobian_local_ij
         end
     end
 
diff --git a/src/callbacks_step/analysis_dg2d_parallel.jl b/src/callbacks_step/analysis_dg2d_parallel.jl
index 5b3ae858ab7..10b6e30f95c 100644
--- a/src/callbacks_step/analysis_dg2d_parallel.jl
+++ b/src/callbacks_step/analysis_dg2d_parallel.jl
@@ -114,16 +114,17 @@ function calc_error_norms(func, u, t, analyzer,
                                        jacobian_tmp1)
 
         # Calculate errors at each analysis node
-        @. jacobian_local = abs(jacobian_local)
-
         for j in eachnode(analyzer), i in eachnode(analyzer)
             u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j),
                                         t, equations)
             diff = func(u_exact, equations) -
                    func(get_node_vars(u_local, equations, dg, i, j), equations)
-            l2_error += diff .^ 2 * (weights[i] * weights[j] * jacobian_local[i, j])
+            # We take absolute value as we need the Jacobian here for the volume calculation
+            abs_jacobian_local_ij = abs(jacobian_local[i, j])
+
+            l2_error += diff .^ 2 * (weights[i] * weights[j] * abs_jacobian_local_ij)
             linf_error = @. max(linf_error, abs(diff))
-            volume += weights[i] * weights[j] * jacobian_local[i, j]
+            volume += weights[i] * weights[j] * abs_jacobian_local_ij
         end
     end
 
diff --git a/src/callbacks_step/analysis_dg3d.jl b/src/callbacks_step/analysis_dg3d.jl
index 3c9c3a69f5e..efc1dc3bb3c 100644
--- a/src/callbacks_step/analysis_dg3d.jl
+++ b/src/callbacks_step/analysis_dg3d.jl
@@ -187,18 +187,19 @@ function calc_error_norms(func, u, t, analyzer,
                                        jacobian_tmp1, jacobian_tmp2)
 
         # Calculate errors at each analysis node
-        @. jacobian_local = abs(jacobian_local)
-
         for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer)
             u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j,
                                                         k), t, equations)
             diff = func(u_exact, equations) -
                    func(get_node_vars(u_local, equations, dg, i, j, k), equations)
+            # We take absolute value as we need the Jacobian here for the volume calculation
+            abs_jacobian_local_ijk = abs(jacobian_local[i, j, k])
+
             l2_error += diff .^ 2 *
-                        (weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k])
+                        (weights[i] * weights[j] * weights[k] * abs_jacobian_local_ijk)
             linf_error = @. max(linf_error, abs(diff))
-            total_volume += weights[i] * weights[j] * weights[k] *
-                            jacobian_local[i, j, k]
+            total_volume += (weights[i] * weights[j] * weights[k] *
+                             abs_jacobian_local_ijk)
         end
     end
 
diff --git a/src/callbacks_step/analysis_dg3d_parallel.jl b/src/callbacks_step/analysis_dg3d_parallel.jl
index 285f0f62de6..ab9ba6a0055 100644
--- a/src/callbacks_step/analysis_dg3d_parallel.jl
+++ b/src/callbacks_step/analysis_dg3d_parallel.jl
@@ -31,17 +31,18 @@ function calc_error_norms(func, u, t, analyzer,
                                        jacobian_tmp1, jacobian_tmp2)
 
         # Calculate errors at each analysis node
-        @. jacobian_local = abs(jacobian_local)
-
         for k in eachnode(analyzer), j in eachnode(analyzer), i in eachnode(analyzer)
             u_exact = initial_condition(get_node_coords(x_local, equations, dg, i, j,
                                                         k), t, equations)
             diff = func(u_exact, equations) -
                    func(get_node_vars(u_local, equations, dg, i, j, k), equations)
+            # We take absolute value as we need the Jacobian here for the volume calculation
+            abs_jacobian_local_ijk = abs(jacobian_local[i, j, k])
+
             l2_error += diff .^ 2 *
-                        (weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k])
+                        (weights[i] * weights[j] * weights[k] * abs_jacobian_local_ijk)
             linf_error = @. max(linf_error, abs(diff))
-            volume += weights[i] * weights[j] * weights[k] * jacobian_local[i, j, k]
+            volume += weights[i] * weights[j] * weights[k] * abs_jacobian_local_ijk
         end
     end
 
diff --git a/src/solvers/dgsem/basis_lobatto_legendre.jl b/src/solvers/dgsem/basis_lobatto_legendre.jl
index cac1dba9c74..3ea668c0264 100644
--- a/src/solvers/dgsem/basis_lobatto_legendre.jl
+++ b/src/solvers/dgsem/basis_lobatto_legendre.jl
@@ -6,7 +6,7 @@
 #! format: noindent
 
 """
-    LobattoLegendreBasis([RealT=Float64,] polydeg::Integer)
+    LobattoLegendreBasis([RealT = Float64,] polydeg::Integer)
 
 Create a nodal Lobatto-Legendre basis for polynomials of degree `polydeg`.
 
@@ -37,15 +37,14 @@ end
 function LobattoLegendreBasis(RealT, polydeg::Integer)
     nnodes_ = polydeg + 1
 
-    # compute everything using `Float64` by default
-    nodes_, weights_ = gauss_lobatto_nodes_weights(nnodes_)
+    nodes_, weights_ = gauss_lobatto_nodes_weights(nnodes_, RealT)
     inverse_weights_ = inv.(weights_)
 
-    _, inverse_vandermonde_legendre_ = vandermonde_legendre(nodes_)
+    _, inverse_vandermonde_legendre_ = vandermonde_legendre(nodes_, RealT)
 
-    boundary_interpolation_ = zeros(nnodes_, 2)
-    boundary_interpolation_[:, 1] = calc_lhat(-1.0, nodes_, weights_)
-    boundary_interpolation_[:, 2] = calc_lhat(1.0, nodes_, weights_)
+    boundary_interpolation_ = zeros(RealT, nnodes_, 2)
+    boundary_interpolation_[:, 1] = calc_lhat(-one(RealT), nodes_, weights_)
+    boundary_interpolation_[:, 2] = calc_lhat(one(RealT), nodes_, weights_)
 
     derivative_matrix_ = polynomial_derivative_matrix(nodes_)
     derivative_split_ = calc_dsplit(nodes_, weights_)
@@ -79,7 +78,6 @@ function LobattoLegendreBasis(RealT, polydeg::Integer)
                                                            derivative_split_transpose,
                                                            derivative_dhat)
 end
-
 LobattoLegendreBasis(polydeg::Integer) = LobattoLegendreBasis(Float64, polydeg)
 
 function Base.show(io::IO, basis::LobattoLegendreBasis)
@@ -165,11 +163,10 @@ function MortarL2(basis::LobattoLegendreBasis)
     RealT = real(basis)
     nnodes_ = nnodes(basis)
 
-    # compute everything using `Float64` by default
-    forward_upper_ = calc_forward_upper(nnodes_)
-    forward_lower_ = calc_forward_lower(nnodes_)
-    reverse_upper_ = calc_reverse_upper(nnodes_, Val(:gauss))
-    reverse_lower_ = calc_reverse_lower(nnodes_, Val(:gauss))
+    forward_upper_ = calc_forward_upper(nnodes_, RealT)
+    forward_lower_ = calc_forward_lower(nnodes_, RealT)
+    reverse_upper_ = calc_reverse_upper(nnodes_, Val(:gauss), RealT)
+    reverse_lower_ = calc_reverse_lower(nnodes_, Val(:gauss), RealT)
 
     # type conversions to get the requested real type and enable possible
     # optimizations of runtime performance and latency
@@ -228,10 +225,10 @@ end
 # end
 
 # function MortarEC(basis::LobattoLegendreBasis{RealT}, surface_flux)
-#   forward_upper   = calc_forward_upper(n_nodes)
-#   forward_lower   = calc_forward_lower(n_nodes)
-#   l2reverse_upper = calc_reverse_upper(n_nodes, Val(:gauss_lobatto))
-#   l2reverse_lower = calc_reverse_lower(n_nodes, Val(:gauss_lobatto))
+#   forward_upper   = calc_forward_upper(n_nodes, RealT)
+#   forward_lower   = calc_forward_lower(n_nodes, RealT)
+#   l2reverse_upper = calc_reverse_upper(n_nodes, Val(:gauss_lobatto), RealT)
+#   l2reverse_lower = calc_reverse_lower(n_nodes, Val(:gauss_lobatto), RealT)
 
 #   # type conversions to make use of StaticArrays etc.
 #   nnodes_ = nnodes(basis)
@@ -263,8 +260,7 @@ function SolutionAnalyzer(basis::LobattoLegendreBasis;
     RealT = real(basis)
     nnodes_ = analysis_polydeg + 1
 
-    # compute everything using `Float64` by default
-    nodes_, weights_ = gauss_lobatto_nodes_weights(nnodes_)
+    nodes_, weights_ = gauss_lobatto_nodes_weights(nnodes_, RealT)
     vandermonde_ = polynomial_interpolation_matrix(get_nodes(basis), nodes_)
 
     # type conversions to get the requested real type and enable possible
@@ -322,11 +318,10 @@ end
 function AdaptorL2(basis::LobattoLegendreBasis{RealT}) where {RealT}
     nnodes_ = nnodes(basis)
 
-    # compute everything using `Float64` by default
-    forward_upper_ = calc_forward_upper(nnodes_)
-    forward_lower_ = calc_forward_lower(nnodes_)
-    reverse_upper_ = calc_reverse_upper(nnodes_, Val(:gauss))
-    reverse_lower_ = calc_reverse_lower(nnodes_, Val(:gauss))
+    forward_upper_ = calc_forward_upper(nnodes_, RealT)
+    forward_lower_ = calc_forward_lower(nnodes_, RealT)
+    reverse_upper_ = calc_reverse_upper(nnodes_, Val(:gauss), RealT)
+    reverse_lower_ = calc_reverse_lower(nnodes_, Val(:gauss), RealT)
 
     # type conversions to get the requested real type and enable possible
     # optimizations of runtime performance and latency
@@ -408,7 +403,7 @@ end
 # This implements algorithm 37 "PolynomialDerivativeMatrix" from Kopriva's book.
 function polynomial_derivative_matrix(nodes)
     n_nodes = length(nodes)
-    d = zeros(n_nodes, n_nodes)
+    d = zeros(eltype(nodes), n_nodes, n_nodes)
     wbary = barycentric_weights(nodes)
 
     for i in 1:n_nodes, j in 1:n_nodes
@@ -481,7 +476,7 @@ For details, see (especially Section 3)
 """
 function barycentric_weights(nodes)
     n_nodes = length(nodes)
-    weights = ones(n_nodes)
+    weights = ones(eltype(nodes), n_nodes)
 
     for j in 2:n_nodes, k in 1:(j - 1)
         weights[k] *= nodes[k] - nodes[j]
@@ -528,7 +523,7 @@ For details, see e.g. Section 2 of
 """
 function lagrange_interpolating_polynomials(x, nodes, wbary)
     n_nodes = length(nodes)
-    polynomials = zeros(n_nodes)
+    polynomials = zeros(eltype(nodes), n_nodes)
 
     for i in 1:n_nodes
         # Avoid division by zero when `x` is close to node by using 
@@ -553,7 +548,7 @@ function lagrange_interpolating_polynomials(x, nodes, wbary)
 end
 
 """
-    gauss_lobatto_nodes_weights(n_nodes::Integer)
+    gauss_lobatto_nodes_weights(n_nodes::Integer, RealT = Float64)
 
 Computes nodes ``x_j`` and weights ``w_j`` for the (Legendre-)Gauss-Lobatto quadrature.
 This implements algorithm 25 "GaussLobattoNodesAndWeights" from the book
@@ -563,15 +558,14 @@ This implements algorithm 25 "GaussLobattoNodesAndWeights" from the book
   Algorithms for scientists and engineers. 
   [DOI:10.1007/978-90-481-2261-5](https://doi.org/10.1007/978-90-481-2261-5)
 """
-# From FLUXO (but really from blue book by Kopriva)
-function gauss_lobatto_nodes_weights(n_nodes::Integer)
-    # From Kopriva's book
-    n_iterations = 10
-    tolerance = 1e-15
+function gauss_lobatto_nodes_weights(n_nodes::Integer, RealT = Float64)
+    # From FLUXO (but really from blue book by Kopriva)
+    n_iterations = 20
+    tolerance = 2 * eps(RealT) # Relative tolerance for Newton iteration
 
     # Initialize output
-    nodes = zeros(n_nodes)
-    weights = zeros(n_nodes)
+    nodes = zeros(RealT, n_nodes)
+    weights = zeros(RealT, n_nodes)
 
     # Special case for polynomial degree zero (first order finite volume)
     if n_nodes == 1
@@ -584,15 +578,15 @@ function gauss_lobatto_nodes_weights(n_nodes::Integer)
     N = n_nodes - 1
 
     # Calculate values at boundary
-    nodes[1] = -1.0
-    nodes[end] = 1.0
-    weights[1] = 2 / (N * (N + 1))
+    nodes[1] = -1
+    nodes[end] = 1
+    weights[1] = RealT(2) / (N * (N + 1))
     weights[end] = weights[1]
 
     # Calculate interior values
     if N > 1
-        cont1 = pi / N
-        cont2 = 3 / (8 * N * pi)
+        cont1 = convert(RealT, pi) / N
+        cont2 = 3 / (8 * N * convert(RealT, pi))
 
         # Use symmetry -> only left side is computed
         for i in 1:(div(N + 1, 2) - 1)
@@ -608,6 +602,10 @@ function gauss_lobatto_nodes_weights(n_nodes::Integer)
                 if abs(dx) < tolerance * abs(nodes[i + 1])
                     break
                 end
+
+                if k == n_iterations
+                    @warn "`gauss_lobatto_nodes_weights` Newton iteration did not converge"
+                end
             end
 
             # Calculate weight
@@ -622,8 +620,8 @@ function gauss_lobatto_nodes_weights(n_nodes::Integer)
 
     # If odd number of nodes, set center node to origin (= 0.0) and calculate weight
     if n_nodes % 2 == 1
-        _, _, L = calc_q_and_l(N, 0)
-        nodes[div(N, 2) + 1] = 0.0
+        _, _, L = calc_q_and_l(N, zero(RealT))
+        nodes[div(N, 2) + 1] = 0
         weights[div(N, 2) + 1] = weights[1] / L^2
     end
 
@@ -631,11 +629,13 @@ function gauss_lobatto_nodes_weights(n_nodes::Integer)
 end
 
 # From FLUXO (but really from blue book by Kopriva, algorithm 24)
-function calc_q_and_l(N::Integer, x::Float64)
-    L_Nm2 = 1.0
+function calc_q_and_l(N::Integer, x::Real)
+    RealT = typeof(x)
+
+    L_Nm2 = one(RealT)
     L_Nm1 = x
-    Lder_Nm2 = 0.0
-    Lder_Nm1 = 1.0
+    Lder_Nm2 = zero(RealT)
+    Lder_Nm1 = one(RealT)
 
     local L
     for i in 2:N
@@ -652,10 +652,9 @@ function calc_q_and_l(N::Integer, x::Float64)
 
     return q, qder, L
 end
-calc_q_and_l(N::Integer, x::Real) = calc_q_and_l(N, convert(Float64, x))
 
 """
-    gauss_nodes_weights(n_nodes::Integer)
+    gauss_nodes_weights(n_nodes::Integer, RealT = Float64)
 
 Computes nodes ``x_j`` and weights ``w_j`` for the Gauss-Legendre quadrature.
 This implements algorithm 23 "LegendreGaussNodesAndWeights" from the book
@@ -665,31 +664,30 @@ This implements algorithm 23 "LegendreGaussNodesAndWeights" from the book
   Algorithms for scientists and engineers. 
   [DOI:10.1007/978-90-481-2261-5](https://doi.org/10.1007/978-90-481-2261-5)
 """
-function gauss_nodes_weights(n_nodes::Integer)
-    # From Kopriva's book
-    n_iterations = 10
-    tolerance = 1e-15
+function gauss_nodes_weights(n_nodes::Integer, RealT = Float64)
+    n_iterations = 20
+    tolerance = 2 * eps(RealT) # Relative tolerance for Newton iteration
 
     # Initialize output
-    nodes = ones(n_nodes) * 1000
-    weights = zeros(n_nodes)
+    nodes = ones(RealT, n_nodes)
+    weights = zeros(RealT, n_nodes)
 
     # Get polynomial degree for convenience
     N = n_nodes - 1
     if N == 0
-        nodes .= 0.0
-        weights .= 2.0
+        nodes .= 0
+        weights .= 2
         return nodes, weights
     elseif N == 1
-        nodes[1] = -sqrt(1 / 3)
+        nodes[1] = -sqrt(one(RealT) / 3)
         nodes[end] = -nodes[1]
-        weights .= 1.0
+        weights .= 1
         return nodes, weights
     else # N > 1
         # Use symmetry property of the roots of the Legendre polynomials
         for i in 0:(div(N + 1, 2) - 1)
             # Starting guess for Newton method
-            nodes[i + 1] = -cos(pi / (2 * N + 2) * (2 * i + 1))
+            nodes[i + 1] = -cos(convert(RealT, pi) / (2 * N + 2) * (2 * i + 1))
 
             # Newton iteration to find root of Legendre polynomial (= integration node)
             for k in 0:n_iterations
@@ -699,6 +697,10 @@ function gauss_nodes_weights(n_nodes::Integer)
                 if abs(dx) < tolerance * abs(nodes[i + 1])
                     break
                 end
+
+                if k == n_iterations
+                    @warn "`gauss_nodes_weights` Newton iteration did not converge"
+                end
             end
 
             # Calculate weight
@@ -712,8 +714,8 @@ function gauss_nodes_weights(n_nodes::Integer)
 
         # If odd number of nodes, set center node to origin (= 0.0) and calculate weight
         if n_nodes % 2 == 1
-            poly, deriv = legendre_polynomial_and_derivative(N + 1, 0.0)
-            nodes[div(N, 2) + 1] = 0.0
+            poly, deriv = legendre_polynomial_and_derivative(N + 1, zero(RealT))
+            nodes[div(N, 2) + 1] = 0
             weights[div(N, 2) + 1] = (2 * N + 3) / deriv^2
         end
 
@@ -733,20 +735,21 @@ This implements algorithm 22 "LegendrePolynomialAndDerivative" from the book
   [DOI:10.1007/978-90-481-2261-5](https://doi.org/10.1007/978-90-481-2261-5)
 """
 function legendre_polynomial_and_derivative(N::Int, x::Real)
+    RealT = typeof(x)
     if N == 0
-        poly = 1.0
-        deriv = 0.0
+        poly = one(RealT)
+        deriv = zero(RealT)
     elseif N == 1
-        poly = convert(Float64, x)
-        deriv = 1.0
+        poly = x
+        deriv = one(RealT)
     else
-        poly_Nm2 = 1.0
-        poly_Nm1 = convert(Float64, x)
-        deriv_Nm2 = 0.0
-        deriv_Nm1 = 1.0
+        poly_Nm2 = one(RealT)
+        poly_Nm1 = copy(x)
+        deriv_Nm2 = zero(RealT)
+        deriv_Nm1 = one(RealT)
 
-        poly = 0.0
-        deriv = 0.0
+        poly = zero(RealT)
+        deriv = zero(RealT)
         for i in 2:N
             poly = ((2 * i - 1) * x * poly_Nm1 - (i - 1) * poly_Nm2) / i
             deriv = deriv_Nm2 + (2 * i - 1) * poly_Nm1
@@ -765,10 +768,10 @@ function legendre_polynomial_and_derivative(N::Int, x::Real)
 end
 
 # Calculate Legendre vandermonde matrix and its inverse
-function vandermonde_legendre(nodes, N)
+function vandermonde_legendre(nodes, N::Integer, RealT = Float64)
     n_nodes = length(nodes)
     n_modes = N + 1
-    vandermonde = zeros(n_nodes, n_modes)
+    vandermonde = zeros(RealT, n_nodes, n_modes)
 
     for i in 1:n_nodes
         for m in 1:n_modes
@@ -779,5 +782,7 @@ function vandermonde_legendre(nodes, N)
     inverse_vandermonde = inv(vandermonde)
     return vandermonde, inverse_vandermonde
 end
-vandermonde_legendre(nodes) = vandermonde_legendre(nodes, length(nodes) - 1)
+vandermonde_legendre(nodes, RealT = Float64) = vandermonde_legendre(nodes,
+                                                                    length(nodes) - 1,
+                                                                    RealT)
 end # @muladd
diff --git a/src/solvers/dgsem/l2projection.jl b/src/solvers/dgsem/l2projection.jl
index 0bb46f5ca15..436c1b92032 100644
--- a/src/solvers/dgsem/l2projection.jl
+++ b/src/solvers/dgsem/l2projection.jl
@@ -23,9 +23,9 @@
 # Calculate forward projection matrix for discrete L2 projection from large to upper
 #
 # Note: This is actually an interpolation.
-function calc_forward_upper(n_nodes)
+function calc_forward_upper(n_nodes, RealT = Float64)
     # Calculate nodes, weights, and barycentric weights
-    nodes, weights = gauss_lobatto_nodes_weights(n_nodes)
+    nodes, _ = gauss_lobatto_nodes_weights(n_nodes, RealT)
     wbary = barycentric_weights(nodes)
 
     # Calculate projection matrix (actually: interpolation)
@@ -43,9 +43,9 @@ end
 # Calculate forward projection matrix for discrete L2 projection from large to lower
 #
 # Note: This is actually an interpolation.
-function calc_forward_lower(n_nodes)
+function calc_forward_lower(n_nodes, RealT = Float64)
     # Calculate nodes, weights, and barycentric weights
-    nodes, weights = gauss_lobatto_nodes_weights(n_nodes)
+    nodes, _ = gauss_lobatto_nodes_weights(n_nodes, RealT)
     wbary = barycentric_weights(nodes)
 
     # Calculate projection matrix (actually: interpolation)
@@ -64,9 +64,9 @@ end
 #
 # Note: To not make the L2 projection exact, first convert to Gauss nodes,
 # perform projection, and convert back to Gauss-Lobatto.
-function calc_reverse_upper(n_nodes, ::Val{:gauss})
+function calc_reverse_upper(n_nodes, ::Val{:gauss}, RealT = Float64)
     # Calculate nodes, weights, and barycentric weights for Legendre-Gauss
-    gauss_nodes, gauss_weights = gauss_nodes_weights(n_nodes)
+    gauss_nodes, gauss_weights = gauss_nodes_weights(n_nodes, RealT)
     gauss_wbary = barycentric_weights(gauss_nodes)
 
     # Calculate projection matrix (actually: discrete L2 projection with errors)
@@ -80,7 +80,7 @@ function calc_reverse_upper(n_nodes, ::Val{:gauss})
     end
 
     # Calculate Vandermondes
-    lobatto_nodes, lobatto_weights = gauss_lobatto_nodes_weights(n_nodes)
+    lobatto_nodes, _ = gauss_lobatto_nodes_weights(n_nodes, RealT)
     gauss2lobatto = polynomial_interpolation_matrix(gauss_nodes, lobatto_nodes)
     lobatto2gauss = polynomial_interpolation_matrix(lobatto_nodes, gauss_nodes)
 
@@ -91,9 +91,9 @@ end
 #
 # Note: To not make the L2 projection exact, first convert to Gauss nodes,
 # perform projection, and convert back to Gauss-Lobatto.
-function calc_reverse_lower(n_nodes, ::Val{:gauss})
+function calc_reverse_lower(n_nodes, ::Val{:gauss}, RealT = Float64)
     # Calculate nodes, weights, and barycentric weights for Legendre-Gauss
-    gauss_nodes, gauss_weights = gauss_nodes_weights(n_nodes)
+    gauss_nodes, gauss_weights = gauss_nodes_weights(n_nodes, RealT)
     gauss_wbary = barycentric_weights(gauss_nodes)
 
     # Calculate projection matrix (actually: discrete L2 projection with errors)
@@ -107,7 +107,7 @@ function calc_reverse_lower(n_nodes, ::Val{:gauss})
     end
 
     # Calculate Vandermondes
-    lobatto_nodes, lobatto_weights = gauss_lobatto_nodes_weights(n_nodes)
+    lobatto_nodes, _ = gauss_lobatto_nodes_weights(n_nodes, RealT)
     gauss2lobatto = polynomial_interpolation_matrix(gauss_nodes, lobatto_nodes)
     lobatto2gauss = polynomial_interpolation_matrix(lobatto_nodes, gauss_nodes)
 
@@ -116,9 +116,9 @@ end
 
 # Calculate reverse projection matrix for discrete L2 projection from upper to large (Gauss-Lobatto
 # version)
-function calc_reverse_upper(n_nodes, ::Val{:gauss_lobatto})
+function calc_reverse_upper(n_nodes, ::Val{:gauss_lobatto}, RealT = Float64)
     # Calculate nodes, weights, and barycentric weights
-    nodes, weights = gauss_lobatto_nodes_weights(n_nodes)
+    nodes, weights = gauss_lobatto_nodes_weights(n_nodes, RealT)
     wbary = barycentric_weights(nodes)
 
     # Calculate projection matrix (actually: discrete L2 projection with errors)
@@ -135,9 +135,9 @@ end
 
 # Calculate reverse projection matrix for discrete L2 projection from lower to large (Gauss-Lobatto
 # version)
-function calc_reverse_lower(n_nodes, ::Val{:gauss_lobatto})
+function calc_reverse_lower(n_nodes, ::Val{:gauss_lobatto}, RealT = Float64)
     # Calculate nodes, weights, and barycentric weights
-    nodes, weights = gauss_lobatto_nodes_weights(n_nodes)
+    nodes, weights = gauss_lobatto_nodes_weights(n_nodes, RealT)
     wbary = barycentric_weights(nodes)
 
     # Calculate projection matrix (actually: discrete L2 projection with errors)
diff --git a/test/Project.toml b/test/Project.toml
index 64c20ea4c37..f9a6241421b 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -3,6 +3,7 @@ Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
 Convex = "f65535da-76fb-5f13-bab9-19810c17039a"
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+DoubleFloats = "497a8b3b-efae-58df-a0af-a86822472b78"
 Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
 ECOS = "e2685f51-7e38-5353-a97d-a921fd2c8199"
 ExplicitImports = "7d51a73a-1435-4ff3-83d9-f097790105c7"
@@ -14,6 +15,7 @@ NLsolve = "2774e3e8-f4cf-5e23-947b-6d7e65073b56"
 OrdinaryDiffEq = "1dea7af3-3e70-54e6-95c3-0bf5283fa5ed"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+Quadmath = "be4d8f0f-7fa4-5f49-b795-2f01399ab2dd"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
@@ -23,6 +25,7 @@ Aqua = "0.8"
 CairoMakie = "0.10"
 Convex = "0.16"
 DelimitedFiles = "1"
+DoubleFloats = "1.4.0"
 Downloads = "1"
 ECOS = "1.1.2"
 ExplicitImports = "1.0.1"
@@ -34,6 +37,7 @@ NLsolve = "4.5.1"
 OrdinaryDiffEq = "6.49.1"
 Plots = "1.19"
 Printf = "1"
+Quadmath = "0.5.10"
 Random = "1"
 StableRNGs = "1.0.2"
 Test = "1"
diff --git a/test/test_p4est_2d.jl b/test/test_p4est_2d.jl
index 7a1a3f3ac09..0e70282e77c 100644
--- a/test/test_p4est_2d.jl
+++ b/test/test_p4est_2d.jl
@@ -231,10 +231,10 @@ end
                             0.3507514f0
                         ],
                         linf=[
-                            0.2942562f0,
+                            0.2930063f0,
                             0.4079147f0,
                             0.3972956f0,
-                            1.0810697f0
+                            1.0764117f0
                         ],
                         tspan=(0.0f0, 1.0f0),
                         rtol=10 * sqrt(eps(Float32)), # to make CI pass
diff --git a/test/test_structured_1d.jl b/test/test_structured_1d.jl
index ccb0c8cacac..a5b561ed8fe 100644
--- a/test/test_structured_1d.jl
+++ b/test/test_structured_1d.jl
@@ -58,6 +58,20 @@ end
     end
 end
 
+@trixi_testset "elixir_advection_float128.jl" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_float128.jl"),
+                        l2=Float128[6.49879312655540217059228636803492411e-09],
+                        linf=Float128[5.35548407857266390181158920649552284e-08])
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+    end
+end
+
 # Testing the third-order paired explicit Runge-Kutta (PERK) method with its optimal CFL number
 @trixi_testset "elixir_burgers_perk3.jl" begin
     @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_burgers_perk3.jl"),
diff --git a/test/test_tree_1d_advection.jl b/test/test_tree_1d_advection.jl
index f061e2e1c30..0665a1be105 100644
--- a/test/test_tree_1d_advection.jl
+++ b/test/test_tree_1d_advection.jl
@@ -142,6 +142,20 @@ end
         @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 8000
     end
 end
+
+@trixi_testset "elixir_advection_doublefloat.jl" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_advection_doublefloat.jl"),
+                        l2=Double64[6.80895929885700039832943251427357703e-11],
+                        linf=Double64[5.82834770064525291688100323411704252e-10])
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+    end
+end
 end
 
 end # module
diff --git a/test/test_tree_2d_mhd.jl b/test/test_tree_2d_mhd.jl
index b7152cb8b75..5d52f4a5d42 100644
--- a/test/test_tree_2d_mhd.jl
+++ b/test/test_tree_2d_mhd.jl
@@ -150,24 +150,24 @@ end
 
 @trixi_testset "elixir_mhd_ec_float32.jl" begin
     @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_mhd_ec_float32.jl"),
-                        l2=Float32[0.036355644,
+                        l2=Float32[0.03635566,
+                                   0.042947732,
                                    0.042947736,
-                                   0.04294775,
-                                   0.025748005,
-                                   0.16211236,
-                                   0.017452478,
-                                   0.017452475,
-                                   0.026877576,
-                                   2.0951437f-7],
-                        linf=Float32[0.22100884,
-                                     0.28798944,
-                                     0.2879896,
-                                     0.20858234,
-                                     0.8812654,
-                                     0.09208202,
-                                     0.09208143,
-                                     0.14795381,
-                                     2.0912607f-6])
+                                   0.025748001,
+                                   0.16211228,
+                                   0.01745248,
+                                   0.017452491,
+                                   0.026877586,
+                                   2.417227f-7],
+                        linf=Float32[0.2210092,
+                                     0.28798974,
+                                     0.28799006,
+                                     0.20858109,
+                                     0.8812673,
+                                     0.09208107,
+                                     0.09208131,
+                                     0.14795369,
+                                     2.2078211f-6])
     # Ensure that we do not have excessive memory allocations
     # (e.g., from type instabilities)
     let
diff --git a/test/test_trixi.jl b/test/test_trixi.jl
index be3521ed16a..024f3654ea0 100644
--- a/test/test_trixi.jl
+++ b/test/test_trixi.jl
@@ -15,6 +15,7 @@ are compared approximately against these reference values, using `atol, rtol`
 as absolute/relative tolerance.
 """
 macro test_trixi_include(elixir, args...)
+    # Note: The variables below are just Symbols, not actual errors/types
     local l2 = get_kwarg(args, :l2, nothing)
     local linf = get_kwarg(args, :linf, nothing)
     local RealT = get_kwarg(args, :RealT, :Float64)
@@ -24,6 +25,12 @@ macro test_trixi_include(elixir, args...)
     elseif RealT === :Float32
         atol_default = 500 * eps(Float32)
         rtol_default = sqrt(eps(Float32))
+    elseif RealT === :Float128
+        atol_default = 500 * eps(Float128)
+        rtol_default = sqrt(eps(Float128))
+    elseif RealT === :Double64
+        atol_default = 500 * eps(Double64)
+        rtol_default = sqrt(eps(Double64))
     end
     local atol = get_kwarg(args, :atol, atol_default)
     local rtol = get_kwarg(args, :rtol, rtol_default)
@@ -167,7 +174,10 @@ macro test_nowarn_mod(expr, additional_ignore_content = String[])
                                      r"WARNING: Method definition .* in module .* at .* overwritten .*.\n",
                                      # Warnings from third party packages
                                      r"┌ Warning: Problem status ALMOST_INFEASIBLE; solution may be inaccurate.\n└ @ Convex ~/.julia/packages/Convex/.*\n",
-                                     r"┌ Warning: Problem status ALMOST_OPTIMAL; solution may be inaccurate.\n└ @ Convex ~/.julia/packages/Convex/.*\n"]
+                                     r"┌ Warning: Problem status ALMOST_OPTIMAL; solution may be inaccurate.\n└ @ Convex ~/.julia/packages/Convex/.*\n",
+                                     # Warnings for higher-precision floating data types
+                                     r"┌ Warning: #= /home/runner/work/Trixi.jl/Trixi.jl/src/solvers/dgsem/interpolation.jl:118 =#:\n│ `LoopVectorization.check_args` on your inputs failed; running fallback `@inbounds @fastmath` loop instead.\n│ Use `warn_check_args=false`, e.g. `@turbo warn_check_args=false ...`, to disable this warning.\n└ @ Trixi ~/.julia/packages/LoopVectorization/tIJUA/src/condense_loopset.jl:1166\n",
+                                     r"┌ Warning: #= /home/runner/work/Trixi.jl/Trixi.jl/src/solvers/dgsem/interpolation.jl:136 =#:\n│ `LoopVectorization.check_args` on your inputs failed; running fallback `@inbounds @fastmath` loop instead.\n│ Use `warn_check_args=false`, e.g. `@turbo warn_check_args=false ...`, to disable this warning.\n└ @ Trixi ~/.julia/packages/LoopVectorization/tIJUA/src/condense_loopset.jl:1166\n"]
                 append!(ignore_content, $additional_ignore_content)
                 for pattern in ignore_content
                     stderr_content = replace(stderr_content, pattern => "")
diff --git a/test/test_unit.jl b/test/test_unit.jl
index cd219050555..97e32726ca7 100644
--- a/test/test_unit.jl
+++ b/test/test_unit.jl
@@ -268,6 +268,12 @@ end
 @timed_testset "interpolation" begin
     @testset "nodes and weights" begin
         @test Trixi.gauss_nodes_weights(1) == ([0.0], [2.0])
+
+        @test Trixi.gauss_nodes_weights(2)[1] ≈ [-1 / sqrt(3), 1 / sqrt(3)]
+        @test Trixi.gauss_nodes_weights(2)[2] == [1.0, 1.0]
+
+        @test Trixi.gauss_nodes_weights(3)[1] ≈ [-sqrt(3 / 5), 0.0, sqrt(3 / 5)]
+        @test Trixi.gauss_nodes_weights(3)[2] ≈ [5 / 9, 8 / 9, 5 / 9]
     end
 
     @testset "multiply_dimensionwise" begin
diff --git a/test/test_unstructured_2d.jl b/test/test_unstructured_2d.jl
index c433338a238..fed71e49f0b 100644
--- a/test/test_unstructured_2d.jl
+++ b/test/test_unstructured_2d.jl
@@ -335,8 +335,8 @@ end
                         ],
                         linf=[
                             Float32(2.776782342826098),
-                            3.2162943f0, # this needs to be adapted
-                            3.6683278f0, # this needed to be adapted
+                            3.2116454f0, # this needs to be adapted
+                            3.6616623f0, # this needed to be adapted
                             Float32(2.052861364219655)
                         ],
                         tspan=(0.0f0, 0.25f0),

From 09d5eb027327dbcb3b04578fe0bfea99feac34c9 Mon Sep 17 00:00:00 2001
From: Manuel Torrilhon <torrilhon@rwth-aachen.de>
Date: Mon, 2 Dec 2024 23:34:25 +0100
Subject: [PATCH 5/8] Collaborating Institutes (#2187)

* research groups in readme file

* research groups in doc/src/index.md

* Update Andrew's affiliation

Co-authored-by: Andrew Winters <andrew.ross.winters@liu.se>

* Remove temporary changelog file to avoid non-unique header link

---------

Co-authored-by: Michael Schlottke-Lakemper <michael@sloede.com>
Co-authored-by: Andrew Winters <andrew.ross.winters@liu.se>
---
 README.md         | 21 ++++++++++++++++++++-
 docs/make.jl      |  2 ++
 docs/src/index.md | 33 +++++++++++++++++++++++++++------
 3 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 5b58612bb09..543a0339e41 100644
--- a/README.md
+++ b/README.md
@@ -261,6 +261,25 @@ To get in touch with the developers,
 [join us on Slack](https://join.slack.com/t/trixi-framework/shared_invite/zt-sgkc6ppw-6OXJqZAD5SPjBYqLd8MU~g)
 or [create an issue](https://github.com/trixi-framework/Trixi.jl/issues/new).
 
+## Participating research groups
+Participating research groups in alphabetical order:
+
+[Applied and Computational Mathematics, RWTH Aachen University :de:](https://www.acom.rwth-aachen.de)
+
+[Applied Mathematics, Department of Mathematics, University of Hamburg :de:](https://www.math.uni-hamburg.de/en/forschung/bereiche/am.html)
+
+[Division of Applied Mathematics, Department of Mathematics, Linköping University :sweden:](https://liu.se/en/employee/andwi94)
+
+[Computational and Applied Mathematics, Rice University :us:](https://jlchan.github.io/)
+
+[High-Performance Computing, Institute of Software Technology, German Aerospace Center (DLR) :de:](https://www.dlr.de/en/sc/about-us/departments/high-performance-computing)
+
+[High-Performance Scientific Computing, University of Augsburg :de:](https://hpsc.math.uni-augsburg.de)
+
+[Numerical Mathematics, Institute of Mathematics, Johannes Gutenberg University Mainz :de:](https://ranocha.de)
+
+[Numerical Simulation, Department of Mathematics and Computer Science, University of Cologne :de:](https://www.mi.uni-koeln.de/NumSim/)
+
 
 ## Acknowledgments
 <p align="center" style="font-size:0;"><!--
@@ -279,7 +298,7 @@ Forschungsgemeinschaft](https://www.dfg.de/) (DFG, German Research Foundation)
 through the following grants:
 * Excellence Strategy EXC 2044-390685587, Mathematics Münster: Dynamics-Geometry-Structure.
 * Research unit FOR 5409 "Structure-Preserving Numerical Methods for Bulk- and
-  Interface Coupling of Heterogeneous Models (SNuBIC)" (project number 463312734).
+  Interface Coupling of Heterogeneous Models ([SNuBIC](https://www.snubic.io))" (project number 463312734).
 * Individual grant no. 528753982.
 
 This project has benefited from funding from the [European Research Council](https://erc.europa.eu)
diff --git a/docs/make.jl b/docs/make.jl
index 4eb8fb11b65..c65a20133a8 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -117,6 +117,8 @@ open(joinpath(@__DIR__, "src", "changelog.md"), "w") do io
         println(io, line)
     end
 end
+# Remove temporary file
+rm(joinpath(@__DIR__, "src", "changelog_tmp.md"))
 
 # Make documentation
 makedocs(
diff --git a/docs/src/index.md b/docs/src/index.md
index 40191e97b9a..79da54f3cf9 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -10,6 +10,7 @@
 [![Aqua QA](https://raw.githubusercontent.com/JuliaTesting/Aqua.jl/master/badge.svg)](https://github.com/JuliaTesting/Aqua.jl)
 [![License: MIT](https://img.shields.io/badge/License-MIT-success.svg)](https://opensource.org/licenses/MIT)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3996439.svg)](https://doi.org/10.5281/zenodo.3996439)
+[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/8695/badge)](https://www.bestpractices.dev/projects/8695)
 
 [**Trixi.jl**](https://github.com/trixi-framework/Trixi.jl)
 is a numerical simulation framework for conservation
@@ -53,7 +54,7 @@ installation and postprocessing procedures. Its features include:
   * Hyperbolic diffusion equations for elliptic problems
   * Lattice-Boltzmann equations (D2Q9 and D3Q27 schemes)
   * Shallow water equations
-  * Scalar advection
+  * Several scalar conservation laws (e.g., linear advection, Burgers' equation, LWR traffic flow)
 * Multi-physics simulations
   * [Self-gravitating gas dynamics](https://github.com/trixi-framework/paper-self-gravitating-gas-dynamics)
 * Shared-memory parallelization via multithreading
@@ -99,11 +100,11 @@ If the installed version does not match the current release, please check the
 [Troubleshooting](@ref old-release) section.
 
 The commands above can also be used to update Trixi.jl. A brief list of notable
-changes to Trixi.jl is available in [`NEWS.md`](https://github.com/trixi-framework/Trixi.jl/NEWS.md).
+changes to Trixi.jl is available in the [`Changelog`](@ref).
 
 ### [For developers](@id for-developers)
-If you plan on editing Trixi.jl itself, you can download Trixi.jl to a local folder
-and use the code from the cloned directory:
+If you plan on editing Trixi.jl itself, you can download Trixi.jl locally and use the
+code from the cloned directory:
 ```bash
 git clone git@github.com:trixi-framework/Trixi.jl.git
 cd Trixi.jl
@@ -333,7 +334,7 @@ Schlottke-Lakemper](https://www.uni-augsburg.de/fakultaet/mntf/math/prof/hpsc)
 (University of Augsburg, Germany) and
 [Gregor Gassner](https://www.mi.uni-koeln.de/NumSim/gregor-gassner)
 (University of Cologne, Germany). Together with [Hendrik Ranocha](https://ranocha.de)
-(Johannes Gutenberg University Mainz, Germany) and [Andrew Winters](https://liu.se/en/employee/andwi94)
+(Johannes Gutenberg University Mainz, Germany), [Andrew Winters](https://liu.se/en/employee/andwi94)
 (Linköping University, Sweden), and [Jesse Chan](https://jlchan.github.io) (Rice University, US),
 they are the principal developers of Trixi.jl.
 The full list of contributors can be found under [Authors](@ref).
@@ -350,6 +351,26 @@ To get in touch with the developers,
 or [create an issue](https://github.com/trixi-framework/Trixi.jl/issues/new).
 
 
+## Participating research groups
+Participating research groups in alphabetical order:
+
+[Applied and Computational Mathematics, RWTH Aachen University 🇩🇪](https://www.acom.rwth-aachen.de)
+
+[Applied Mathematics, Department of Mathematics, University of Hamburg 🇩🇪](https://www.math.uni-hamburg.de/en/forschung/bereiche/am.html)
+
+[Division of Applied Mathematics, Department of Mathematics, Linköping University 🇸🇪](https://liu.se/en/employee/andwi94)
+
+[Computational and Applied Mathematics, Rice University 🇺🇸](https://jlchan.github.io/)
+
+[High-Performance Computing, Institute of Software Technology, German Aerospace Center (DLR) 🇩🇪](https://www.dlr.de/en/sc/about-us/departments/high-performance-computing)
+
+[High-Performance Scientific Computing, University of Augsburg 🇩🇪](https://hpsc.math.uni-augsburg.de)
+
+[Numerical Mathematics, Institute of Mathematics, Johannes Gutenberg University Mainz 🇩🇪](https://ranocha.de)
+
+[Numerical Simulation, Department of Mathematics and Computer Science, University of Cologne 🇩🇪](https://www.mi.uni-koeln.de/NumSim/)
+
+
 ## Acknowledgments
 
 ```@raw html
@@ -378,7 +399,7 @@ Forschungsgemeinschaft](https://www.dfg.de/) (DFG, German Research Foundation)
 through the following grants:
 * Excellence Strategy EXC 2044-390685587, Mathematics Münster: Dynamics-Geometry-Structure.
 * Research unit FOR 5409 "Structure-Preserving Numerical Methods for Bulk- and
-  Interface Coupling of Heterogeneous Models (SNuBIC)" (project number 463312734).
+  Interface Coupling of Heterogeneous Models ([SNuBIC](https://www.snubic.io))" (project number 463312734).
 * Individual grant no. 528753982.
 
 This project has benefited from funding from the [European Research Council](https://erc.europa.eu)

From 5b1835126d08a9a505dc8519fc347c5bc523d0a9 Mon Sep 17 00:00:00 2001
From: Huiyu Xie <huiyuxie.sde@gmail.com>
Date: Mon, 2 Dec 2024 20:55:18 -1000
Subject: [PATCH 6/8] Remove (#2183)

Co-authored-by: Hendrik Ranocha <ranocha@users.noreply.github.com>
---
 .github/workflows/Downgrade.yml  |  8 ++++----
 .github/workflows/ci.yml         | 10 +++++-----
 .github/workflows/downstream.yml |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/Downgrade.yml b/.github/workflows/Downgrade.yml
index a7c4f9caf99..b9bdca5aeac 100644
--- a/.github/workflows/Downgrade.yml
+++ b/.github/workflows/Downgrade.yml
@@ -27,9 +27,9 @@ jobs:
   downgrade_test:
     if: "!contains(github.event.head_commit.message, 'skip ci')"
     # We could also include the Julia version as in
-    # name: ${{ matrix.trixi_test }} - ${{ matrix.os }} - Julia ${{ matrix.version }} - ${{ matrix.arch }} - ${{ github.event_name }}
+    # name: ${{ matrix.trixi_test }} - ${{ matrix.os }} - Julia ${{ matrix.version }} - ${{ matrix.arch }}
     # to be more specific. However, that requires us updating the required CI tests whenever we update Julia.
-    name: Downgrade ${{ matrix.trixi_test }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
+    name: Downgrade ${{ matrix.trixi_test }} - ${{ matrix.os }} - ${{ matrix.arch }}
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
@@ -76,11 +76,11 @@ jobs:
           projects: ., test
       - uses: julia-actions/julia-buildpkg@v1
         env:
-          PYTHON: ""
+          PYTHON: ''
       - name: Run tests without coverage
         uses: julia-actions/julia-runtest@v1
         with:
           coverage: false
         env:
-          PYTHON: ""
+          PYTHON: ''
           TRIXI_TEST: ${{ matrix.trixi_test }}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 020dd72d0fe..8862747fefc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -44,9 +44,9 @@ jobs:
   test:
     if: "!contains(github.event.head_commit.message, 'skip ci')"
     # We could also include the Julia version as in
-    # name: ${{ matrix.trixi_test }} - ${{ matrix.os }} - Julia ${{ matrix.version }} - ${{ matrix.arch }} - ${{ github.event_name }}
+    # name: ${{ matrix.trixi_test }} - ${{ matrix.os }} - Julia ${{ matrix.version }} - ${{ matrix.arch }}
     # to be more specific. However, that requires us updating the required CI tests whenever we update Julia.
-    name: ${{ matrix.trixi_test }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
+    name: ${{ matrix.trixi_test }} - ${{ matrix.os }} - ${{ matrix.arch }}
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
@@ -115,20 +115,20 @@ jobs:
       - uses: julia-actions/cache@v2
       - uses: julia-actions/julia-buildpkg@v1
         env:
-          PYTHON: ""
+          PYTHON: ''
       - name: Run tests without coverage
         uses: julia-actions/julia-runtest@v1
         with:
           coverage: false
         env:
-          PYTHON: ""
+          PYTHON: ''
           TRIXI_TEST: ${{ matrix.trixi_test }}
       - name: Run tests with coverage
         uses: julia-actions/julia-runtest@v1
         with:
           coverage: true
         env:
-          PYTHON: ""
+          PYTHON: ''
           TRIXI_TEST: ${{ matrix.trixi_test }}
       - uses: julia-actions/julia-processcoverage@v1
         with:
diff --git a/.github/workflows/downstream.yml b/.github/workflows/downstream.yml
index 939eb533ffc..13a01984827 100644
--- a/.github/workflows/downstream.yml
+++ b/.github/workflows/downstream.yml
@@ -46,9 +46,9 @@ jobs:
   test:
     if: "!contains(github.event.head_commit.message, 'skip ci')"
     # We could also include the Julia version as in
-    # name: ${{ matrix.trixi_test }} - ${{ matrix.os }} - Julia ${{ matrix.version }} - ${{ matrix.arch }} - ${{ github.event_name }}
+    # name: ${{ matrix.trixi_test }} - ${{ matrix.os }} - Julia ${{ matrix.version }} - ${{ matrix.arch }}
     # to be more specific. However, that requires us updating the required CI tests whenever we update Julia.
-    name: ${{ matrix.package }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
+    name: ${{ matrix.package }} - ${{ matrix.os }} - ${{ matrix.arch }}
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false

From abe8640c13107eec3fb4dba20f5769cb3da3c57f Mon Sep 17 00:00:00 2001
From: Huiyu Xie <huiyuxie.sde@gmail.com>
Date: Mon, 2 Dec 2024 20:55:34 -1000
Subject: [PATCH 7/8] Enable GPU compatibility check in CI (#2168)

* Add yaml file

* Update info

* Add more

* Format

* Test

* Fix

* Reduce

* Update .github/workflows/GPUCompat.yml

Co-authored-by: Hendrik Ranocha <ranocha@users.noreply.github.com>

* Align

---------

Co-authored-by: Hendrik Ranocha <ranocha@users.noreply.github.com>
---
 .github/workflows/GPUCompat.yml | 86 +++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 .github/workflows/GPUCompat.yml

diff --git a/.github/workflows/GPUCompat.yml b/.github/workflows/GPUCompat.yml
new file mode 100644
index 00000000000..335e1c83c4c
--- /dev/null
+++ b/.github/workflows/GPUCompat.yml
@@ -0,0 +1,86 @@
+name: GPU Package Compatibility
+
+on:
+  pull_request:
+    paths-ignore:
+      - 'AUTHORS.md'
+      - 'CITATION.bib'
+      - 'CONTRIBUTING.md'
+      - 'LICENSE.md'
+      - 'NEWS.md'
+      - 'README.md'
+      - '.zenodo.json'
+      - '.github/workflows/benchmark.yml'
+      - '.github/workflows/CompatHelper.yml'
+      - '.github/workflows/TagBot.yml'
+      - 'benchmark/**'
+      - 'docs/**'
+      - 'utils/**'
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    if: "!contains(github.event.head_commit.message, 'skip ci')"
+    name: ${{ matrix.os }} - ${{ matrix.arch }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - version: '1.10'
+            os: ubuntu-latest
+            arch: x64
+          - version: '1.10'
+            os: windows-latest
+            arch: x64
+          # CUDA.jl only supports 64-bit Linux and Windows, see https://github.com/JuliaGPU/CUDA.jl?tab=readme-ov-file#requirements
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Julia
+        uses: julia-actions/setup-julia@v2
+        with:
+          version: ${{ matrix.version }}
+          arch: ${{ matrix.arch }}
+
+      - name: Display version info
+        run: julia -e 'using InteractiveUtils; versioninfo(verbose=true)'
+
+      - name: Cache Julia packages
+        uses: julia-actions/cache@v2
+
+      - name: Build project
+        uses: julia-actions/julia-buildpkg@v1
+
+      # Only CUDA.jl is needed for GPU compatibility test now
+      - name: Add CUDA.jl to environment
+        run: |
+          julia --project=. -e '
+          using Pkg; 
+          Pkg.activate(temp=true);
+          Pkg.develop(PackageSpec(path=pwd())); 
+          Pkg.add("CUDA");
+          Pkg.update()'
+
+    #   - name: Add Metal.jl to environment
+    #     run: |
+    #       julia --project=. -e '
+    #       using Pkg;
+    #       Pkg.activate(temp=true);
+    #       Pkg.develop(PackageSpec(path=pwd()));
+    #       Pkg.add("Metal");
+    #       Pkg.update()'
+
+    #   - name: Add AMDGPU.jl to environment
+    #     run: |
+    #       julia --project=. -e '
+    #       using Pkg;
+    #       Pkg.activate(temp=true);
+    #       Pkg.develop(PackageSpec(path=pwd()));
+    #       Pkg.add("AMDGPU");
+    #       Pkg.update()'

From 7779fd2d1bb6fc108a5a2d840c168e6fb2e18369 Mon Sep 17 00:00:00 2001
From: Hendrik Ranocha <ranocha@users.noreply.github.com>
Date: Tue, 3 Dec 2024 09:00:30 +0100
Subject: [PATCH 8/8] require P4est.jl v0.4.12 (#2189)

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index ac1af2c49eb..092289ed32c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -88,7 +88,7 @@ MuladdMacro = "0.2.2"
 NLsolve = "4.5.1"
 Octavian = "0.3.21"
 OffsetArrays = "1.12"
-P4est = "0.4.9"
+P4est = "0.4.12"
 Polyester = "0.7.10"
 PrecompileTools = "1.1"
 Preferences = "1.3"