adapt auxiliary math functions for Float32 (#2048)

* adapt auxiliary math functions * adapt tests * more tests and fix shock capturing volume integral * fix typo in tests * p4est_2d_dgsem/elixir_euler_shockcapturing_ec_float32.jl * format * fix stolarsky_mean test * use Float32 for test tolerances * format * increase tolerance for CI * format
trixi-framework · Aug 22, 2024 · 8d7b20a · 8d7b20a
1 parent 20ab97b
commit 8d7b20a
Show file tree

Hide file tree

Showing 8 changed files with 224 additions and 288 deletions.
diff --git a/examples/p4est_2d_dgsem/elixir_euler_shockcapturing_ec_float32.jl b/examples/p4est_2d_dgsem/elixir_euler_shockcapturing_ec_float32.jl
@@ -0,0 +1,66 @@
+
+using OrdinaryDiffEq
+using Trixi
+
+###############################################################################
+# semidiscretization of the compressible Euler equations
+
+equations = CompressibleEulerEquations2D(1.4f0)
+
+initial_condition = initial_condition_weak_blast_wave
+
+surface_flux = flux_ranocha
+volume_flux = flux_ranocha
+polydeg = 4
+basis = LobattoLegendreBasis(Float32, polydeg)
+indicator_sc = IndicatorHennemannGassner(equations, basis,
+                                         alpha_max = 1.0f0,
+                                         alpha_min = 0.001f0,
+                                         alpha_smooth = true,
+                                         variable = density_pressure)
+volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
+                                                 volume_flux_dg = volume_flux,
+                                                 volume_flux_fv = surface_flux)
+
+solver = DGSEM(polydeg = polydeg, surface_flux = surface_flux,
+               volume_integral = volume_integral, RealT = Float32)
+
+###############################################################################
+
+coordinates_min = (-1.0f0, -1.0f0)
+coordinates_max = (+1.0f0, +1.0f0)
+
+trees_per_dimension = (4, 4)
+mesh = P4estMesh(trees_per_dimension,
+                 polydeg = 4, initial_refinement_level = 2,
+                 coordinates_min = coordinates_min, coordinates_max = coordinates_max,
+                 periodicity = true, RealT = Float32)
+
+semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)
+
+###############################################################################
+# ODE solvers, callbacks etc.
+
+tspan = (0.0f0, 2.0f0)
+ode = semidiscretize(semi, tspan)
+
+summary_callback = SummaryCallback()
+
+analysis_interval = 100
+analysis_callback = AnalysisCallback(semi, interval = analysis_interval)
+
+alive_callback = AliveCallback(analysis_interval = analysis_interval)
+
+stepsize_callback = StepsizeCallback(cfl = 1.0f0)
+
+callbacks = CallbackSet(summary_callback,
+                        analysis_callback,
+                        alive_callback,
+                        stepsize_callback)
+###############################################################################
+# run the simulation
+
+sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
+            dt = 1.0f0, # solve needs some value here but it will be overwritten by the stepsize_callback
+            save_everystep = false, callback = callbacks);
+summary_callback() # print the timer summary
diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl
@@ -124,7 +124,7 @@ end
 end
 
 """
-    ln_mean(x, y)
+    Trixi.ln_mean(x::Real, y::Real)
 
 Compute the logarithmic mean
 
@@ -171,18 +171,24 @@ Given ε = 1.0e-4, we use the following algorithm.
   for Intel, AMD, and VIA CPUs.
   [https://www.agner.org/optimize/instruction_tables.pdf](https://www.agner.org/optimize/instruction_tables.pdf)
 """
-@inline function ln_mean(x, y)
-    epsilon_f2 = 1.0e-4
+@inline ln_mean(x::Real, y::Real) = ln_mean(promote(x, y)...)
+
+@inline function ln_mean(x::RealT, y::RealT) where {RealT <: Real}
+    epsilon_f2 = convert(RealT, 1.0e-4)
     f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2
     if f2 < epsilon_f2
-        return (x + y) / @evalpoly(f2, 2, 2/3, 2/5, 2/7)
+        return (x + y) / @evalpoly(f2,
+                         2,
+                         convert(RealT, 2 / 3),
+                         convert(RealT, 2 / 5),
+                         convert(RealT, 2 / 7))
     else
         return (y - x) / log(y / x)
     end
 end
 
 """
-    inv_ln_mean(x, y)
+    Trixi.inv_ln_mean(x::Real, y::Real)
 
 Compute the inverse `1 / ln_mean(x, y)` of the logarithmic mean
 [`ln_mean`](@ref).
@@ -191,11 +197,17 @@ This function may be used to increase performance where the inverse of the
 logarithmic mean is needed, by replacing a (slow) division by a (fast)
 multiplication.
 """
-@inline function inv_ln_mean(x, y)
-    epsilon_f2 = 1.0e-4
+@inline inv_ln_mean(x::Real, y::Real) = inv_ln_mean(promote(x, y)...)
+
+@inline function inv_ln_mean(x::RealT, y::RealT) where {RealT <: Real}
+    epsilon_f2 = convert(RealT, 1.0e-4)
     f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2
     if f2 < epsilon_f2
-        return @evalpoly(f2, 2, 2/3, 2/5, 2/7) / (x + y)
+        return @evalpoly(f2,
+                         2,
+                         convert(RealT, 2 / 3),
+                         convert(RealT, 2 / 5),
+                         convert(RealT, 2 / 7)) / (x + y)
     else
         return log(y / x) / (y - x)
     end
@@ -273,7 +285,7 @@ end
 # [Fortran](https://godbolt.org/z/Yrsa1js7P)
 # or [C++](https://godbolt.org/z/674G7Pccv).
 """
-    max(x, y, ...)
+    Trixi.max(x, y, ...)
 
 Return the maximum of the arguments. See also the `maximum` function to take
 the maximum element from a collection.
@@ -292,7 +304,7 @@ julia> max(2, 5, 1)
 @inline max(args...) = @fastmath max(args...)
 
 """
-    min(x, y, ...)
+    Trixi.min(x, y, ...)
 
 Return the minimum of the arguments. See also the `minimum` function to take
 the minimum element from a collection.
@@ -311,7 +323,7 @@ julia> min(2, 5, 1)
 @inline min(args...) = @fastmath min(args...)
 
 """
-    positive_part(x)
+    Trixi.positive_part(x)
 
 Return `x` if `x` is positive, else zero. In other words, return
 `(x + abs(x)) / 2` for real numbers `x`.
@@ -321,7 +333,7 @@ Return `x` if `x` is positive, else zero. In other words, return
 end
 
 """
-    negative_part(x)
+    Trixi.negative_part(x)
 
 Return `x` if `x` is negative, else zero. In other words, return
 `(x - abs(x)) / 2` for real numbers `x`.
@@ -331,7 +343,7 @@ Return `x` if `x` is negative, else zero. In other words, return
 end
 
 """
-    stolarsky_mean(x, y, gamma)
+    Trixi.stolarsky_mean(x::Real, y:Real, gamma::Real)
 
 Compute an instance of a weighted Stolarsky mean of the form
 
@@ -382,15 +394,18 @@ Given ε = 1.0e-4, we use the following algorithm.
   for Intel, AMD, and VIA CPUs.
   [https://www.agner.org/optimize/instruction_tables.pdf](https://www.agner.org/optimize/instruction_tables.pdf)
 """
-@inline function stolarsky_mean(x, y, gamma)
-    epsilon_f2 = 1.0e-4
+@inline stolarsky_mean(x::Real, y::Real, gamma::Real) = stolarsky_mean(promote(x, y,
+                                                                               gamma)...)
+
+@inline function stolarsky_mean(x::RealT, y::RealT, gamma::RealT) where {RealT <: Real}
+    epsilon_f2 = convert(RealT, 1.0e-4)
     f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2
     if f2 < epsilon_f2
         # convenience coefficients
-        c1 = (1 / 3) * (gamma - 2)
-        c2 = -(1 / 15) * (gamma + 1) * (gamma - 3) * c1
-        c3 = -(1 / 21) * (2 * gamma * (gamma - 2) - 9) * c2
-        return 0.5 * (x + y) * @evalpoly(f2, 1, c1, c2, c3)
+        c1 = convert(RealT, 1 / 3) * (gamma - 2)
+        c2 = convert(RealT, -1 / 15) * (gamma + 1) * (gamma - 3) * c1
+        c3 = convert(RealT, -1 / 21) * (2 * gamma * (gamma - 2) - 9) * c2
+        return 0.5f0 * (x + y) * @evalpoly(f2, 1, c1, c2, c3)
     else
         return (gamma - 1) / gamma * (y^gamma - x^gamma) /
                (y^(gamma - 1) - x^(gamma - 1))

diff --git a/src/solvers/dgmulti/shock_capturing.jl b/src/solvers/dgmulti/shock_capturing.jl
@@ -160,10 +160,14 @@ function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha,
                                        mesh::DGMultiMesh, dg::DGMulti)
     empty!(element_ids_dg)
     empty!(element_ids_dgfv)
+    # For `Float64`, this gives 1.8189894035458565e-12
+    # For `Float32`, this gives 1.1920929f-5
+    RealT = eltype(alpha)
+    atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0))
 
     for element in eachelement(mesh, dg)
         # Clip blending factor for values close to zero (-> pure DG)
-        dg_only = isapprox(alpha[element], 0, atol = 1e-12)
+        dg_only = isapprox(alpha[element], 0, atol = atol)
         if dg_only
             push!(element_ids_dg, element)
         else

diff --git a/src/solvers/dgsem_p4est/dg_2d.jl b/src/solvers/dgsem_p4est/dg_2d.jl
@@ -379,7 +379,7 @@ end
         # the interpretation of global SBP operators coupled discontinuously via
         # central fluxes/SATs
         surface_flux_values[v, node_index, direction_index, element_index] = flux_[v] +
-                                                                             0.5 *
+                                                                             0.5f0 *
                                                                              noncons_[v]
     end
 end

diff --git a/src/solvers/dgsem_tree/dg.jl b/src/solvers/dgsem_tree/dg.jl
@@ -24,10 +24,14 @@ function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha,
                                        cache)
     empty!(element_ids_dg)
     empty!(element_ids_dgfv)
+    # For `Float64`, this gives 1.8189894035458565e-12
+    # For `Float32`, this gives 1.1920929f-5
+    RealT = eltype(alpha)
+    atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0))
 
     for element in eachelement(dg, cache)
         # Clip blending factor for values close to zero (-> pure DG)
-        dg_only = isapprox(alpha[element], 0, atol = 1e-12)
+        dg_only = isapprox(alpha[element], 0, atol = atol)
         if dg_only
             push!(element_ids_dg, element)
         else

diff --git a/src/solvers/dgsem_unstructured/dg_2d.jl b/src/solvers/dgsem_unstructured/dg_2d.jl
@@ -20,6 +20,8 @@ function create_cache(mesh::UnstructuredMesh2D, equations,
 
     # perform a check on the sufficient metric identities condition for free-stream preservation
     # and halt computation if it fails
+    # For `Float64`, this gives 1.8189894035458565e-12
+    # For `Float32`, this gives 1.1920929f-5
     atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0))
     if !isapprox(max_discrete_metric_identities(dg, cache), 0, atol = atol)
         error("metric terms fail free-stream preservation check with maximum error $(max_discrete_metric_identities(dg, cache))")

diff --git a/test/test_p4est_2d.jl b/test/test_p4est_2d.jl
@@ -221,6 +221,34 @@ end
     end
 end
 
+@trixi_testset "elixir_euler_shockcapturing_ec_float32.jl" begin
+    @test_trixi_include(joinpath(EXAMPLES_DIR,
+                                 "elixir_euler_shockcapturing_ec_float32.jl"),
+                        l2=[
+                            0.09539953f0,
+                            0.10563527f0,
+                            0.105637245f0,
+                            0.3507514f0,
+                        ],
+                        linf=[
+                            0.2942562f0,
+                            0.4079147f0,
+                            0.3972956f0,
+                            1.0810697f0,
+                        ],
+                        tspan=(0.0f0, 1.0f0),
+                        rtol=10 * sqrt(eps(Float32)), # to make CI pass
+                        RealT=Float32)
+    # Ensure that we do not have excessive memory allocations
+    # (e.g., from type instabilities)
+    let
+        t = sol.t[end]
+        u_ode = sol.u[end]
+        du_ode = similar(u_ode)
+        @test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
+    end
+end
+
 @trixi_testset "elixir_euler_sedov.jl" begin
     @test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov.jl"),
                         l2=[