Skip to content

Commit

Permalink
adapt auxiliary math functions for Float32 (#2048)
Browse files Browse the repository at this point in the history
* adapt auxiliary math functions

* adapt tests

* more tests and fix shock capturing volume integral

* fix typo in tests

* p4est_2d_dgsem/elixir_euler_shockcapturing_ec_float32.jl

* format

* fix stolarsky_mean test

* use Float32 for test tolerances

* format

* increase tolerance for CI

* format
  • Loading branch information
ranocha authored Aug 22, 2024
1 parent 20ab97b commit 8d7b20a
Show file tree
Hide file tree
Showing 8 changed files with 224 additions and 288 deletions.
66 changes: 66 additions & 0 deletions examples/p4est_2d_dgsem/elixir_euler_shockcapturing_ec_float32.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@

using OrdinaryDiffEq
using Trixi

###############################################################################
# semidiscretization of the compressible Euler equations

equations = CompressibleEulerEquations2D(1.4f0)

initial_condition = initial_condition_weak_blast_wave

surface_flux = flux_ranocha
volume_flux = flux_ranocha
polydeg = 4
basis = LobattoLegendreBasis(Float32, polydeg)
indicator_sc = IndicatorHennemannGassner(equations, basis,
alpha_max = 1.0f0,
alpha_min = 0.001f0,
alpha_smooth = true,
variable = density_pressure)
volume_integral = VolumeIntegralShockCapturingHG(indicator_sc;
volume_flux_dg = volume_flux,
volume_flux_fv = surface_flux)

solver = DGSEM(polydeg = polydeg, surface_flux = surface_flux,
volume_integral = volume_integral, RealT = Float32)

###############################################################################

coordinates_min = (-1.0f0, -1.0f0)
coordinates_max = (+1.0f0, +1.0f0)

trees_per_dimension = (4, 4)
mesh = P4estMesh(trees_per_dimension,
polydeg = 4, initial_refinement_level = 2,
coordinates_min = coordinates_min, coordinates_max = coordinates_max,
periodicity = true, RealT = Float32)

semi = SemidiscretizationHyperbolic(mesh, equations, initial_condition, solver)

###############################################################################
# ODE solvers, callbacks etc.

tspan = (0.0f0, 2.0f0)
ode = semidiscretize(semi, tspan)

summary_callback = SummaryCallback()

analysis_interval = 100
analysis_callback = AnalysisCallback(semi, interval = analysis_interval)

alive_callback = AliveCallback(analysis_interval = analysis_interval)

stepsize_callback = StepsizeCallback(cfl = 1.0f0)

callbacks = CallbackSet(summary_callback,
analysis_callback,
alive_callback,
stepsize_callback)
###############################################################################
# run the simulation

sol = solve(ode, CarpenterKennedy2N54(williamson_condition = false),
dt = 1.0f0, # solve needs some value here but it will be overwritten by the stepsize_callback
save_everystep = false, callback = callbacks);
summary_callback() # print the timer summary
53 changes: 34 additions & 19 deletions src/auxiliary/math.jl
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ end
end

"""
ln_mean(x, y)
Trixi.ln_mean(x::Real, y::Real)
Compute the logarithmic mean
Expand Down Expand Up @@ -171,18 +171,24 @@ Given ε = 1.0e-4, we use the following algorithm.
for Intel, AMD, and VIA CPUs.
[https://www.agner.org/optimize/instruction_tables.pdf](https://www.agner.org/optimize/instruction_tables.pdf)
"""
@inline function ln_mean(x, y)
epsilon_f2 = 1.0e-4
@inline ln_mean(x::Real, y::Real) = ln_mean(promote(x, y)...)

@inline function ln_mean(x::RealT, y::RealT) where {RealT <: Real}
epsilon_f2 = convert(RealT, 1.0e-4)
f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2
if f2 < epsilon_f2
return (x + y) / @evalpoly(f2, 2, 2/3, 2/5, 2/7)
return (x + y) / @evalpoly(f2,
2,
convert(RealT, 2 / 3),
convert(RealT, 2 / 5),
convert(RealT, 2 / 7))
else
return (y - x) / log(y / x)
end
end

"""
inv_ln_mean(x, y)
Trixi.inv_ln_mean(x::Real, y::Real)
Compute the inverse `1 / ln_mean(x, y)` of the logarithmic mean
[`ln_mean`](@ref).
Expand All @@ -191,11 +197,17 @@ This function may be used to increase performance where the inverse of the
logarithmic mean is needed, by replacing a (slow) division by a (fast)
multiplication.
"""
@inline function inv_ln_mean(x, y)
epsilon_f2 = 1.0e-4
@inline inv_ln_mean(x::Real, y::Real) = inv_ln_mean(promote(x, y)...)

@inline function inv_ln_mean(x::RealT, y::RealT) where {RealT <: Real}
epsilon_f2 = convert(RealT, 1.0e-4)
f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2
if f2 < epsilon_f2
return @evalpoly(f2, 2, 2/3, 2/5, 2/7) / (x + y)
return @evalpoly(f2,
2,
convert(RealT, 2 / 3),
convert(RealT, 2 / 5),
convert(RealT, 2 / 7)) / (x + y)
else
return log(y / x) / (y - x)
end
Expand Down Expand Up @@ -273,7 +285,7 @@ end
# [Fortran](https://godbolt.org/z/Yrsa1js7P)
# or [C++](https://godbolt.org/z/674G7Pccv).
"""
max(x, y, ...)
Trixi.max(x, y, ...)
Return the maximum of the arguments. See also the `maximum` function to take
the maximum element from a collection.
Expand All @@ -292,7 +304,7 @@ julia> max(2, 5, 1)
@inline max(args...) = @fastmath max(args...)

"""
min(x, y, ...)
Trixi.min(x, y, ...)
Return the minimum of the arguments. See also the `minimum` function to take
the minimum element from a collection.
Expand All @@ -311,7 +323,7 @@ julia> min(2, 5, 1)
@inline min(args...) = @fastmath min(args...)

"""
positive_part(x)
Trixi.positive_part(x)
Return `x` if `x` is positive, else zero. In other words, return
`(x + abs(x)) / 2` for real numbers `x`.
Expand All @@ -321,7 +333,7 @@ Return `x` if `x` is positive, else zero. In other words, return
end

"""
negative_part(x)
Trixi.negative_part(x)
Return `x` if `x` is negative, else zero. In other words, return
`(x - abs(x)) / 2` for real numbers `x`.
Expand All @@ -331,7 +343,7 @@ Return `x` if `x` is negative, else zero. In other words, return
end

"""
stolarsky_mean(x, y, gamma)
Trixi.stolarsky_mean(x::Real, y:Real, gamma::Real)
Compute an instance of a weighted Stolarsky mean of the form
Expand Down Expand Up @@ -382,15 +394,18 @@ Given ε = 1.0e-4, we use the following algorithm.
for Intel, AMD, and VIA CPUs.
[https://www.agner.org/optimize/instruction_tables.pdf](https://www.agner.org/optimize/instruction_tables.pdf)
"""
@inline function stolarsky_mean(x, y, gamma)
epsilon_f2 = 1.0e-4
@inline stolarsky_mean(x::Real, y::Real, gamma::Real) = stolarsky_mean(promote(x, y,
gamma)...)

@inline function stolarsky_mean(x::RealT, y::RealT, gamma::RealT) where {RealT <: Real}
epsilon_f2 = convert(RealT, 1.0e-4)
f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2
if f2 < epsilon_f2
# convenience coefficients
c1 = (1 / 3) * (gamma - 2)
c2 = -(1 / 15) * (gamma + 1) * (gamma - 3) * c1
c3 = -(1 / 21) * (2 * gamma * (gamma - 2) - 9) * c2
return 0.5 * (x + y) * @evalpoly(f2, 1, c1, c2, c3)
c1 = convert(RealT, 1 / 3) * (gamma - 2)
c2 = convert(RealT, -1 / 15) * (gamma + 1) * (gamma - 3) * c1
c3 = convert(RealT, -1 / 21) * (2 * gamma * (gamma - 2) - 9) * c2
return 0.5f0 * (x + y) * @evalpoly(f2, 1, c1, c2, c3)
else
return (gamma - 1) / gamma * (y^gamma - x^gamma) /
(y^(gamma - 1) - x^(gamma - 1))
Expand Down
6 changes: 5 additions & 1 deletion src/solvers/dgmulti/shock_capturing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,14 @@ function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha,
mesh::DGMultiMesh, dg::DGMulti)
empty!(element_ids_dg)
empty!(element_ids_dgfv)
# For `Float64`, this gives 1.8189894035458565e-12
# For `Float32`, this gives 1.1920929f-5
RealT = eltype(alpha)
atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0))

for element in eachelement(mesh, dg)
# Clip blending factor for values close to zero (-> pure DG)
dg_only = isapprox(alpha[element], 0, atol = 1e-12)
dg_only = isapprox(alpha[element], 0, atol = atol)
if dg_only
push!(element_ids_dg, element)
else
Expand Down
2 changes: 1 addition & 1 deletion src/solvers/dgsem_p4est/dg_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ end
# the interpretation of global SBP operators coupled discontinuously via
# central fluxes/SATs
surface_flux_values[v, node_index, direction_index, element_index] = flux_[v] +
0.5 *
0.5f0 *
noncons_[v]
end
end
Expand Down
6 changes: 5 additions & 1 deletion src/solvers/dgsem_tree/dg.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,14 @@ function pure_and_blended_element_ids!(element_ids_dg, element_ids_dgfv, alpha,
cache)
empty!(element_ids_dg)
empty!(element_ids_dgfv)
# For `Float64`, this gives 1.8189894035458565e-12
# For `Float32`, this gives 1.1920929f-5
RealT = eltype(alpha)
atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0))

for element in eachelement(dg, cache)
# Clip blending factor for values close to zero (-> pure DG)
dg_only = isapprox(alpha[element], 0, atol = 1e-12)
dg_only = isapprox(alpha[element], 0, atol = atol)
if dg_only
push!(element_ids_dg, element)
else
Expand Down
2 changes: 2 additions & 0 deletions src/solvers/dgsem_unstructured/dg_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ function create_cache(mesh::UnstructuredMesh2D, equations,

# perform a check on the sufficient metric identities condition for free-stream preservation
# and halt computation if it fails
# For `Float64`, this gives 1.8189894035458565e-12
# For `Float32`, this gives 1.1920929f-5
atol = max(100 * eps(RealT), eps(RealT)^convert(RealT, 0.75f0))
if !isapprox(max_discrete_metric_identities(dg, cache), 0, atol = atol)
error("metric terms fail free-stream preservation check with maximum error $(max_discrete_metric_identities(dg, cache))")
Expand Down
28 changes: 28 additions & 0 deletions test/test_p4est_2d.jl
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,34 @@ end
end
end

@trixi_testset "elixir_euler_shockcapturing_ec_float32.jl" begin
@test_trixi_include(joinpath(EXAMPLES_DIR,
"elixir_euler_shockcapturing_ec_float32.jl"),
l2=[
0.09539953f0,
0.10563527f0,
0.105637245f0,
0.3507514f0,
],
linf=[
0.2942562f0,
0.4079147f0,
0.3972956f0,
1.0810697f0,
],
tspan=(0.0f0, 1.0f0),
rtol=10 * sqrt(eps(Float32)), # to make CI pass
RealT=Float32)
# Ensure that we do not have excessive memory allocations
# (e.g., from type instabilities)
let
t = sol.t[end]
u_ode = sol.u[end]
du_ode = similar(u_ode)
@test (@allocated Trixi.rhs!(du_ode, u_ode, semi, t)) < 1000
end
end

@trixi_testset "elixir_euler_sedov.jl" begin
@test_trixi_include(joinpath(EXAMPLES_DIR, "elixir_euler_sedov.jl"),
l2=[
Expand Down
Loading

0 comments on commit 8d7b20a

Please sign in to comment.