diff --git a/Project.toml b/Project.toml index d592e3e3a..029db9dd6 100644 --- a/Project.toml +++ b/Project.toml @@ -9,6 +9,7 @@ DiffEqCallbacks = "459566f4-90b8-5000-8ac3-15dfb0a30def" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Morton = "2a6d852e-3fac-5a38-885c-fe708af2d09e" +MuladdMacro = "46d2c3a1-f734-5fdb-9937-b9b9aeba4221" Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" @@ -23,6 +24,7 @@ WriteVTK = "64499a7a-5c06-52f2-abe2-ccb03c286192" DiffEqCallbacks = "2.25" ForwardDiff = "0.10" Morton = "0.1" +MuladdMacro = "0.2" Polyester = "0.7.5" Reexport = "1" SciMLBase = "1, 2" diff --git a/src/TrixiParticles.jl b/src/TrixiParticles.jl index 1e1ef1508..fc5a77b21 100644 --- a/src/TrixiParticles.jl +++ b/src/TrixiParticles.jl @@ -6,6 +6,7 @@ using Dates using DiffEqCallbacks: PeriodicCallback, PeriodicCallbackAffect using LinearAlgebra: norm, dot, I, tr using Morton: cartesian2morton +using MuladdMacro: @muladd using Polyester: Polyester, @batch using Printf: @printf, @sprintf using SciMLBase: CallbackSet, DiscreteCallback, DynamicalODEProblem, u_modified!, diff --git a/src/general/smoothing_kernels.jl b/src/general/smoothing_kernels.jl index 53097b615..1d9fee30f 100644 --- a/src/general/smoothing_kernels.jl +++ b/src/general/smoothing_kernels.jl @@ -163,24 +163,28 @@ For an analytic formula for higher order kernels, see (Monaghan, 1985). """ struct SchoenbergQuarticSplineKernel{NDIMS} <: SmoothingKernel{NDIMS} end -function kernel(kernel::SchoenbergQuarticSplineKernel, r::Real, h) +@muladd @inline function kernel(kernel::SchoenbergQuarticSplineKernel, r::Real, h) q = r / h - if q >= 5 / 2 - return 0.0 - end - - result = (5 / 2 - q)^4 - - if q < 3 / 2 - result -= 5 * (3 / 2 - q)^4 - - if q < 1 / 2 - result += 10 * (1 / 2 - q)^4 - end - end - - return normalization_factor(kernel, h) * result + # Note that `floating_point_number^integer_literal` is lowered to `Base.literal_pow`. + # Currently, specializations reducing this to simple multiplications exist only up + # to a power of three, see + # https://github.com/JuliaLang/julia/blob/34934736fa4dcb30697ac1b23d11d5ad394d6a4d/base/intfuncs.jl#L327-L339 + # Higher powers just call a generic power implementation. Here, we accept to lose + # some precision but gain performance by using plain multiplications instead. + q5_2_pow4 = ((5 / 2 - q)^2)^2 + q3_2_pow4 = ((3 / 2 - q)^2)^2 + q1_2_pow4 = ((1 / 2 - q)^2)^2 + + # We do not use `+=` or `-=` since these are not recognized by MuladdMacro.jl. + result = q5_2_pow4 + result = result - 5 * (q < 3 / 2) * q3_2_pow4 + result = result + 10 * (q < 1 / 2) * q1_2_pow4 + + # Zero out result if q >= 5/2 + result = ifelse(q < 5 / 2, normalization_factor(kernel, h) * result, zero(result)) + + return result end function kernel_deriv(kernel::SchoenbergQuarticSplineKernel, r::Real, h)