From ea2ca189399176f7abe106991c165322bf750ab3 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Sun, 5 May 2024 21:19:50 -0700 Subject: [PATCH 1/2] start with src/auxiliary --- src/auxiliary/math.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl index 9e3aaa181bf..b05aaa07faf 100644 --- a/src/auxiliary/math.jl +++ b/src/auxiliary/math.jl @@ -151,10 +151,12 @@ Given ε = 1.0e-4, we use the following algorithm. [https://www.agner.org/optimize/instruction_tables.pdf](https://www.agner.org/optimize/instruction_tables.pdf) """ @inline function ln_mean(x, y) + RealT = eltype(x) epsilon_f2 = 1.0e-4 f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 if f2 < epsilon_f2 - return (x + y) / @evalpoly(f2, 2, 2/3, 2/5, 2/7) + return (x + y) / @evalpoly(f2, 2, convert(RealT, 2 / 3), convert(RealT, 2 / 5), + convert(RealT, 2 / 7)) else return (y - x) / log(y / x) end @@ -171,10 +173,12 @@ logarithmic mean is needed, by replacing a (slow) division by a (fast) multiplication. """ @inline function inv_ln_mean(x, y) + RealT = eltype(x) epsilon_f2 = 1.0e-4 f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 if f2 < epsilon_f2 - return @evalpoly(f2, 2, 2/3, 2/5, 2/7) / (x + y) + return @evalpoly(f2, 2, convert(RealT, 2 / 3), convert(RealT, 2 / 5), + convert(RealT, 2 / 7)) / (x + y) else return log(y / x) / (y - x) end From 6ab37ec18ddd5a3026b218b134362d19ef2d35ff Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Mon, 13 May 2024 18:11:34 -0700 Subject: [PATCH 2/2] leave to TODO list --- src/auxiliary/math.jl | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl index b05aaa07faf..07ec02d5ef0 100644 --- a/src/auxiliary/math.jl +++ b/src/auxiliary/math.jl @@ -151,12 +151,22 @@ Given ε = 1.0e-4, we use the following algorithm. [https://www.agner.org/optimize/instruction_tables.pdf](https://www.agner.org/optimize/instruction_tables.pdf) """ @inline function ln_mean(x, y) - RealT = eltype(x) epsilon_f2 = 1.0e-4 f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 if f2 < epsilon_f2 - return (x + y) / @evalpoly(f2, 2, convert(RealT, 2 / 3), convert(RealT, 2 / 5), - convert(RealT, 2 / 7)) + return (x + y) / @evalpoly(f2, 2, 2/3, 2/5, 2/7) + else + return (y - x) / log(y / x) + end +end + +# TODO: Performance tuning for `ln_mean` (Float32), please find below +@inline function ln_mean(x::Float32, y::Float32) + epsilon_f2 = 1.0f-4 # cut-off value to be changed for Float32 + f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 + if f2 < epsilon_f2 + # Taylor expansion to be changed for Float32 + return (x + y) / @evalpoly(f2, 2.0f0, 2.0f0/3, 2.0f0/5, 2.0f0/7) else return (y - x) / log(y / x) end @@ -173,12 +183,22 @@ logarithmic mean is needed, by replacing a (slow) division by a (fast) multiplication. """ @inline function inv_ln_mean(x, y) - RealT = eltype(x) epsilon_f2 = 1.0e-4 f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 if f2 < epsilon_f2 - return @evalpoly(f2, 2, convert(RealT, 2 / 3), convert(RealT, 2 / 5), - convert(RealT, 2 / 7)) / (x + y) + return @evalpoly(f2, 2, 2/3, 2/5, 2/7) / (x + y) + else + return log(y / x) / (y - x) + end +end + +# TODO: Performance tuning for `inv_ln_mean` (Float32), please find below +@inline function inv_ln_mean(x::Float32, y::Float32) + epsilon_f2 = 1.0f-4 # cut-off value to be changed for Float32 + f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 + if f2 < epsilon_f2 + # Taylor expansion to be changed for Float32 + return @evalpoly(f2, 2.0f0, 2.0f0/3, 2.0f0/5, 2.0f0/7) / (x + y) else return log(y / x) / (y - x) end