From ea2ca189399176f7abe106991c165322bf750ab3 Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Sun, 5 May 2024 21:19:50 -0700 Subject: [PATCH 1/3] start with src/auxiliary --- src/auxiliary/math.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl index 9e3aaa181bf..b05aaa07faf 100644 --- a/src/auxiliary/math.jl +++ b/src/auxiliary/math.jl @@ -151,10 +151,12 @@ Given ε = 1.0e-4, we use the following algorithm. [https://www.agner.org/optimize/instruction_tables.pdf](https://www.agner.org/optimize/instruction_tables.pdf) """ @inline function ln_mean(x, y) + RealT = eltype(x) epsilon_f2 = 1.0e-4 f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 if f2 < epsilon_f2 - return (x + y) / @evalpoly(f2, 2, 2/3, 2/5, 2/7) + return (x + y) / @evalpoly(f2, 2, convert(RealT, 2 / 3), convert(RealT, 2 / 5), + convert(RealT, 2 / 7)) else return (y - x) / log(y / x) end @@ -171,10 +173,12 @@ logarithmic mean is needed, by replacing a (slow) division by a (fast) multiplication. """ @inline function inv_ln_mean(x, y) + RealT = eltype(x) epsilon_f2 = 1.0e-4 f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 if f2 < epsilon_f2 - return @evalpoly(f2, 2, 2/3, 2/5, 2/7) / (x + y) + return @evalpoly(f2, 2, convert(RealT, 2 / 3), convert(RealT, 2 / 5), + convert(RealT, 2 / 7)) / (x + y) else return log(y / x) / (y - x) end From 6ab37ec18ddd5a3026b218b134362d19ef2d35ff Mon Sep 17 00:00:00 2001 From: huiyuxie Date: Mon, 13 May 2024 18:11:34 -0700 Subject: [PATCH 2/3] leave to TODO list --- src/auxiliary/math.jl | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl index b05aaa07faf..07ec02d5ef0 100644 --- a/src/auxiliary/math.jl +++ b/src/auxiliary/math.jl @@ -151,12 +151,22 @@ Given ε = 1.0e-4, we use the following algorithm. [https://www.agner.org/optimize/instruction_tables.pdf](https://www.agner.org/optimize/instruction_tables.pdf) """ @inline function ln_mean(x, y) - RealT = eltype(x) epsilon_f2 = 1.0e-4 f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 if f2 < epsilon_f2 - return (x + y) / @evalpoly(f2, 2, convert(RealT, 2 / 3), convert(RealT, 2 / 5), - convert(RealT, 2 / 7)) + return (x + y) / @evalpoly(f2, 2, 2/3, 2/5, 2/7) + else + return (y - x) / log(y / x) + end +end + +# TODO: Performance tuning for `ln_mean` (Float32), please find below +@inline function ln_mean(x::Float32, y::Float32) + epsilon_f2 = 1.0f-4 # cut-off value to be changed for Float32 + f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 + if f2 < epsilon_f2 + # Taylor expansion to be changed for Float32 + return (x + y) / @evalpoly(f2, 2.0f0, 2.0f0/3, 2.0f0/5, 2.0f0/7) else return (y - x) / log(y / x) end @@ -173,12 +183,22 @@ logarithmic mean is needed, by replacing a (slow) division by a (fast) multiplication. """ @inline function inv_ln_mean(x, y) - RealT = eltype(x) epsilon_f2 = 1.0e-4 f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 if f2 < epsilon_f2 - return @evalpoly(f2, 2, convert(RealT, 2 / 3), convert(RealT, 2 / 5), - convert(RealT, 2 / 7)) / (x + y) + return @evalpoly(f2, 2, 2/3, 2/5, 2/7) / (x + y) + else + return log(y / x) / (y - x) + end +end + +# TODO: Performance tuning for `inv_ln_mean` (Float32), please find below +@inline function inv_ln_mean(x::Float32, y::Float32) + epsilon_f2 = 1.0f-4 # cut-off value to be changed for Float32 + f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 + if f2 < epsilon_f2 + # Taylor expansion to be changed for Float32 + return @evalpoly(f2, 2.0f0, 2.0f0/3, 2.0f0/5, 2.0f0/7) / (x + y) else return log(y / x) / (y - x) end From d9e0f502ddd662427f02af7f6cc81c74aa301679 Mon Sep 17 00:00:00 2001 From: Huiyu Xie Date: Mon, 20 May 2024 15:32:02 -0700 Subject: [PATCH 3/3] apply suggestions from code review Co-authored-by: Hendrik Ranocha --- src/auxiliary/math.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl index 07ec02d5ef0..23a4278add1 100644 --- a/src/auxiliary/math.jl +++ b/src/auxiliary/math.jl @@ -160,7 +160,8 @@ Given ε = 1.0e-4, we use the following algorithm. end end -# TODO: Performance tuning for `ln_mean` (Float32), please find below +# TODO: This may need some performance tuning, e.g., to optimize the +# cutoff value `epsilon_f2` or the number of terms in the Taylor expansion @inline function ln_mean(x::Float32, y::Float32) epsilon_f2 = 1.0f-4 # cut-off value to be changed for Float32 f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2 @@ -192,7 +193,8 @@ multiplication. end end -# TODO: Performance tuning for `inv_ln_mean` (Float32), please find below +# TODO: This may need some performance tuning, e.g., to optimize the +# cutoff value `epsilon_f2` or the number of terms in the Taylor expansion @inline function inv_ln_mean(x::Float32, y::Float32) epsilon_f2 = 1.0f-4 # cut-off value to be changed for Float32 f2 = (x * (x - 2 * y) + y * y) / (x * (x + 2 * y) + y * y) # f2 = f^2