From 055c81e6e9bbaa7591d254fa9ac776409f560419 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Fri, 2 Aug 2024 18:30:28 +0200 Subject: [PATCH] Add preference to disable Polyester usage Co-authored-by: Hendrik Ranocha --- src/Trixi.jl | 1 + src/auxiliary/auxiliary.jl | 51 ++++++++++++++++------------------- src/auxiliary/math.jl | 15 +++++++++++ src/callbacks_step/summary.jl | 3 +++ src/solvers/dg.jl | 2 +- 5 files changed, 43 insertions(+), 29 deletions(-) diff --git a/src/Trixi.jl b/src/Trixi.jl index 1a509ed92d1..f4290c9a885 100644 --- a/src/Trixi.jl +++ b/src/Trixi.jl @@ -80,6 +80,7 @@ using Preferences: @load_preference, set_preferences! const _PREFERENCE_SQRT = @load_preference("sqrt", "sqrt_Trixi_NaN") const _PREFERENCE_LOG = @load_preference("log", "log_Trixi_NaN") +const _PREFERENCE_POLYESTER = parse(Bool, @load_preference("polyester", "true")) # finite difference SBP operators using SummationByPartsOperators: AbstractDerivativeOperator, diff --git a/src/auxiliary/auxiliary.jl b/src/auxiliary/auxiliary.jl index 6259e936737..97263405d2a 100644 --- a/src/auxiliary/auxiliary.jl +++ b/src/auxiliary/auxiliary.jl @@ -204,36 +204,31 @@ Some discussion can be found at [https://discourse.julialang.org/t/overhead-of-t and [https://discourse.julialang.org/t/threads-threads-with-one-thread-how-to-remove-the-overhead/58435](https://discourse.julialang.org/t/threads-threads-with-one-thread-how-to-remove-the-overhead/58435). """ macro threaded(expr) - # Use `esc(quote ... end)` for nested macro calls as suggested in - # https://github.com/JuliaLang/julia/issues/23221 - # - # The following code is a simple version using only `Threads.@threads` from the - # standard library with an additional check whether only a single thread is used - # to reduce some overhead (and allocations) for serial execution. 
- # - # return esc(quote - # let - # if Threads.nthreads() == 1 - # $(expr) - # else - # Threads.@threads $(expr) - # end - # end - # end) - # - # However, the code below using `@batch` from Polyester.jl is more efficient, - # since this packages provides threads with less overhead. Since it is written - # by Chris Elrod, the author of LoopVectorization.jl, we expect this package - # to provide the most efficient and useful implementation of threads (as we use - # them) available in Julia. # !!! danger "Heisenbug" # Look at the comments for `wrap_array` when considering to change this macro. - - # By using `Trixi.@batch` we allow users of Trixi.jl to use `@threaded` without having - # Polyester.jl in their namespace. - return esc(quote - Trixi.@batch $(expr) - end) + expr = if _PREFERENCE_POLYESTER + # Currently using `@batch` from Polyester.jl is more efficient, + # bypasses the Julia task scheduler and provides parallelization with less overhead. + quote + $Trixi.@batch $(expr) + end + else + # The following code is a simple version using only `Threads.@threads` from the + # standard library with an additional check whether only a single thread is used + # to reduce some overhead (and allocations) for serial execution. + quote + let + if $Threads.nthreads() == 1 + $(expr) + else + $Threads.@threads :static $(expr) + end + end + end + end + # Use `esc(quote ... end)` for nested macro calls as suggested in + # https://github.com/JuliaLang/julia/issues/23221 + return esc(expr) end """ diff --git a/src/auxiliary/math.jl b/src/auxiliary/math.jl index 9e3aaa181bf..a102dc4a71c 100644 --- a/src/auxiliary/math.jl +++ b/src/auxiliary/math.jl @@ -7,6 +7,21 @@ const TRIXI_UUID = UUID("a7f1ee26-1774-49b1-8366-f1abc58fbfcb") +""" + Trixi.set_polyester(toggle::Bool; force = true) + +Toggle the usage of [Polyester.jl](https://github.com/JuliaSIMD/Polyester.jl) for multithreading. 
+By default, Polyester.jl is enabled, but it can +be useful for performance comparisons to switch to the Julia core backend. + +This does not fully disable Polyester.jl, +but only its use as part of Trixi.jl's `@threaded` macro. +""" +function set_polyester(toggle::Bool; force = true) + set_preferences!(TRIXI_UUID, "polyester" => string(toggle), force = force) + @info "Please restart Julia and reload Trixi.jl for the `polyester` change to take effect" +end + """ Trixi.set_sqrt_type(type; force = true) diff --git a/src/callbacks_step/summary.jl b/src/callbacks_step/summary.jl index 21c7fc780a5..465cc10a310 100644 --- a/src/callbacks_step/summary.jl +++ b/src/callbacks_step/summary.jl @@ -207,6 +207,9 @@ function initialize_summary_callback(cb::DiscreteCallback, u, t, integrator; # technical details setup = Pair{String, Any}["#threads" => Threads.nthreads()] + if !_PREFERENCE_POLYESTER + push!(setup, "Polyester" => "disabled") + end if mpi_isparallel() push!(setup, "#MPI ranks" => mpi_nranks()) diff --git a/src/solvers/dg.jl b/src/solvers/dg.jl index fb4c8f182e0..628e39e6a87 100644 --- a/src/solvers/dg.jl +++ b/src/solvers/dg.jl @@ -629,7 +629,7 @@ end # since LoopVectorization does not support `ForwardDiff.Dual`s. Hence, we use # optimized `PtrArray`s whenever possible and fall back to plain `Array`s # otherwise. - if LoopVectorization.check_args(u_ode) + if _PREFERENCE_POLYESTER && LoopVectorization.check_args(u_ode) # This version using `PtrArray`s from StrideArrays.jl is very fast and # does not result in allocations. #