Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CP-52709: use timeslices shorter than 50ms #6177

Merged
merged 5 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions ocaml/libs/timeslice/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
(library
(name xapi_timeslice)
(package xapi-idl)
(libraries threads.posix mtime mtime.clock.os xapi-log)
)
48 changes: 48 additions & 0 deletions ocaml/libs/timeslice/recommended.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
(*
* Copyright (C) Cloud Software Group
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; version 2.1 only. with the special
* exception on linking described in file LICENSE.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*)

module D = Debug.Make (struct let name = "timeslice_recommended" end)

let yield_stop = Atomic.make false

let yield_worker () =
while not (Atomic.get yield_stop) do
Thread.yield ()
done

let yield_overhead () =
(* Thread.yield only has an effect if another thread exists,
so create one that yields back immediately *)
D.debug "Measuring Thread.yield overhead" ;
Atomic.set yield_stop false ;
let t = Thread.create yield_worker () in
let measured = Simple_measure.measure Thread.yield in
D.debug "Thread.yield overhead: %.6fs <= %.6fs <= %.6fs" measured.low
measured.median measured.high ;
D.debug "Waiting for worker thread to stop" ;
Atomic.set yield_stop true ;
Thread.join t ;
measured.median

let measure ?(max_overhead_percentage = 1.0) () =
let overhead = yield_overhead () in
let interval = overhead /. (max_overhead_percentage /. 100.) in
D.debug "Recommended timeslice interval = %.4fs" interval ;
(* Avoid too high or too low intervals:
do not go below 1ms (our HZ is 250, and max is 1000, the kernel would round up anyway)
do not go above 50ms (the current default in OCaml 4.14)
*)
let interval = interval |> Float.max 0.001 |> Float.min 0.050 in
D.debug "Final recommeded timeslice interval = %.4fs" interval ;
interval
22 changes: 22 additions & 0 deletions ocaml/libs/timeslice/recommended.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
(*
* Copyright (C) Cloud Software Group
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; version 2.1 only. with the special
* exception on linking described in file LICENSE.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*)

val measure : ?max_overhead_percentage:float -> unit -> float
(** [measure ?max_overhead_percentage ()] returns the recommended timeslice for the current system.

The returned value should be used in a call to {!val:Timeslice.set}.

@param max_overhead_percentage default 1%
@returns [interval] such that [overhead / interval <= max_overhead_percentage / 100]
*)
61 changes: 61 additions & 0 deletions ocaml/libs/timeslice/simple_measure.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
(*
* Copyright (C) Cloud Software Group
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; version 2.1 only. with the special
* exception on linking described in file LICENSE.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*)

(** 95% confidence interval, and median value *)
type t = {low: float; median: float; high: float}

let span_to_s s = Mtime.Span.to_float_ns s *. 1e-9

let ci95 measurements =
let n = Array.length measurements in
Array.sort Float.compare measurements ;
let median = measurements.(n / 2) in
(* "Performance Evaluation of Computer and Communication Systems", Table A. 1 *)
let n = float n in
let d = 0.98 *. sqrt n in
let lo = (n /. 2.) -. d |> Float.to_int
and hi = (n /. 2.) +. 1. +. d |> Float.ceil |> Float.to_int in
{low= measurements.(lo - 1); median; high= measurements.(hi - 1)}

let measure ?(n = 1001) ?(inner = 10) f =
if n <= 70 then (* some of the formulas below are not valid for smaller [n] *)
invalid_arg (Printf.sprintf "n must be at least 70: %d" n) ;
(* warmup *)
Sys.opaque_identity (f ()) ;

let measure_inner _ =
let m = Mtime_clock.counter () in
for _ = 1 to inner do
(* opaque_identity prevents the call from being optimized away *)
Sys.opaque_identity (f ())
done ;
let elapsed = Mtime_clock.count m in
span_to_s elapsed /. float inner
in
let measurements = Array.init n measure_inner in
ci95 measurements

let measure_min ?(n = 1001) f arg =
(* warmup *)
Sys.opaque_identity (f arg) ;
let measure_one _ =
let m = Mtime_clock.counter () in
Sys.opaque_identity (f arg) ;
let elapsed = Mtime_clock.count m in
span_to_s elapsed
in
Seq.ints 0
|> Seq.take n
|> Seq.map measure_one
|> Seq.fold_left Float.min Float.max_float
47 changes: 47 additions & 0 deletions ocaml/libs/timeslice/simple_measure.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
(*
* Copyright (C) Cloud Software Group
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; version 2.1 only. with the special
* exception on linking described in file LICENSE.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*)

(** Measure the speed of an operation in a very simple and robust way.
More detailed measurements can be dune using [Bechamel].
*)

(** 95% confidence interval, and median value *)
type t = {low: float; median: float; high: float}

val measure : ?n:int -> ?inner:int -> (unit -> unit) -> t
(** [measure ?n ?inner f] measures [n] times the duration of [inner] iterations of [f ()].

Returns the median of the inner measurements, and a 95% confidence interval.
The median is used, because it makes no assumptions about the distribution of the samples,
i.e. it doesn't require a normal (Gaussian) distribution.

The inner measurements use a simple average, because we only know the duration of [inner] iterations,
not the duration of each individual call to [f ()].
The purpose of the [inner] iterations is to reduce measurement overhead.

@param n iteration count for the outer loop, must be more than [70].
@param n iteration count for the inner loop
@param f function to measure

@raises Invalid_argument if [n<70]
*)

val measure_min : ?n:int -> ('a -> unit) -> 'a -> float
(** [measure_min ?n:int f arg] is the minimum amount of time that [f arg] takes.

This should be used when we try to measure the maximum speed of some operation (e.g. cached memory accesses),
while ignoring latencies/hickups introduced by other processes on the system.

It shouldn't be used for measuring the overhead of an operation, because the hickups may be part of that overhead.
*)
72 changes: 72 additions & 0 deletions ocaml/libs/timeslice/timeslice.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
(*
* Copyright (C) Cloud Software Group
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; version 2.1 only. with the special
* exception on linking described in file LICENSE.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*)

(* avoid allocating an extra option every time *)
edwintorok marked this conversation as resolved.
Show resolved Hide resolved
let invalid_holder = -1

let last_lock_holder = Atomic.make invalid_holder

let me () = Thread.self () |> Thread.id

let lock_acquired () =
(* these need to be very low overhead, so just keep track of the last lock holder,
i.e. track only one high-priority lock at a time
*)
Atomic.set last_lock_holder (me ())

let lock_released () = Atomic.set last_lock_holder invalid_holder

let[@inline always] am_i_holding_locks () =
let last = Atomic.get last_lock_holder in
last <> invalid_holder && last = me ()

let yield_interval = Atomic.make Mtime.Span.zero

(* TODO: use bechamel.monotonic-clock instead, which has lower overhead,
but not in the right place in xs-opam yet
*)
let last_yield = Atomic.make (Mtime_clock.counter ())

let failures = Atomic.make 0

let periodic_hook (_ : Gc.Memprof.allocation) =
let () =
try
if not (am_i_holding_locks ()) then
let elapsed = Mtime_clock.count (Atomic.get last_yield) in
if Mtime.Span.compare elapsed (Atomic.get yield_interval) > 0 then (
let now = Mtime_clock.counter () in
Atomic.set last_yield now ; Thread.yield ()
)
with _ ->
(* It is not safe to raise exceptions here, it'd require changing all code to be safe to asynchronous interrupts/exceptions,
see https://guillaume.munch.name/software/ocaml/memprof-limits/index.html#isolation
Because this is just a performance optimization, we fall back to safe behaviour: do nothing, and just keep track that we failed
*)
Atomic.incr failures
in
None

let periodic =
Gc.Memprof.
{null_tracker with alloc_minor= periodic_hook; alloc_major= periodic_hook}

let set ?(sampling_rate = 1e-4) interval =
Atomic.set yield_interval
(Mtime.Span.of_float_ns @@ (interval *. 1e9) |> Option.get) ;
Gc.Memprof.start ~sampling_rate ~callstack_size:0 periodic

let clear () =
Gc.Memprof.stop () ;
Atomic.set yield_interval Mtime.Span.zero
44 changes: 44 additions & 0 deletions ocaml/libs/timeslice/timeslice.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
(*
* Copyright (C) Cloud Software Group
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; version 2.1 only. with the special
* exception on linking described in file LICENSE.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*)

val set : ?sampling_rate:float -> float -> unit
(** [set ?sampling_rate interval] calls [Thread.yield ()] at most [interval] seconds.

The implementation of [Thread.yield] guarantees since OCaml 4.09 that we'll switch to a different OCaml thread,
if one exists that is not blocked (i.e. it doesn't rely on [sched_yield] which may run the same thread again,
but uses pthread mutexes and condition variables to ensure the current thread isn't immediately runnable).

The setting is global for the entire process, and currently uses [Gc.Memprof] to ensure that a hook function is called periodically,
although it depends on the allocation rate of the program whether it gets called at all.

Another alternative would be to use {!val:Unix.set_itimer}, but XAPI doesn't cope with [EINTR] in a lot of places,
and POSIX interval timers rely on signals to notify of elapsed time.

We could also have a dedicated thread that sleeps for a certain amount of time, but if it is an OCaml thread,
we'd have no guarantees it'd get scheduled often enough (and it couldn't interrupt other threads anyway,
by the time you'd be running the handler you already gave up running something else).

It may be desirable to avoid yielding if we are currently holding a lock, see {!val:lock_acquired}, and {!val:lock_released}
to notify this module when that happens.
*)

val clear : unit -> unit
(** [clear ()] undoes the changes made by [set].
This is useful for testing multiple timeslices in the same program. *)

val lock_acquired : unit -> unit
(** [lock_acquired ()] notifies about lock acquisition. *)

val lock_released : unit -> unit
(** [lock_acquired ()] notifies about lock release. *)
1 change: 1 addition & 0 deletions ocaml/tests/common/dune
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
xapi-stdext-date
xapi-stdext-threads.scheduler
xapi-stdext-unix
xapi_timeslice
)
)

4 changes: 3 additions & 1 deletion ocaml/tests/common/suite_init.ml
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,6 @@ let harness_init () =
Filename.concat Test_common.working_area "xapi-inventory" ;
Xcp_client.use_switch := false ;
Pool_role.set_pool_role_for_test () ;
Message_forwarding.register_callback_fns ()
Message_forwarding.register_callback_fns () ;
(* for unit tests use a fixed value *)
Xapi_timeslice.Timeslice.set 0.004
1 change: 1 addition & 0 deletions ocaml/xapi-idl/lib/dune
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
unix
uri
uuidm
xapi_timeslice
xapi-backtrace
xapi-consts
xapi-log
Expand Down
26 changes: 26 additions & 0 deletions ocaml/xapi-idl/lib/xcp_service.ml
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,26 @@ let setify =
in
loop []

(** How long to let an OCaml thread run, before
switching to another thread.
This needs to be as small as possible to reduce latency.

Too small values reduce performance due to context switching overheads

4ms = 1/HZ in Dom0 seems like a good default,
a better value will be written by a boot time service.
*)
let timeslice = ref 0.05

let adjust_timeslice () =
let interval = !timeslice in
D.debug "%s: Setting timeslice to %.3fs" __FUNCTION__ interval ;
if interval >= 0.05 then
D.debug "%s: Timeslice same as or larger than OCaml's default: not setting"
__FUNCTION__
else
Xapi_timeslice.Timeslice.set interval

let common_options =
[
( "use-switch"
Expand Down Expand Up @@ -236,6 +256,11 @@ let common_options =
, (fun () -> !config_dir)
, "Location of directory containing configuration file fragments"
)
; ( "timeslice"
, Arg.Set_float timeslice
, (fun () -> Printf.sprintf "%.3f" !timeslice)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure seconds is the best unit here when we mostly talk about milliseconds. However, this has enough resolution down to 1ms.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think with the statmemprof approach we could actually go below 1ms too (for testing purposes).
I'll need to do some tests on some of the newer hardware we got to see what is the smallest timeslice that would work, so maybe I'll need to increase this to %.4f or %.5f.
But in practice I think it'd be good to avoid switching more often than 1ms, just to avoid introducing too much overhead.

, "timeslice in seconds"
)
]

let loglevel () = !log_level
Expand Down Expand Up @@ -454,6 +479,7 @@ let configure_common ~options ~resources arg_parse_fn =
failwith (String.concat "\n" lines)
)
resources ;
adjust_timeslice () ;
Sys.set_signal Sys.sigpipe Sys.Signal_ignore

let configure ?(argv = Sys.argv) ?(options = []) ?(resources = []) () =
Expand Down
Loading