Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CA-403422: lengthen the timeout for xenopsd's serialized tasks #6192

Merged
merged 2 commits into from
Jan 2, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 19 additions & 12 deletions ocaml/xenopsd/lib/xenops_server.ml
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,15 @@ let rec name_of_atomic = function
| Best_effort atomic ->
Printf.sprintf "Best_effort (%s)" (name_of_atomic atomic)

(** [atomic_expires_after op] is the timeout budget, in seconds, to allow for
    the atomic operation [op].

    - For [Serial (_, _, ops)] the budgets of all [ops] are summed.
    - For [Parallel (_, _, ops)] the largest budget among [ops] is used.
    - For [Best_effort op] the budget of the wrapped [op] is used, so that
      wrapping a compound operation does not shrink its budget back to the
      flat default.
    - Every other operation gets a flat 20 minutes.

    An empty [Serial] or [Parallel] group yields [0.]. *)
let rec atomic_expires_after = function
  | Serial (_, _, ops) ->
      (* Fold directly rather than building an intermediate list of floats. *)
      List.fold_left (fun total op -> total +. atomic_expires_after op) 0. ops
  | Parallel (_, _, ops) ->
      List.fold_left
        (fun longest op -> Float.max longest (atomic_expires_after op))
        0. ops
  | Best_effort op ->
      (* A wrapper around another atomic: defer to what it wraps. *)
      atomic_expires_after op
  | _ ->
      (* 20 minutes, in seconds *)
      1200.

type vm_migrate_op = {
vmm_id: Vm.id
; vmm_vdi_map: (string * string) list
Expand Down Expand Up @@ -1848,7 +1857,7 @@ let with_tracing ~name ~task f =
warn "Failed to start tracing: %s" (Printexc.to_string e) ;
f ()

let rec perform_atomic ~progress_callback ?subtask:_ ?result (op : atomic)
let rec perform_atomic ~progress_callback ?result (op : atomic)
(t : Xenops_task.task_handle) : unit =
let module B = (val get_backend () : S) in
with_tracing ~name:(name_of_atomic op) ~task:t @@ fun () ->
Expand Down Expand Up @@ -2341,16 +2350,17 @@ and queue_atomics_and_wait ~progress_callback ~max_parallel_atoms dbg id ops =
let atom_id =
Printf.sprintf "%s.chunk=%d.atom=%d" id chunk_idx atom_idx
in
queue_atomic_int ~progress_callback dbg atom_id op
(queue_atomic_int ~progress_callback dbg atom_id op, op)
)
ops
in
let timeout_start = Unix.gettimeofday () in
List.map
(fun task ->
(fun (task, op) ->
let task_id = Xenops_task.id_of_handle task in
let expiration = atomic_expires_after op in
let completion =
event_wait updates task ~from ~timeout_start 1200.0
event_wait updates task ~from ~timeout_start expiration
(is_task task_id) task_ended
in
(task_id, task, completion)
Expand Down Expand Up @@ -2386,7 +2396,7 @@ let perform_atomics atomics t =
progress_callback progress (weight /. total_weight) t
in
debug "Performing: %s" (string_of_atomic x) ;
perform_atomic ~subtask:(string_of_atomic x) ~progress_callback x t ;
perform_atomic ~progress_callback x t ;
progress_callback 1. ;
progress +. (weight /. total_weight)
)
Expand Down Expand Up @@ -2520,8 +2530,7 @@ and trigger_cleanup_after_failure_atom op t =
| VM_import_metadata _ ->
()

and perform_exn ?subtask ?result (op : operation) (t : Xenops_task.task_handle)
: unit =
and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit =
let module B = (val get_backend () : S) in
with_tracing ~name:(name_of_operation op) ~task:t @@ fun () ->
match op with
Expand Down Expand Up @@ -2648,9 +2657,7 @@ and perform_exn ?subtask ?result (op : operation) (t : Xenops_task.task_handle)
(id, vm.Vm.memory_dynamic_min, vm.Vm.memory_dynamic_min)
in
let (_ : unit) =
perform_atomic ~subtask:(string_of_atomic atomic)
~progress_callback:(fun _ -> ())
atomic t
perform_atomic ~progress_callback:(fun _ -> ()) atomic t
in
(* Waiting here is not essential but adds a degree of safety and
reduces unnecessary memory copying. *)
Expand Down Expand Up @@ -3162,7 +3169,7 @@ and perform_exn ?subtask ?result (op : operation) (t : Xenops_task.task_handle)
VUSB_DB.signal id
| Atomic op ->
let progress_callback = progress_callback 0. 1. t in
perform_atomic ~progress_callback ?subtask ?result op t
perform_atomic ~progress_callback ?result op t

and verify_power_state op =
let module B = (val get_backend () : S) in
Expand Down Expand Up @@ -3191,7 +3198,7 @@ and perform ?subtask ?result (op : operation) (t : Xenops_task.task_handle) :
unit =
let one op =
verify_power_state op ;
try perform_exn ?subtask ?result op t
try perform_exn ?result op t
with e ->
Backtrace.is_important e ;
info "Caught %s executing %s: triggering cleanup actions"
Expand Down
Loading