Skip to content

Commit

Permalink
CA-380551: HA: assert that the HA SR is big enough for BOTH the state…
Browse files Browse the repository at this point in the history
…file AND the redo log

Previously only the statefile size was checked, and available space for the database VDI was only checked
during DR in the metadata VDI code, but not during HA in the redolog code.

With this change XenCenter is able to grey out SRs that are too small.

After this change an HA heartbeat SR will have a physical utilization of `3997171712`, which is as close
as we can get to 4GB (the next 4MiB increment would go over 4GB).

Signed-off-by: Edwin Török <[email protected]>
  • Loading branch information
edwintorok committed Nov 6, 2023
1 parent b714a84 commit f387d52
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 11 deletions.
13 changes: 11 additions & 2 deletions ocaml/database/redo_log.ml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,17 @@ let mib megabytes =
let ( ** ) = Int64.mul in
Int64.of_int megabytes ** 1024L ** 1024L

(* Make sure we have plenty of room for the database *)
let minimum_vdi_size = mib 4096
(* Make sure we have plenty of room for the database.
There is also a 4MiB statefile, so make the sum come to just under 4GB (decimal, i.e. 4_000_000_000 bytes), which is easier to document.
*)
let minimum_vdi_size =
  (* Sizes are counted in whole 4MiB extents. *)
  let extent = mib 4 in
  (* Number of whole extents that fit in 4_000_000_000 bytes. *)
  let extents_in_budget = Int64.div 4_000_000_000L extent in
  (* Give up two extents: we also need room for a statefile, and an 'empty'
     SR seems to have a utilization of 4MiB. *)
  let extents_for_redo_log = Int64.sub extents_in_budget 2L in
  Int64.mul extents_for_redo_log extent

let redo_log_sm_config = [("type", "raw")]

Expand Down
31 changes: 22 additions & 9 deletions ocaml/xapi/xha_statefile.ml
Original file line number Diff line number Diff line change
Expand Up @@ -29,34 +29,42 @@ open Client

(** Return the minimum size of an HA statefile, as of
XenServer HA state-file description vsn 1.3 *)
let minimum_size =
let minimum_statefile_size =
  (* Layout: one global section followed by one section per host, sized for
     the maximum number of hosts. *)
  let global_bytes = 4096L in
  let per_host_bytes = 4096L in
  let max_hosts = 64L in
  Int64.add global_bytes (Int64.mul max_hosts per_host_bytes)

let ha_fits_sr ~__context ~sr ~typ ~minimum_size =
(* [round_to ~align n] is [n] rounded up to a multiple of [align]
   (for non-negative [n] and positive [align]; Int64 division truncates). *)
let round_to ~align n =
  (* Adding [align - 1] first makes the truncating division round upwards. *)
  let bumped = Int64.add n (Int64.pred align) in
  let whole_units = Int64.div bumped align in
  Int64.mul align whole_units

(* SM doesn't actually allow us to create VDIs smaller than 4MiB, so we need to round up *)
let minimum_sr_size =
  (* 1 lsl 22 bytes = 4MiB: each VDI size is rounded up to this granularity
     before being added to the total. *)
  let four_mib = Int64.shift_left 1L 22 in
  List.fold_left
    (fun total vdi_size -> Int64.add total (round_to ~align:four_mib vdi_size))
    Int64.zero
    [minimum_statefile_size; Redo_log.minimum_vdi_size]

let ha_fits_sr ~__context ~what ~sr ~typ ~minimum_size =
let ha_fits self =
Db.VDI.get_type ~__context ~self = typ
&& Db.VDI.get_virtual_size ~__context ~self >= minimum_size
in
match List.filter ha_fits (Db.SR.get_VDIs ~__context ~self:sr) with
| x :: _ ->
debug "Would re-use existing statefile: %s"
debug "Would re-use existing %s: %s" what
(Db.VDI.get_uuid ~__context ~self:x) ;
Some x
| [] ->
debug
"no suitable existing statefile found; would have to create a fresh one" ;
debug "no suitable existing %s found; would have to create a fresh one"
what ;
let self = sr in
let size = Db.SR.get_physical_size ~__context ~self in
let utilisation = Db.SR.get_physical_utilisation ~__context ~self in
let free_space = Int64.sub size utilisation in
if free_space < minimum_size then (
if free_space < minimum_sr_size then (
let sr = Ref.string_of sr in
info "%s: SR %s size=%Ld utilisation=%Ld free=%Ld needed=%Ld"
__FUNCTION__ sr size utilisation free_space minimum_size ;
__FUNCTION__ sr size utilisation free_space minimum_sr_size ;
raise
(Api_errors.Server_error
(Api_errors.sr_source_space_insufficient, [sr])
Expand Down Expand Up @@ -126,12 +134,17 @@ let check_sr_can_host_statefile ~__context ~sr ~cluster_stack =
(Api_errors.Server_error
(Api_errors.sr_operation_not_supported, [Ref.string_of sr])
) ;
ha_fits_sr ~__context ~sr ~minimum_size ~typ:`ha_statefile
ha_fits_sr ~__context ~what:"statefile" ~sr
~minimum_size:minimum_statefile_size ~typ:`ha_statefile

(* Runs the statefile check and then the redo-log check against [sr].
   The VDI options they return are discarded: this function only matters
   for the exceptions the checks raise when the SR is unsuitable. *)
let assert_sr_can_host_statefile ~__context ~sr ~cluster_stack =
  ignore (check_sr_can_host_statefile ~__context ~sr ~cluster_stack : _ option) ;
  ignore
    ( ha_fits_sr ~__context ~what:"redo-log" ~sr
        ~minimum_size:Redo_log.minimum_vdi_size ~typ:`redo_log
      : _ option
      )

let list_srs_which_can_host_statefile ~__context ~cluster_stack =
Expand All @@ -146,7 +159,7 @@ let list_srs_which_can_host_statefile ~__context ~cluster_stack =

let create ~__context ~sr ~cluster_stack =
assert_sr_can_host_statefile ~__context ~sr ~cluster_stack ;
let size = minimum_size in
let size = minimum_statefile_size in
Helpers.call_api_functions ~__context (fun rpc session_id ->
Client.VDI.create ~rpc ~session_id ~name_label:"Statefile for HA"
~name_description:"Used for storage heartbeating" ~sR:sr
Expand Down

0 comments on commit f387d52

Please sign in to comment.