From f387d52bf87571db21dcc467e8da203444bd03c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= Date: Mon, 14 Aug 2023 15:24:11 +0100 Subject: [PATCH] CA-380551: HA: assert that the HA SR is big enough for BOTH the statefile AND the redo log MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously only the statefile size was checked, and available space for the database VDI was only checked during DR in the metadata VDI code, but not during HA in the redolog code. With this change XenCenter is able to grey out SRs that are too small. After this change an HA heartbeat SR will have a physical utilization of `3997171712`, which is as close as we can get to 4GB (the next 4MiB increment would go over 4GB). Signed-off-by: Edwin Török --- ocaml/database/redo_log.ml | 13 +++++++++++-- ocaml/xapi/xha_statefile.ml | 31 ++++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/ocaml/database/redo_log.ml b/ocaml/database/redo_log.ml index 23f662c3428..663b06bbf3e 100644 --- a/ocaml/database/redo_log.ml +++ b/ocaml/database/redo_log.ml @@ -48,8 +48,17 @@ let mib megabytes = let ( ** ) = Int64.mul in Int64.of_int megabytes ** 1024L ** 1024L -(* Make sure we have plenty of room for the database *) -let minimum_vdi_size = mib 4096 +(* Make sure we have plenty of room for the database + There is also a 4MiB statefile, so make the sum be 4GB, which is easier to document. +*) +let minimum_vdi_size = + let ( // ) = Int64.div and ( ** ) = Int64.mul and ( -- ) = Int64.sub in + let align = mib 4 in + ((4_000_000_000L // align) + -- 2L + (* -2 because we also need room for a statefile, and an 'empty' SR seems to have a utilization of 4MiB *) + ) + ** align let redo_log_sm_config = [("type", "raw")] diff --git a/ocaml/xapi/xha_statefile.ml b/ocaml/xapi/xha_statefile.ml index 063b9dbcc47..357ad1bd6b2 100644 --- a/ocaml/xapi/xha_statefile.ml +++ b/ocaml/xapi/xha_statefile.ml @@ -29,34 +29,42 @@ open Client (** Return the minimum size of an HA statefile, as of XenServer HA state-file description vsn 1.3 *) -let minimum_size = +let minimum_statefile_size = let ( ** ) = Int64.mul and ( ++ ) = Int64.add in let global_section_size = 4096L and host_section_size = 4096L and maximum_number_of_hosts = 64L in global_section_size ++ (maximum_number_of_hosts ** host_section_size) -let ha_fits_sr ~__context ~sr ~typ ~minimum_size = +let round_to ~align n = Int64.(div (add n @@ sub align 1L) align |> mul align) + +(* SM doesn't actually allow us to create VDIs smaller than 4MiB, so we need to round up *) +let minimum_sr_size = + [minimum_statefile_size; Redo_log.minimum_vdi_size] + |> List.map @@ round_to ~align:Int64.(shift_left 1L 22) + |> List.fold_left Int64.add Int64.zero + +let ha_fits_sr ~__context ~what ~sr ~typ ~minimum_size = let ha_fits self = Db.VDI.get_type ~__context ~self = typ && Db.VDI.get_virtual_size ~__context ~self >= minimum_size in match List.filter ha_fits (Db.SR.get_VDIs ~__context ~self:sr) with | x :: _ -> - debug "Would re-use existing statefile: %s" + debug "Would re-use existing %s: %s" what (Db.VDI.get_uuid ~__context ~self:x) ; Some x | [] -> - debug - "no suitable existing statefile found; would have to create a fresh one" ; + debug "no suitable existing %s found; would have to create a fresh one" + what ; let self = sr in let size = Db.SR.get_physical_size ~__context ~self in let utilisation = Db.SR.get_physical_utilisation ~__context ~self in let free_space = Int64.sub size utilisation in - if free_space < minimum_size then ( + if free_space < minimum_sr_size then ( let sr = Ref.string_of sr in info "%s: SR %s size=%Ld utilisation=%Ld free=%Ld needed=%Ld" - __FUNCTION__ sr size utilisation free_space minimum_size ; + __FUNCTION__ sr size utilisation free_space minimum_sr_size ; raise (Api_errors.Server_error (Api_errors.sr_source_space_insufficient, [sr]) @@ -126,12 +134,17 @@ let check_sr_can_host_statefile ~__context ~sr ~cluster_stack = (Api_errors.Server_error (Api_errors.sr_operation_not_supported, [Ref.string_of sr]) ) ; - ha_fits_sr ~__context ~sr ~minimum_size ~typ:`ha_statefile + ha_fits_sr ~__context ~what:"statefile" ~sr + ~minimum_size:minimum_statefile_size ~typ:`ha_statefile let assert_sr_can_host_statefile ~__context ~sr ~cluster_stack = let (_ : 'a option) = check_sr_can_host_statefile ~__context ~sr ~cluster_stack in + let (_ : _ option) = + ha_fits_sr ~__context ~what:"redo-log" ~sr + ~minimum_size:Redo_log.minimum_vdi_size ~typ:`redo_log + in () let list_srs_which_can_host_statefile ~__context ~cluster_stack = @@ -146,7 +159,7 @@ let list_srs_which_can_host_statefile ~__context ~cluster_stack = let create ~__context ~sr ~cluster_stack = assert_sr_can_host_statefile ~__context ~sr ~cluster_stack ; - let size = minimum_size in + let size = minimum_statefile_size in Helpers.call_api_functions ~__context (fun rpc session_id -> Client.VDI.create ~rpc ~session_id ~name_label:"Statefile for HA" ~name_description:"Used for storage heartbeating" ~sR:sr