Skip to content

Commit

Permalink
CA-402326: Fetch SM records from the pool in one go to avoid race (#6127
Browse files Browse the repository at this point in the history
)

Previously the SM feature check was done in two parts, fetch all the SM
ref from the coordinator, and then fetch their information such as types
and features from the coordinator basedon the refs. This can cause race
conditions, where the previously fetched refs might have been deleted
when we fetch the SM features. The deletion might happen due to the way
SM registeration works for shared SRs such as GFS2, where each
`PBD.plug` will trigger a deregister of existing SM and re-register
(which will delete existing SMs), and create another (very likely)
identical SM.

To avoid this race, instead of fetch SM refs and their features
separately, do this in one go so we get a consistent snapshot of the db
state.
  • Loading branch information
Vincent-lau authored Nov 21, 2024
2 parents 77dd474 + 630aead commit 7c62ede
Showing 1 changed file with 16 additions and 12 deletions.
28 changes: 16 additions & 12 deletions ocaml/xapi/xapi_pool.ml
Original file line number Diff line number Diff line change
Expand Up @@ -840,22 +840,27 @@ let pre_join_checks ~__context ~rpc ~session_id ~force =
)
in
let assert_sm_features_compatible () =
debug
"%s Checking whether SM features on the joining host is compatible with \
the pool"
__FUNCTION__ ;
(* We consider the case where the existing pool has FOO/m, and the candidate having FOO/n,
where n >= m, to be compatible. Not vice versa. *)
let features_compatible coor_features candidate_features =
(* The pool features must not be reduced or downgraded, although it is fine
the other way around. *)
Smint.compat_features coor_features candidate_features = coor_features
in

let master_sms = Client.SM.get_all ~rpc ~session_id in
let pool_sms = Client.SM.get_all_records ~rpc ~session_id in
List.iter
(fun sm ->
let master_sm_type = Client.SM.get_type ~rpc ~session_id ~self:sm in
(fun (sm_ref, sm_rec) ->
let pool_sm_type = sm_rec.API.sM_type in
debug "%s Checking SM %s of name %s in the pool" __FUNCTION__
(Ref.string_of sm_ref) sm_rec.sM_name_label ;
let candidate_sm_ref, candidate_sm_rec =
match
Db.SM.get_records_where ~__context
~expr:(Eq (Field "type", Literal master_sm_type))
~expr:(Eq (Field "type", Literal pool_sm_type))
with
| [(sm_ref, sm_rec)] ->
(sm_ref, sm_rec)
Expand All @@ -864,25 +869,24 @@ let pre_join_checks ~__context ~rpc ~session_id ~force =
Api_errors.(
Server_error
( pool_joining_sm_features_incompatible
, [Ref.string_of sm; ""]
, [Ref.string_of sm_ref; ""]
)
)
in

let coor_sm_features =
Client.SM.get_features ~rpc ~session_id ~self:sm
in
let pool_sm_features = sm_rec.sM_features in

let candidate_sm_features = candidate_sm_rec.API.sM_features in
if not (features_compatible coor_sm_features candidate_sm_features) then
if not (features_compatible pool_sm_features candidate_sm_features) then
raise
Api_errors.(
Server_error
( pool_joining_sm_features_incompatible
, [Ref.string_of sm; Ref.string_of candidate_sm_ref]
, [Ref.string_of sm_ref; Ref.string_of candidate_sm_ref]
)
)
)
master_sms
pool_sms
in

(* call pre-join asserts *)
Expand Down

0 comments on commit 7c62ede

Please sign in to comment.