From f02c9e7cfe3e41feef5cfc3dbef77184d00696f8 Mon Sep 17 00:00:00 2001
From: Vincent Liu <shuntian.liu2@cloud.com>
Date: Mon, 17 Jun 2024 17:10:49 +0100
Subject: [PATCH] CA-394109: Reduce number of alerts

Currently there are two ways of raising an alert when a cluster host
joins/leaves the cluster: 1. through an API call such as
cluster-host-leave; 2. through the cluster watcher, which checks for
updates from `corosync-notifyd`. As a result, every alert is raised
twice.

This patch favours the second approach, as it accounts for an "unclean"
leave of a cluster host. Moreover, a "clean" leave triggered by an API
call will cause a change in `corosync-notifyd`, which will get detected
by the watcher anyway. This solves the double alerting problem.

Signed-off-by: Vincent Liu <shuntian.liu2@cloud.com>
---
 ocaml/xapi/xapi_cluster_host.ml | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/ocaml/xapi/xapi_cluster_host.ml b/ocaml/xapi/xapi_cluster_host.ml
index de14b266a96..11730e32e72 100644
--- a/ocaml/xapi/xapi_cluster_host.ml
+++ b/ocaml/xapi/xapi_cluster_host.ml
@@ -13,7 +13,6 @@
  *)
 
 open Xapi_clustering
-open Xapi_cluster_helpers
 open Ipaddr_rpc_type
 
 module D = Debug.Make (struct let name = "xapi_cluster_host" end)
@@ -55,19 +54,6 @@ let call_api_function_with_alert ~__context ~msg ~cls ~obj_uuid ~body
         raise err
   )
 
-let alert_for_cluster_host ~__context ~cluster_host ~missing_hosts ~new_hosts =
-  let num_hosts = Db.Cluster_host.get_all ~__context |> List.length in
-  let cluster = Db.Cluster_host.get_cluster ~__context ~self:cluster_host in
-  let quorum = Db.Cluster.get_quorum ~__context ~self:cluster |> Int64.to_int in
-  maybe_generate_alert ~__context ~missing_hosts ~new_hosts ~num_hosts ~quorum
-
-let alert_for_cluster_host_leave ~__context ~cluster_host =
-  alert_for_cluster_host ~__context ~cluster_host ~missing_hosts:[cluster_host]
-    ~new_hosts:[]
-
-let alert_for_cluster_host_join ~__context ~cluster_host =
-  alert_for_cluster_host ~__context ~cluster_host ~missing_hosts:[]
-    ~new_hosts:[cluster_host]
 
 (* Create xapi db object for cluster_host, resync_host calls clusterd *)
 let create_internal ~__context ~cluster ~host ~pIF : API.ref_Cluster_host =
@@ -81,7 +67,6 @@ let create_internal ~__context ~cluster ~host ~pIF : API.ref_Cluster_host =
         ~enabled:false ~current_operations:[] ~allowed_operations:[]
         ~other_config:[] ~joined:false ~live:false
         ~last_update_live:API.Date.epoch ;
-      alert_for_cluster_host_join ~__context ~cluster_host:ref ;
       ref
   )
 
@@ -274,14 +259,12 @@ let destroy_op ~__context ~self ~force =
       let result = local_fn (rpc ~__context) dbg in
       match Idl.IdM.run @@ Cluster_client.IDL.T.get result with
       | Ok () ->
-          alert_for_cluster_host_leave ~__context ~cluster_host:self ;
           Db.Cluster_host.destroy ~__context ~self ;
           debug "Cluster_host.%s was successful" fn_str ;
           Xapi_clustering.Daemon.disable ~__context
       | Error error ->
           warn "Error occurred during Cluster_host.%s" fn_str ;
           if force then (
-            alert_for_cluster_host_leave ~__context ~cluster_host:self ;
             let ref_str = Ref.string_of self in
             Db.Cluster_host.destroy ~__context ~self ;
             debug "Cluster_host %s force destroyed." ref_str
@@ -329,7 +312,6 @@ let forget ~__context ~self =
           Db.Cluster.set_pending_forget ~__context ~self:cluster ~value:[] ;
           (* must not disable the daemon here, because we declared another unreachable node dead,
            * not the current one *)
-          alert_for_cluster_host_leave ~__context ~cluster_host:self ;
           Db.Cluster_host.destroy ~__context ~self ;
           debug "Cluster_host.forget was successful"
       | Error error ->