From e202bfe0c5b63a199679e5014666925c7031859c Mon Sep 17 00:00:00 2001 From: Abhradeep Chakraborty Date: Thu, 22 Aug 2024 10:40:28 +0530 Subject: [PATCH] fix: failover if master pod is not ready Signed-off-by: Abhradeep Chakraborty --- .../dragonfly_pod_lifecycle_controller.go | 32 ++++--------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/internal/controller/dragonfly_pod_lifecycle_controller.go b/internal/controller/dragonfly_pod_lifecycle_controller.go index 7ee6f74..148b9d8 100644 --- a/internal/controller/dragonfly_pod_lifecycle_controller.go +++ b/internal/controller/dragonfly_pod_lifecycle_controller.go @@ -75,23 +75,16 @@ func (r *DfPodLifeCycleReconciler) Reconcile(ctx context.Context, req ctrl.Reque // Get the role of the pod role, roleExists := pod.Labels[resources.Role] if !isPodReady { - restartCount := getRestartCount(pod) if roleExists && role == "master" { - // If the master Pod is not ready and has restarted atleast once, initiate failover - if restartCount > 0 { - log.Info("Master pod is not starting after multiple attempts, initiating failover", "pod", req.NamespacedName, "restarts", restartCount) - err := dfi.configureReplication(ctx) - if err != nil { - log.Error(err, "Failed to initiate failover") - return ctrl.Result{RequeueAfter: 5 * time.Second}, err - } - return ctrl.Result{}, nil - } else { - log.Info("Master pod is not ready yet, will requeue", "pod", req.NamespacedName, "restarts", restartCount) - return ctrl.Result{RequeueAfter: 5 * time.Second}, nil + log.Info("Master pod is not ready, initiating failover", "pod", req.NamespacedName) + err := dfi.configureReplication(ctx) + if err != nil { + log.Error(err, "Failed to initiate failover") + return ctrl.Result{RequeueAfter: 5 * time.Second}, err } + return ctrl.Result{}, nil } else { - log.Info("Pod is not ready yet", "pod", req.NamespacedName, "restarts", restartCount) + log.Info("Pod is not ready yet", "pod", req.NamespacedName) return ctrl.Result{RequeueAfter: 5 * time.Second}, nil } } @@ -196,17 +189,6 @@ func (r *DfPodLifeCycleReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, nil } -// getRestartCount fetches the restart count for the given dragonfly pod. -func getRestartCount(pod corev1.Pod) int32 { - var restartCount int32 = 0 - for _, cs := range pod.Status.ContainerStatuses { - if cs.Name == "dragonfly" { - restartCount += cs.RestartCount - } - } - return restartCount -} - // SetupWithManager sets up the controller with the Manager. func (r *DfPodLifeCycleReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr).