From 0df5d0efa07b16cfdaf64df1c8ce7a256c060e1e Mon Sep 17 00:00:00 2001 From: Abhisek Dwivedi Date: Thu, 30 Nov 2023 03:14:02 +0530 Subject: [PATCH] Handled device removal for ignored pods --- api/v1/aerospikecluster_types.go | 38 +++---------------- api/v1/zz_generated.deepcopy.go | 5 --- .../asdb.aerospike.com_aerospikeclusters.yaml | 36 ++++++++---------- controllers/rack.go | 24 ++++++++++++ controllers/reconciler.go | 37 +++++++++++++++++- ..._aerospikeclusters.asdb.aerospike.com.yaml | 36 ++++++++---------- 6 files changed, 97 insertions(+), 79 deletions(-) diff --git a/api/v1/aerospikecluster_types.go b/api/v1/aerospikecluster_types.go index 1a04b46aa..17e19f2bd 100644 --- a/api/v1/aerospikecluster_types.go +++ b/api/v1/aerospikecluster_types.go @@ -74,12 +74,6 @@ type AerospikeClusterSpec struct { //nolint:govet // for readability // RosterNodeBlockList is a list of blocked nodeIDs from roster in a strong-consistency setup // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Roster Node BlockList" RosterNodeBlockList []string `json:"rosterNodeBlockList,omitempty"` - // IgnorePodList is a list of pods that the operator will ignore while assessing cluster stability. - // Pods specified in this list are not considered part of the cluster. This is particularly useful when - // there are failed pods and the operator needs to perform certain operations on the cluster. Note that - // running pods included in this list will not be ignored. - // +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Ignore Pod List" - IgnorePodList []string `json:"ignorePodList,omitempty"` } type SeedsFinderServices struct { @@ -283,10 +277,12 @@ type RackConfig struct { //nolint:govet // for readability // RollingUpdateBatchSize is the percentage/number of rack pods that will be restarted simultaneously // +optional RollingUpdateBatchSize *intstr.IntOrString `json:"rollingUpdateBatchSize,omitempty"` - // MaxIgnorableFailedPods is the maximum percentage/number of rack failed pods that will be ignored while assessing - // cluster stability. Failed pods identified using this value are not considered part of the cluster. - // This is particularly useful when there are failed pods and the operator needs to perform certain operations on - // the cluster. + // MaxIgnorableFailedPods is the maximum percentage/number of rack pods that are in pending state due to scheduling + // issues. They are ignored while assessing cluster stability. Failed/pending pods identified using this value are + // not considered part of the cluster. + // This is particularly useful when there are failed/pending pods that cannot be recovered by updating the CR and + // the operator needs to perform certain operations on the cluster like Aerospike config change. + // Reset this value to 0 after the deployment is done, to avoid unintended consequences. // +optional MaxIgnorableFailedPods *intstr.IntOrString `json:"maxIgnorableFailedPods,omitempty"` } @@ -955,17 +951,6 @@ func CopySpecToStatus(spec *AerospikeClusterSpec) (*AerospikeClusterStatusSpec, status.RosterNodeBlockList = rosterNodeBlockList } - // IgnorePodList - if len(spec.IgnorePodList) != 0 { - var ignorePodList []string - - lib.DeepCopy( - &ignorePodList, &spec.IgnorePodList, - ) - - status.IgnorePodList = ignorePodList - } - return &status, nil } @@ -1056,16 +1041,5 @@ func CopyStatusToSpec(status *AerospikeClusterStatusSpec) (*AerospikeClusterSpec spec.RosterNodeBlockList = rosterNodeBlockList } - // IgnorePodList - if len(status.IgnorePodList) != 0 { - var ignorePodList []string - - lib.DeepCopy( - &ignorePodList, &status.IgnorePodList, - ) - - spec.IgnorePodList = ignorePodList - } - return &spec, nil } diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index bbbf11e1f..892f93d8b 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -183,11 +183,6 @@ func (in *AerospikeClusterSpec) DeepCopyInto(out *AerospikeClusterSpec) { *out = make([]string, len(*in)) copy(*out, *in) } - if in.IgnorePodList != nil { - in, out := &in.IgnorePodList, &out.IgnorePodList - *out = make([]string, len(*in)) - copy(*out, *in) - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AerospikeClusterSpec. diff --git a/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml b/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml index 78842d978..dda91cbf1 100644 --- a/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml +++ b/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml @@ -277,16 +277,6 @@ spec: - customInterface type: string type: object - ignorePodList: - description: IgnorePodList is a list of pods that the operator will - ignore while assessing cluster stability. Pods specified in this - list are not considered part of the cluster. This is particularly - useful when there are failed pods and the operator needs to perform - certain operations on the cluster. Note that running pods included - in this list will not be ignored. - items: - type: string - type: array image: description: Aerospike server image type: string @@ -4608,11 +4598,14 @@ spec: - type: integer - type: string description: MaxIgnorableFailedPods is the maximum percentage/number - of rack failed pods that will be ignored while assessing cluster - stability. Failed pods identified using this value are not considered - part of the cluster. This is particularly useful when there - are failed pods and the operator needs to perform certain operations - on the cluster. + of rack pods that are in pending state due to scheduling issues. + They are ignored while assessing cluster stability. Failed/pending + pods identified using this value are not considered part of + the cluster. This is particularly useful when there are failed/pending + pods that cannot be recovered by updating the CR and the operator + needs to perform certain operations on the cluster like Aerospike + config change. Reset this value to 0 after the deployment is + done, to avoid unintended consequences. x-kubernetes-int-or-string: true namespaces: description: List of Aerospike namespaces for which rack feature @@ -13366,11 +13359,14 @@ spec: - type: integer - type: string description: MaxIgnorableFailedPods is the maximum percentage/number - of rack failed pods that will be ignored while assessing cluster - stability. Failed pods identified using this value are not considered - part of the cluster. This is particularly useful when there - are failed pods and the operator needs to perform certain operations - on the cluster. + of rack pods that are in pending state due to scheduling issues. + They are ignored while assessing cluster stability. Failed/pending + pods identified using this value are not considered part of + the cluster. This is particularly useful when there are failed/pending + pods that cannot be recovered by updating the CR and the operator + needs to perform certain operations on the cluster like Aerospike + config change. Reset this value to 0 after the deployment is + done, to avoid unintended consequences. x-kubernetes-int-or-string: true namespaces: description: List of Aerospike namespaces for which rack feature diff --git a/controllers/rack.go b/controllers/rack.go index aa6f0670e..d2b6d5703 100644 --- a/controllers/rack.go +++ b/controllers/rack.go @@ -419,6 +419,30 @@ func (r *SingleClusterReconciler) upgradeOrRollingRestartRack(found *appsv1.Stat } } + podList, err := r.getOrderedRackPodList(rackState.Rack.ID) + if err != nil { + return found, reconcileError(fmt.Errorf("failed to list pods: %v", err)) + } + + // Filter ignoredPods to update their dirtyVolumes in the status. + // IgnoredPods are skipped from upgrade/rolling restart, and as a result in case of device removal, dirtyVolumes + // are not updated in their pod status. This makes devices un-reusable as they cannot be cleaned up during init phase. + // So, explicitly add dirtyVolumes for ignoredPods, so that they can be cleaned in the init phase. + var ignoredPod []*corev1.Pod + + for idx := range podList { + pod := podList[idx] + if ignorablePodNames.Has(pod.Name) { + ignoredPod = append(ignoredPod, pod) + } + } + + if len(ignoredPod) > 0 { + if err := r.handleNSOrDeviceRemoval(rackState, ignoredPod); err != nil { + return found, reconcileError(err) + } + } + return found, reconcileSuccess() } diff --git a/controllers/reconciler.go b/controllers/reconciler.go index 7d97e0437..bc8fbf3fd 100644 --- a/controllers/reconciler.go +++ b/controllers/reconciler.go @@ -179,8 +179,8 @@ func (r *SingleClusterReconciler) Reconcile() (ctrl.Result, error) { // Use policy from spec after setting up access control policy := r.getClientPolicy() - // revert migrate-fill-delay to original value if it was set to 0 during scale down - // Passing first rack from the list as all the racks will have same migrate-fill-delay + // Revert migrate-fill-delay to original value if it was set to 0 during scale down. + // Passing the first rack from the list as all the racks will have the same migrate-fill-delay // Redundant safe check to revert migrate-fill-delay if previous revert operation missed/skipped somehow if res := r.setMigrateFillDelay( policy, &r.aeroCluster.Spec.RackConfig.Racks[0].AerospikeConfig, @@ -216,6 +216,39 @@ func (r *SingleClusterReconciler) Reconcile() (ctrl.Result, error) { return reconcile.Result{}, err } + podList, gErr := r.getClusterPodList() + if gErr != nil { + r.Log.Error(gErr, "Failed to get cluster pod list") + return reconcile.Result{}, gErr + } + + r.Log.Info("Try to recover failed/pending pods if any") + + var anyPodFailed bool + // Try to recover failed/pending pods by deleting them + for idx := range podList.Items { + if cErr := utils.CheckPodFailed(&podList.Items[idx]); cErr != nil { + anyPodFailed = true + + if err := r.createOrUpdatePodServiceIfNeeded([]string{podList.Items[idx].Name}); err != nil { + return reconcile.Result{}, err + } + + if err := r.Client.Delete(context.TODO(), &podList.Items[idx]); err != nil { + r.Log.Error(err, "Failed to delete pod", "pod", podList.Items[idx].Name) + return reconcile.Result{}, err + } + + r.Log.Info("Deleted pod", "pod", podList.Items[idx].Name) + } + } + + if anyPodFailed { + return reconcile.Result{Requeue: true}, nil + } + + r.Log.Info("Reconcile completed successfully") + return reconcile.Result{}, nil } diff --git a/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml b/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml index 78842d978..dda91cbf1 100644 --- a/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml +++ b/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml @@ -277,16 +277,6 @@ spec: - customInterface type: string type: object - ignorePodList: - description: IgnorePodList is a list of pods that the operator will - ignore while assessing cluster stability. Pods specified in this - list are not considered part of the cluster. This is particularly - useful when there are failed pods and the operator needs to perform - certain operations on the cluster. Note that running pods included - in this list will not be ignored. - items: - type: string - type: array image: description: Aerospike server image type: string @@ -4608,11 +4598,14 @@ spec: - type: integer - type: string description: MaxIgnorableFailedPods is the maximum percentage/number - of rack failed pods that will be ignored while assessing cluster - stability. Failed pods identified using this value are not considered - part of the cluster. This is particularly useful when there - are failed pods and the operator needs to perform certain operations - on the cluster. + of rack pods that are in pending state due to scheduling issues. + They are ignored while assessing cluster stability. Failed/pending + pods identified using this value are not considered part of + the cluster. This is particularly useful when there are failed/pending + pods that cannot be recovered by updating the CR and the operator + needs to perform certain operations on the cluster like Aerospike + config change. Reset this value to 0 after the deployment is + done, to avoid unintended consequences. x-kubernetes-int-or-string: true namespaces: description: List of Aerospike namespaces for which rack feature @@ -13366,11 +13359,14 @@ spec: - type: integer - type: string description: MaxIgnorableFailedPods is the maximum percentage/number - of rack failed pods that will be ignored while assessing cluster - stability. Failed pods identified using this value are not considered - part of the cluster. This is particularly useful when there - are failed pods and the operator needs to perform certain operations - on the cluster. + of rack pods that are in pending state due to scheduling issues. + They are ignored while assessing cluster stability. Failed/pending + pods identified using this value are not considered part of + the cluster. This is particularly useful when there are failed/pending + pods that cannot be recovered by updating the CR and the operator + needs to perform certain operations on the cluster like Aerospike + config change. Reset this value to 0 after the deployment is + done, to avoid unintended consequences. x-kubernetes-int-or-string: true namespaces: description: List of Aerospike namespaces for which rack feature