From 2989ad5165c49f96634ef15ba8fa14d07fab9797 Mon Sep 17 00:00:00 2001 From: Abhisek Dwivedi Date: Thu, 30 Nov 2023 21:28:15 +0530 Subject: [PATCH] Added/modified test-cases --- api/v1/aerospikecluster_types.go | 17 +- api/v1/aerospikecluster_validating_webhook.go | 8 +- api/v1/zz_generated.deepcopy.go | 4 +- .../asdb.aerospike.com_aerospikeclusters.yaml | 48 ++-- ...rnetes-operator.clusterserviceversion.yaml | 7 - controllers/pod.go | 2 +- controllers/rack.go | 28 +-- controllers/reconciler.go | 48 ++-- ..._aerospikeclusters.asdb.aerospike.com.yaml | 48 ++-- test/aero_info.go | 34 +++ test/cluster_helper.go | 22 ++ test/cluster_test.go | 205 +++++++++++++----- 12 files changed, 315 insertions(+), 156 deletions(-) diff --git a/api/v1/aerospikecluster_types.go b/api/v1/aerospikecluster_types.go index 60ef8f55b..9346610f5 100644 --- a/api/v1/aerospikecluster_types.go +++ b/api/v1/aerospikecluster_types.go @@ -277,14 +277,17 @@ type RackConfig struct { //nolint:govet // for readability // RollingUpdateBatchSize is the percentage/number of rack pods that will be restarted simultaneously // +optional RollingUpdateBatchSize *intstr.IntOrString `json:"rollingUpdateBatchSize,omitempty"` - // MaxIgnorableFailedPods is the maximum percentage/number of rack pods that are in pending state due to scheduling - // issues. They are ignored while assessing cluster stability. Failed/pending pods identified using this value are - // not considered part of the cluster. - // This is particularly useful when there are failed/pending pods that cannot be recovered by updating the CR and - // the operator needs to perform certain operations on the cluster like Aerospike config change. - // Reset this value to 0 after the deployment is done, to avoid unintended consequences. + // MaxIgnorablePods is the maximum number/percentage of pending/failed pods in a rack that are ignored while + // assessing cluster stability. Pods identified using this value are not considered part of the cluster. + // Additionally, in SC mode clusters, these pods are removed from the roster. + // This is particularly useful when some pods are stuck in pending/failed state due to any scheduling issues and + // cannot be fixed by simply updating the CR. + // It enables the operator to perform specific operations on the cluster, like changing Aerospike configurations, + // without being hindered by these problematic pods. + // Remember to set MaxIgnorablePods back to 0 once the required operation is done. + // This makes sure that later on, all pods are properly counted when evaluating the cluster stability. 
// +optional - MaxIgnorableFailedPods *intstr.IntOrString `json:"maxIgnorableFailedPods,omitempty"` + MaxIgnorablePods *intstr.IntOrString `json:"maxIgnorablePods,omitempty"` } // Rack specifies single rack config diff --git a/api/v1/aerospikecluster_validating_webhook.go b/api/v1/aerospikecluster_validating_webhook.go index 71c14f2d6..96d8e9454 100644 --- a/api/v1/aerospikecluster_validating_webhook.go +++ b/api/v1/aerospikecluster_validating_webhook.go @@ -638,10 +638,10 @@ func (c *AerospikeCluster) validateRackConfig(_ logr.Logger) error { } } - // Validate MaxIgnorableFailedPods param - if c.Spec.RackConfig.MaxIgnorableFailedPods != nil { - if err := validateIntOrStringField(c.Spec.RackConfig.MaxIgnorableFailedPods, - "spec.rackConfig.maxIgnorableFailedPods"); err != nil { + // Validate MaxIgnorablePods param + if c.Spec.RackConfig.MaxIgnorablePods != nil { + if err := validateIntOrStringField(c.Spec.RackConfig.MaxIgnorablePods, + "spec.rackConfig.maxIgnorablePods"); err != nil { return err } } diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 76d5c6591..6696bf022 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -848,8 +848,8 @@ func (in *RackConfig) DeepCopyInto(out *RackConfig) { *out = new(intstr.IntOrString) **out = **in } - if in.MaxIgnorableFailedPods != nil { - in, out := &in.MaxIgnorableFailedPods, &out.MaxIgnorableFailedPods + if in.MaxIgnorablePods != nil { + in, out := &in.MaxIgnorablePods, &out.MaxIgnorablePods *out = new(intstr.IntOrString) **out = **in } diff --git a/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml b/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml index ca804bf58..266989bf2 100644 --- a/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml +++ b/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml @@ -4593,19 +4593,23 @@ spec: Aerospike cluster. Pods will be deployed in given racks based on given configuration properties: - maxIgnorableFailedPods: + maxIgnorablePods: anyOf: - type: integer - type: string - description: MaxIgnorableFailedPods is the maximum percentage/number - of rack pods that are in pending state due to scheduling issues. - They are ignored while assessing cluster stability. Failed/pending - pods identified using this value are not considered part of - the cluster. This is particularly useful when there are failed/pending - pods that cannot be recovered by updating the CR and the operator - needs to perform certain operations on the cluster like Aerospike - config change. Reset this value to 0 after the deployment is - done, to avoid unintended consequences. + description: MaxIgnorablePods is the maximum number/percentage + of pending/failed pods in a rack that are ignored while assessing + cluster stability. Pods identified using this value are not + considered part of the cluster. Additionally, in SC mode clusters, + these pods are removed from the roster. This is particularly + useful when some pods are stuck in pending/failed state due + to any scheduling issues and cannot be fixed by simply updating + the CR. It enables the operator to perform specific operations + on the cluster, like changing Aerospike configurations, without + being hindered by these problematic pods. Remember to set MaxIgnorablePods + back to 0 once the required operation is done. This makes sure + that later on, all pods are properly counted when evaluating + the cluster stability. 
x-kubernetes-int-or-string: true namespaces: description: List of Aerospike namespaces for which rack feature @@ -13344,19 +13348,23 @@ spec: given configuration nullable: true properties: - maxIgnorableFailedPods: + maxIgnorablePods: anyOf: - type: integer - type: string - description: MaxIgnorableFailedPods is the maximum percentage/number - of rack pods that are in pending state due to scheduling issues. - They are ignored while assessing cluster stability. Failed/pending - pods identified using this value are not considered part of - the cluster. This is particularly useful when there are failed/pending - pods that cannot be recovered by updating the CR and the operator - needs to perform certain operations on the cluster like Aerospike - config change. Reset this value to 0 after the deployment is - done, to avoid unintended consequences. + description: MaxIgnorablePods is the maximum number/percentage + of pending/failed pods in a rack that are ignored while assessing + cluster stability. Pods identified using this value are not + considered part of the cluster. Additionally, in SC mode clusters, + these pods are removed from the roster. This is particularly + useful when some pods are stuck in pending/failed state due + to any scheduling issues and cannot be fixed by simply updating + the CR. It enables the operator to perform specific operations + on the cluster, like changing Aerospike configurations, without + being hindered by these problematic pods. Remember to set MaxIgnorablePods + back to 0 once the required operation is done. This makes sure + that later on, all pods are properly counted when evaluating + the cluster stability. x-kubernetes-int-or-string: true namespaces: description: List of Aerospike namespaces for which rack feature diff --git a/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml b/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml index 1a03a8841..ed26895a5 100644 --- a/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml @@ -47,13 +47,6 @@ spec: the Aerospike cluster. displayName: Aerospike Network Policy path: aerospikeNetworkPolicy - - description: IgnorePodList is a list of pods that the operator will ignore - while assessing cluster stability. Pods specified in this list are not considered - part of the cluster. This is particularly useful when there are failed pods - and the operator needs to perform certain operations on the cluster. Note - that running pods included in this list will not be ignored. 
- displayName: Ignore Pod List - path: ignorePodList - description: Aerospike server image displayName: Server Image path: image diff --git a/controllers/pod.go b/controllers/pod.go index 4a64fb304..188b7e8c1 100644 --- a/controllers/pod.go +++ b/controllers/pod.go @@ -674,7 +674,7 @@ func (r *SingleClusterReconciler) getIgnorablePods(racksToDelete []asdbv1.Rack, rack := &configureRacks[idx] failedAllowed, _ := intstr.GetScaledValueFromIntOrPercent( - r.aeroCluster.Spec.RackConfig.MaxIgnorableFailedPods, rack.Size, false, + r.aeroCluster.Spec.RackConfig.MaxIgnorablePods, rack.Size, false, ) podList, err := r.getRackPodList(rack.Rack.ID) diff --git a/controllers/rack.go b/controllers/rack.go index d2b6d5703..451d1dbea 100644 --- a/controllers/rack.go +++ b/controllers/rack.go @@ -424,22 +424,24 @@ func (r *SingleClusterReconciler) upgradeOrRollingRestartRack(found *appsv1.Stat return found, reconcileError(fmt.Errorf("failed to list pods: %v", err)) } - // Filter ignoredPods to update their dirtyVolumes in the status. - // IgnoredPods are skipped from upgrade/rolling restart, and as a result in case of device removal, dirtyVolumes - // are not updated in their pod status. This makes devices un-reusable as they cannot be cleaned up during init phase. - // So, explicitly add dirtyVolumes for ignoredPods, so that they can be cleaned in the init phase. - var ignoredPod []*corev1.Pod + if r.aeroCluster.Spec.RackConfig.MaxIgnorablePods != nil { + // Filter ignoredPods to update their dirtyVolumes in the status. + // IgnoredPods are skipped from upgrade/rolling restart, and as a result in case of device removal, dirtyVolumes + // are not updated in their pod status. This makes devices un-reusable as they cannot be cleaned up during init phase. + // So, explicitly add dirtyVolumes for ignoredPods, so that they can be cleaned in the init phase. 
+ var ignoredPod []*corev1.Pod - for idx := range podList { - pod := podList[idx] - if ignorablePodNames.Has(pod.Name) { - ignoredPod = append(ignoredPod, pod) + for idx := range podList { + pod := podList[idx] + if ignorablePodNames.Has(pod.Name) { + ignoredPod = append(ignoredPod, pod) + } } - } - if len(ignoredPod) > 0 { - if err := r.handleNSOrDeviceRemoval(rackState, ignoredPod); err != nil { - return found, reconcileError(err) + if len(ignoredPod) > 0 { + if err := r.handleNSOrDeviceRemoval(rackState, ignoredPod); err != nil { + return found, reconcileError(err) + } } } diff --git a/controllers/reconciler.go b/controllers/reconciler.go index bc8fbf3fd..4b1df97a9 100644 --- a/controllers/reconciler.go +++ b/controllers/reconciler.go @@ -216,35 +216,39 @@ func (r *SingleClusterReconciler) Reconcile() (ctrl.Result, error) { return reconcile.Result{}, err } - podList, gErr := r.getClusterPodList() - if gErr != nil { - r.Log.Error(gErr, "Failed to get cluster pod list") - return reconcile.Result{}, gErr - } + // Try to recover pods only when MaxIgnorablePods is set + if r.aeroCluster.Spec.RackConfig.MaxIgnorablePods != nil { + podList, gErr := r.getClusterPodList() + if gErr != nil { + r.Log.Error(gErr, "Failed to get cluster pod list") + return reconcile.Result{}, gErr + } - r.Log.Info("Try to recover failed/pending pods if any") + r.Log.Info("Try to recover failed/pending pods if any") - var anyPodFailed bool - // Try to recover failed/pending pods by deleting them - for idx := range podList.Items { - if cErr := utils.CheckPodFailed(&podList.Items[idx]); cErr != nil { - anyPodFailed = true + var anyPodFailed bool + // Try to recover failed/pending pods by deleting them + for idx := range podList.Items { + if cErr := utils.CheckPodFailed(&podList.Items[idx]); cErr != nil { + anyPodFailed = true - if err := r.createOrUpdatePodServiceIfNeeded([]string{podList.Items[idx].Name}); err != nil { - return reconcile.Result{}, err - } + if err := r.createOrUpdatePodServiceIfNeeded([]string{podList.Items[idx].Name}); err != nil { + return reconcile.Result{}, err + } - if err := r.Client.Delete(context.TODO(), &podList.Items[idx]); err != nil { - r.Log.Error(err, "Failed to delete pod", "pod", podList.Items[idx].Name) - return reconcile.Result{}, err - } + if err := r.Client.Delete(context.TODO(), &podList.Items[idx]); err != nil { + r.Log.Error(err, "Failed to delete pod", "pod", podList.Items[idx].Name) + return reconcile.Result{}, err + } - r.Log.Info("Deleted pod", "pod", podList.Items[idx].Name) + r.Log.Info("Deleted pod", "pod", podList.Items[idx].Name) + } } - } - if anyPodFailed { - return reconcile.Result{Requeue: true}, nil + if anyPodFailed { + r.Log.Info("Found failed/pending pod(s), requeuing") + return reconcile.Result{Requeue: true}, nil + } } r.Log.Info("Reconcile completed successfully") diff --git a/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml b/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml index ca804bf58..266989bf2 100644 --- a/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml +++ b/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml @@ -4593,19 +4593,23 @@ spec: Aerospike cluster. 
Pods will be deployed in given racks based on given configuration properties: - maxIgnorableFailedPods: + maxIgnorablePods: anyOf: - type: integer - type: string - description: MaxIgnorableFailedPods is the maximum percentage/number - of rack pods that are in pending state due to scheduling issues. - They are ignored while assessing cluster stability. Failed/pending - pods identified using this value are not considered part of - the cluster. This is particularly useful when there are failed/pending - pods that cannot be recovered by updating the CR and the operator - needs to perform certain operations on the cluster like Aerospike - config change. Reset this value to 0 after the deployment is - done, to avoid unintended consequences. + description: MaxIgnorablePods is the maximum number/percentage + of pending/failed pods in a rack that are ignored while assessing + cluster stability. Pods identified using this value are not + considered part of the cluster. Additionally, in SC mode clusters, + these pods are removed from the roster. This is particularly + useful when some pods are stuck in pending/failed state due + to any scheduling issues and cannot be fixed by simply updating + the CR. It enables the operator to perform specific operations + on the cluster, like changing Aerospike configurations, without + being hindered by these problematic pods. Remember to set MaxIgnorablePods + back to 0 once the required operation is done. This makes sure + that later on, all pods are properly counted when evaluating + the cluster stability. x-kubernetes-int-or-string: true namespaces: description: List of Aerospike namespaces for which rack feature @@ -13344,19 +13348,23 @@ spec: given configuration nullable: true properties: - maxIgnorableFailedPods: + maxIgnorablePods: anyOf: - type: integer - type: string - description: MaxIgnorableFailedPods is the maximum percentage/number - of rack pods that are in pending state due to scheduling issues. - They are ignored while assessing cluster stability. Failed/pending - pods identified using this value are not considered part of - the cluster. This is particularly useful when there are failed/pending - pods that cannot be recovered by updating the CR and the operator - needs to perform certain operations on the cluster like Aerospike - config change. Reset this value to 0 after the deployment is - done, to avoid unintended consequences. + description: MaxIgnorablePods is the maximum number/percentage + of pending/failed pods in a rack that are ignored while assessing + cluster stability. Pods identified using this value are not + considered part of the cluster. Additionally, in SC mode clusters, + these pods are removed from the roster. This is particularly + useful when some pods are stuck in pending/failed state due + to any scheduling issues and cannot be fixed by simply updating + the CR. It enables the operator to perform specific operations + on the cluster, like changing Aerospike configurations, without + being hindered by these problematic pods. Remember to set MaxIgnorablePods + back to 0 once the required operation is done. This makes sure + that later on, all pods are properly counted when evaluating + the cluster stability. 
x-kubernetes-int-or-string: true namespaces: description: List of Aerospike namespaces for which rack feature diff --git a/test/aero_info.go b/test/aero_info.go index 4156a99a6..e7092e019 100644 --- a/test/aero_info.go +++ b/test/aero_info.go @@ -288,6 +288,40 @@ func getNodeList(ctx goctx.Context, k8sClient client.Client) ( return nodeList, nil } +func cordonNodes(ctx goctx.Context, k8sClient client.Client, nodes []corev1.Node) error { + for idx := range nodes { + // fetch the latest node object to avoid object conflict + if err := k8sClient.Get(ctx, types.NamespacedName{Name: nodes[idx].Name}, &nodes[idx]); err != nil { + return err + } + + nodes[idx].Spec.Unschedulable = true + + if err := k8sClient.Update(ctx, &nodes[idx]); err != nil { + return err + } + } + + return nil +} + +func uncordonNodes(ctx goctx.Context, k8sClient client.Client, nodes []corev1.Node) error { + for idx := range nodes { + // fetch the latest node object to avoid object conflict + if err := k8sClient.Get(ctx, types.NamespacedName{Name: nodes[idx].Name}, &nodes[idx]); err != nil { + return err + } + + nodes[idx].Spec.Unschedulable = false + + if err := k8sClient.Update(ctx, &nodes[idx]); err != nil { + return err + } + } + + return nil +} + func getZones(ctx goctx.Context, k8sClient client.Client) ([]string, error) { unqZones := map[string]int{} diff --git a/test/cluster_helper.go b/test/cluster_helper.go index 599b5428a..b1263a8c7 100644 --- a/test/cluster_helper.go +++ b/test/cluster_helper.go @@ -4,6 +4,7 @@ import ( goctx "context" "errors" "fmt" + "reflect" "strconv" "time" @@ -541,6 +542,27 @@ func validateMigrateFillDelay( return err } +func validateDirtyVolumes( + ctx goctx.Context, k8sClient client.Client, + clusterNamespacedName types.NamespacedName, expectedVolumes []string, +) error { + aeroCluster, err := getCluster(k8sClient, ctx, clusterNamespacedName) + if err != nil { + return err + } + + for podName := range aeroCluster.Status.Pods { + if !reflect.DeepEqual(aeroCluster.Status.Pods[podName].DirtyVolumes, expectedVolumes) { + return fmt.Errorf( + "dirtyVolumes mismatch, expected: %v, found %v", expectedVolumes, + aeroCluster.Status.Pods[podName].DirtyVolumes, + ) + } + } + + return nil +} + func upgradeClusterTest( k8sClient client.Client, ctx goctx.Context, clusterNamespacedName types.NamespacedName, image string, diff --git a/test/cluster_test.go b/test/cluster_test.go index 57ee9e823..174b3523a 100644 --- a/test/cluster_test.go +++ b/test/cluster_test.go @@ -3,6 +3,7 @@ package test import ( goctx "context" "fmt" + "time" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -12,6 +13,7 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" asdbv1 "github.com/aerospike/aerospike-kubernetes-operator/api/v1" + "github.com/aerospike/aerospike-kubernetes-operator/pkg/utils" ) var _ = Describe( @@ -47,8 +49,8 @@ var _ = Describe( // }, // ) Context( - "DeployClusterWithIgnorePodList", func() { - clusterWithIgnorePodList(ctx) + "DeployClusterWithMaxIgnorablePod", func() { + clusterWithMaxIgnorablePod(ctx) }, ) Context( @@ -130,9 +132,9 @@ func ScaleDownWithMigrateFillDelay(ctx goctx.Context) { ) } -func clusterWithIgnorePodList(ctx goctx.Context) { +func clusterWithMaxIgnorablePod(ctx goctx.Context) { Context( - "UpdateClusterWithIgnorePodList", func() { + "UpdateClusterWithMaxIgnorablePodAndPendingPod", func() { clusterNamespacedName := getNamespacedName( "ignore-pod-cluster", namespace, ) @@ -140,31 +142,80 @@ func clusterWithIgnorePodList(ctx goctx.Context) { var ( aeroCluster *asdbv1.AerospikeCluster err error + nodeList = &v1.NodeList{} + podList = &v1.PodList{} + nodeToDrain int + ) - testClusterLifecycle = func(ignorePodName string) { - By(fmt.Sprintf("Fail %s aerospike pod", ignorePodName)) - pod := &v1.Pod{} + BeforeEach( + func() { + nodeList, err = getNodeList(ctx, k8sClient) + Expect(err).ToNot(HaveOccurred()) + nodeToDrain = len(nodeList.Items) / 2 + size := len(nodeList.Items) - nodeToDrain - err = k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName, - Namespace: clusterNamespacedName.Namespace}, pod) + err = cordonNodes(ctx, k8sClient, nodeList.Items[:nodeToDrain]) Expect(err).ToNot(HaveOccurred()) - pod.Spec.Containers[0].Image = "wrong-image" - err = k8sClient.Update(ctx, pod) + aeroCluster = createDummyAerospikeCluster(clusterNamespacedName, int32(size)) + nsList := aeroCluster.Spec.AerospikeConfig.Value["namespaces"].([]interface{}) + nsList = append(nsList, getNonSCNamespaceConfig("bar", "/test/dev/xvdf1")) + aeroCluster.Spec.AerospikeConfig.Value["namespaces"] = nsList + + aeroCluster.Spec.Storage.Volumes = append(aeroCluster.Spec.Storage.Volumes, + asdbv1.VolumeSpec{ + Name: "bar", + Source: asdbv1.VolumeSource{ + PersistentVolume: &asdbv1.PersistentVolumeSpec{ + Size: resource.MustParse("1Gi"), + StorageClass: storageClass, + VolumeMode: v1.PersistentVolumeBlock, + }, + }, + Aerospike: &asdbv1.AerospikeServerVolumeAttachment{ + Path: "/test/dev/xvdf1", + }, + }, + ) + racks := getDummyRackConf(1, 2) + aeroCluster.Spec.RackConfig = asdbv1.RackConfig{ + Namespaces: []string{scNamespace}, Racks: racks} + aeroCluster.Spec.PodSpec.MultiPodPerHost = false + err = deployCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) - By("Set IgnorePodList and scale down 1 pod") - aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName) + // make the node unschedulable and delete the pod to make it pending + By(fmt.Sprintf("Drain the node %s", nodeList.Items[nodeToDrain].Name)) + err = cordonNodes(ctx, k8sClient, []v1.Node{nodeList.Items[nodeToDrain]}) Expect(err).ToNot(HaveOccurred()) - val := intstr.FromInt(1) - aeroCluster.Spec.RackConfig.MaxIgnorableFailedPods = &val - aeroCluster.Spec.Size-- - err = updateCluster(k8sClient, ctx, aeroCluster) + + podList, err = getPodList(aeroCluster, k8sClient) + Expect(err).ToNot(HaveOccurred()) + for idx := range podList.Items { + if podList.Items[idx].Spec.NodeName == nodeList.Items[nodeToDrain].Name { + Expect(k8sClient.Delete(ctx, &podList.Items[idx])).NotTo(HaveOccurred()) + } + } + }, + ) + + AfterEach( + func() { + // Uncordon all nodes + err = 
uncordonNodes(ctx, k8sClient, nodeList.Items) + Expect(err).ToNot(HaveOccurred()) + err = deleteCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) + }, + ) - By("Rolling restart cluster") + It( + "Should allow cluster operations with pending pod", func() { + By("Set MaxIgnorablePod and Rolling restart cluster") aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName) Expect(err).ToNot(HaveOccurred()) + val := intstr.FromInt(1) + aeroCluster.Spec.RackConfig.MaxIgnorablePods = &val aeroCluster.Spec.AerospikeConfig.Value["service"].(map[string]interface{})["proto-fd-max"] = int64(18000) err = updateCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) @@ -177,75 +228,102 @@ func clusterWithIgnorePodList(ctx goctx.Context) { err = updateCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) - By("Scale up") + By("Verify pending pod") + podList, err = getPodList(aeroCluster, k8sClient) + + var counter int + + for idx := range podList.Items { + if podList.Items[idx].Status.Phase == v1.PodPending { + counter++ + } + } + // There should be only one pending pod + Expect(counter).To(Equal(1)) + + By("Scale down 1 pod") aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName) Expect(err).ToNot(HaveOccurred()) - aeroCluster.Spec.Size++ + aeroCluster.Spec.Size-- err = updateCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) - By(fmt.Sprintf("Verify pod %s is still in failed state", ignorePodName)) - err = k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName, - Namespace: clusterNamespacedName.Namespace}, pod) + By("Verify if all pods are running") + podList, err = getPodList(aeroCluster, k8sClient) Expect(err).ToNot(HaveOccurred()) - Expect(*pod.Status.ContainerStatuses[0].Started).To(BeFalse()) - Expect(pod.Status.ContainerStatuses[0].Ready).To(BeFalse()) - By(fmt.Sprintf( - "Remove pod from IgnorePodList and verify pod %s is in running state", ignorePodName)) + for idx := range podList.Items { + Expect(utils.IsPodRunningAndReady(&podList.Items[idx])).To(BeTrue()) + } + }, + ) + + It( + "Should allow namespace addition and removal with pending pod", func() { + By("Set MaxIgnorablePod and Rolling restart by removing namespace") aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName) Expect(err).ToNot(HaveOccurred()) - aeroCluster.Spec.RackConfig.MaxIgnorableFailedPods = nil + val := intstr.FromInt(1) + aeroCluster.Spec.RackConfig.MaxIgnorablePods = &val + nsList := aeroCluster.Spec.AerospikeConfig.Value["namespaces"].([]interface{}) + nsList = nsList[:len(nsList)-1] + aeroCluster.Spec.AerospikeConfig.Value["namespaces"] = nsList err = updateCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) - err = k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName, - Namespace: clusterNamespacedName.Namespace}, pod) + err = validateDirtyVolumes(ctx, k8sClient, clusterNamespacedName, []string{"bar"}) + Expect(err).ToNot(HaveOccurred()) + + By("RollingRestart by re-using previously removed namespace storage") + aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName) Expect(err).ToNot(HaveOccurred()) - Expect(*pod.Status.ContainerStatuses[0].Started).To(BeTrue()) - Expect(pod.Status.ContainerStatuses[0].Ready).To(BeTrue()) - Expect(pod.Spec.Containers[0].Image).To(Equal(newImage)) - } + nsList = aeroCluster.Spec.AerospikeConfig.Value["namespaces"].([]interface{}) + nsList = append(nsList, getNonSCNamespaceConfig("bar", "/test/dev/xvdf1")) + 
aeroCluster.Spec.AerospikeConfig.Value["namespaces"] = nsList + + err = updateCluster(k8sClient, ctx, aeroCluster) + Expect(err).ToNot(HaveOccurred()) + }, + ) + }, + ) + + Context( + "UpdateClusterWithMaxIgnorablePodAndFailedPod", func() { + clusterNamespacedName := getNamespacedName( + "ignore-pod-cluster", namespace, + ) + + var ( + aeroCluster *asdbv1.AerospikeCluster ) BeforeEach( func() { aeroCluster = createDummyAerospikeCluster(clusterNamespacedName, 4) + aeroCluster.Spec.AerospikeConfig = getSCAndNonSCAerospikeConfig() racks := getDummyRackConf(1, 2) - aeroCluster.Spec.RackConfig = asdbv1.RackConfig{Racks: racks} - err = deployCluster(k8sClient, ctx, aeroCluster) + aeroCluster.Spec.RackConfig = asdbv1.RackConfig{ + Namespaces: []string{scNamespace}, Racks: racks} + err := deployCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) }, ) AfterEach( func() { - err = deleteCluster(k8sClient, ctx, aeroCluster) + err := deleteCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) }, ) - It( - "Should allow cluster operations with random failed pod", func() { - // test with failed pod in between statefulset replicas - testClusterLifecycle(clusterNamespacedName.Name + "-2-0") - }, - ) - - It( - "Should allow cluster operations with sequential(last replica) failed pod", func() { - // test with last replica of statefulset as failed pod - testClusterLifecycle(clusterNamespacedName.Name + "-1-1") - }, - ) - It( "Should allow rack deletion with failed pods in different rack", func() { By("Fail 1-1 aerospike pod") ignorePodName := clusterNamespacedName.Name + "-1-1" pod := &v1.Pod{} - err = k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName, + err := k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName, Namespace: clusterNamespacedName.Namespace}, pod) Expect(err).ToNot(HaveOccurred()) @@ -257,17 +335,24 @@ func clusterWithIgnorePodList(ctx goctx.Context) { aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName) Expect(err).ToNot(HaveOccurred()) val := intstr.FromInt(1) - aeroCluster.Spec.RackConfig.MaxIgnorableFailedPods = &val - aeroCluster.Spec.RackConfig = asdbv1.RackConfig{Racks: getDummyRackConf(1)} + aeroCluster.Spec.RackConfig.MaxIgnorablePods = &val + aeroCluster.Spec.RackConfig.Racks = getDummyRackConf(1) err = updateCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) - By(fmt.Sprintf("Verify pod %s is still in failed state", ignorePodName)) - err = k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName, - Namespace: clusterNamespacedName.Namespace}, pod) - Expect(err).ToNot(HaveOccurred()) - Expect(*pod.Status.ContainerStatuses[0].Started).To(BeFalse()) - Expect(pod.Status.ContainerStatuses[0].Ready).To(BeFalse()) + By(fmt.Sprintf("Verify if failed pod %s is automatically recovered", ignorePodName)) + Eventually(func() bool { + err = k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName, + Namespace: clusterNamespacedName.Namespace}, pod) + + return *pod.Status.ContainerStatuses[0].Started && pod.Status.ContainerStatuses[0].Ready + }, 1*time.Minute).Should(BeTrue()) + + Eventually(func() error { + return InterceptGomegaFailure(func() { + validateRoster(k8sClient, ctx, clusterNamespacedName, scNamespace) + }) + }, 4*time.Minute).Should(BeNil()) }, ) },
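
Note: as a standalone illustration of the field semantics this patch documents, the sketch below shows how a `maxIgnorablePods` value resolves against a single rack. The field accepts either an absolute count or a percentage, and the operator scales it per rack with `intstr.GetScaledValueFromIntOrPercent` (as in `getIgnorablePods` in controllers/pod.go above). The rack size of 5 and the `"40%"` value are made-up numbers for illustration only.

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/util/intstr"
)

func main() {
	// Hypothetical rack size; in the operator this comes from the rack state.
	rackSize := 5

	// maxIgnorablePods can be an absolute count ...
	count := intstr.FromInt(1)
	// ... or a percentage of the rack size.
	percent := intstr.FromString("40%")

	// Either form is resolved against the rack size (rounding down), giving the
	// per-rack budget of pending/failed pods the operator may ignore.
	fromCount, _ := intstr.GetScaledValueFromIntOrPercent(&count, rackSize, false)
	fromPercent, _ := intstr.GetScaledValueFromIntOrPercent(&percent, rackSize, false)

	fmt.Println(fromCount)   // 1
	fmt.Println(fromPercent) // 2 (40% of a 5-pod rack, rounded down)
}
```

As the doc comment advises, once the one-off operation (config change, rack deletion, and so on) is done, `maxIgnorablePods` should be reset to 0 or removed so that every pod is counted again when assessing cluster stability; the tests above drive exactly that flow by setting the field via `intstr.FromInt(1)` on the CR.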