diff --git a/api/v1/aerospikecluster_types.go b/api/v1/aerospikecluster_types.go
index 60ef8f55b..9346610f5 100644
--- a/api/v1/aerospikecluster_types.go
+++ b/api/v1/aerospikecluster_types.go
@@ -277,14 +277,17 @@ type RackConfig struct { //nolint:govet // for readability
 	// RollingUpdateBatchSize is the percentage/number of rack pods that will be restarted simultaneously
 	// +optional
 	RollingUpdateBatchSize *intstr.IntOrString `json:"rollingUpdateBatchSize,omitempty"`
-	// MaxIgnorableFailedPods is the maximum percentage/number of rack pods that are in pending state due to scheduling
-	// issues. They are ignored while assessing cluster stability. Failed/pending pods identified using this value are
-	// not considered part of the cluster.
-	// This is particularly useful when there are failed/pending pods that cannot be recovered by updating the CR and
-	// the operator needs to perform certain operations on the cluster like Aerospike config change.
-	// Reset this value to 0 after the deployment is done, to avoid unintended consequences.
+	// MaxIgnorablePods is the maximum number/percentage of pending/failed pods in a rack that are ignored while
+	// assessing cluster stability. Pods identified using this value are not considered part of the cluster.
+	// Additionally, in SC mode clusters, these pods are removed from the roster.
+	// This is particularly useful when some pods are stuck in pending/failed state due to any scheduling issues and
+	// cannot be fixed by simply updating the CR.
+	// It enables the operator to perform specific operations on the cluster, like changing Aerospike configurations,
+	// without being hindered by these problematic pods.
+	// Remember to set MaxIgnorablePods back to 0 once the required operation is done.
+	// This makes sure that later on, all pods are properly counted when evaluating the cluster stability.
// +optional - MaxIgnorableFailedPods *intstr.IntOrString `json:"maxIgnorableFailedPods,omitempty"` + MaxIgnorablePods *intstr.IntOrString `json:"maxIgnorablePods,omitempty"` } // Rack specifies single rack config diff --git a/api/v1/aerospikecluster_validating_webhook.go b/api/v1/aerospikecluster_validating_webhook.go index 71c14f2d6..96d8e9454 100644 --- a/api/v1/aerospikecluster_validating_webhook.go +++ b/api/v1/aerospikecluster_validating_webhook.go @@ -638,10 +638,10 @@ func (c *AerospikeCluster) validateRackConfig(_ logr.Logger) error { } } - // Validate MaxIgnorableFailedPods param - if c.Spec.RackConfig.MaxIgnorableFailedPods != nil { - if err := validateIntOrStringField(c.Spec.RackConfig.MaxIgnorableFailedPods, - "spec.rackConfig.maxIgnorableFailedPods"); err != nil { + // Validate MaxIgnorablePods param + if c.Spec.RackConfig.MaxIgnorablePods != nil { + if err := validateIntOrStringField(c.Spec.RackConfig.MaxIgnorablePods, + "spec.rackConfig.maxIgnorablePods"); err != nil { return err } } diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 76d5c6591..6696bf022 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -848,8 +848,8 @@ func (in *RackConfig) DeepCopyInto(out *RackConfig) { *out = new(intstr.IntOrString) **out = **in } - if in.MaxIgnorableFailedPods != nil { - in, out := &in.MaxIgnorableFailedPods, &out.MaxIgnorableFailedPods + if in.MaxIgnorablePods != nil { + in, out := &in.MaxIgnorablePods, &out.MaxIgnorablePods *out = new(intstr.IntOrString) **out = **in } diff --git a/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml b/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml index ca804bf58..266989bf2 100644 --- a/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml +++ b/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml @@ -4593,19 +4593,23 @@ spec: Aerospike cluster. Pods will be deployed in given racks based on given configuration properties: - maxIgnorableFailedPods: + maxIgnorablePods: anyOf: - type: integer - type: string - description: MaxIgnorableFailedPods is the maximum percentage/number - of rack pods that are in pending state due to scheduling issues. - They are ignored while assessing cluster stability. Failed/pending - pods identified using this value are not considered part of - the cluster. This is particularly useful when there are failed/pending - pods that cannot be recovered by updating the CR and the operator - needs to perform certain operations on the cluster like Aerospike - config change. Reset this value to 0 after the deployment is - done, to avoid unintended consequences. + description: MaxIgnorablePods is the maximum number/percentage + of pending/failed pods in a rack that are ignored while assessing + cluster stability. Pods identified using this value are not + considered part of the cluster. Additionally, in SC mode clusters, + these pods are removed from the roster. This is particularly + useful when some pods are stuck in pending/failed state due + to any scheduling issues and cannot be fixed by simply updating + the CR. It enables the operator to perform specific operations + on the cluster, like changing Aerospike configurations, without + being hindered by these problematic pods. Remember to set MaxIgnorablePods + back to 0 once the required operation is done. This makes sure + that later on, all pods are properly counted when evaluating + the cluster stability. 
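Note: maxIgnorablePods takes either an absolute count or a percentage, the same intstr.IntOrString convention that validateIntOrStringField checks above. A minimal usage sketch against the Go API, mirroring how the tests later in this diff drive the field (the concrete values are illustrative):

    // Tolerate one problematic pod per rack while the blocked operation is applied.
    val := intstr.FromInt(1) // a percentage such as intstr.FromString("10%") works as well
    aeroCluster.Spec.RackConfig.MaxIgnorablePods = &val
    // ... update the CR and let the operator finish, e.g. an Aerospike config change ...
    aeroCluster.Spec.RackConfig.MaxIgnorablePods = nil // reset (or set to 0) once done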
x-kubernetes-int-or-string: true namespaces: description: List of Aerospike namespaces for which rack feature @@ -13344,19 +13348,23 @@ spec: given configuration nullable: true properties: - maxIgnorableFailedPods: + maxIgnorablePods: anyOf: - type: integer - type: string - description: MaxIgnorableFailedPods is the maximum percentage/number - of rack pods that are in pending state due to scheduling issues. - They are ignored while assessing cluster stability. Failed/pending - pods identified using this value are not considered part of - the cluster. This is particularly useful when there are failed/pending - pods that cannot be recovered by updating the CR and the operator - needs to perform certain operations on the cluster like Aerospike - config change. Reset this value to 0 after the deployment is - done, to avoid unintended consequences. + description: MaxIgnorablePods is the maximum number/percentage + of pending/failed pods in a rack that are ignored while assessing + cluster stability. Pods identified using this value are not + considered part of the cluster. Additionally, in SC mode clusters, + these pods are removed from the roster. This is particularly + useful when some pods are stuck in pending/failed state due + to any scheduling issues and cannot be fixed by simply updating + the CR. It enables the operator to perform specific operations + on the cluster, like changing Aerospike configurations, without + being hindered by these problematic pods. Remember to set MaxIgnorablePods + back to 0 once the required operation is done. This makes sure + that later on, all pods are properly counted when evaluating + the cluster stability. x-kubernetes-int-or-string: true namespaces: description: List of Aerospike namespaces for which rack feature diff --git a/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml b/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml index 1a03a8841..ed26895a5 100644 --- a/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml @@ -47,13 +47,6 @@ spec: the Aerospike cluster. displayName: Aerospike Network Policy path: aerospikeNetworkPolicy - - description: IgnorePodList is a list of pods that the operator will ignore - while assessing cluster stability. Pods specified in this list are not considered - part of the cluster. This is particularly useful when there are failed pods - and the operator needs to perform certain operations on the cluster. Note - that running pods included in this list will not be ignored. - displayName: Ignore Pod List - path: ignorePodList - description: Aerospike server image displayName: Server Image path: image diff --git a/controllers/pod.go b/controllers/pod.go index 4a64fb304..3fb2f840e 100644 --- a/controllers/pod.go +++ b/controllers/pod.go @@ -650,7 +650,7 @@ func (r *SingleClusterReconciler) cleanupDanglingPodsRack(sts *appsv1.StatefulSe // getIgnorablePods returns pods: // 1. From racksToDelete that are currently not running and can be ignored in stability checks. -// 2. User given pods in ignorePodList that are currently not running and can be ignored from stability checks. +// 2. Failed/pending pods identified using maxIgnorablePods field and can be ignored from stability checks. 
func (r *SingleClusterReconciler) getIgnorablePods(racksToDelete []asdbv1.Rack, configureRacks []RackState) ( sets.Set[string], error, ) { @@ -674,7 +674,7 @@ func (r *SingleClusterReconciler) getIgnorablePods(racksToDelete []asdbv1.Rack, rack := &configureRacks[idx] failedAllowed, _ := intstr.GetScaledValueFromIntOrPercent( - r.aeroCluster.Spec.RackConfig.MaxIgnorableFailedPods, rack.Size, false, + r.aeroCluster.Spec.RackConfig.MaxIgnorablePods, rack.Size, false, ) podList, err := r.getRackPodList(rack.Rack.ID) diff --git a/controllers/rack.go b/controllers/rack.go index d2b6d5703..e03029ec0 100644 --- a/controllers/rack.go +++ b/controllers/rack.go @@ -419,27 +419,28 @@ func (r *SingleClusterReconciler) upgradeOrRollingRestartRack(found *appsv1.Stat } } - podList, err := r.getOrderedRackPodList(rackState.Rack.ID) - if err != nil { - return found, reconcileError(fmt.Errorf("failed to list pods: %v", err)) - } - - // Filter ignoredPods to update their dirtyVolumes in the status. - // IgnoredPods are skipped from upgrade/rolling restart, and as a result in case of device removal, dirtyVolumes - // are not updated in their pod status. This makes devices un-reusable as they cannot be cleaned up during init phase. - // So, explicitly add dirtyVolumes for ignoredPods, so that they can be cleaned in the init phase. - var ignoredPod []*corev1.Pod + if r.aeroCluster.Spec.RackConfig.MaxIgnorablePods != nil { + podList, err := r.getOrderedRackPodList(rackState.Rack.ID) + if err != nil { + return found, reconcileError(fmt.Errorf("failed to list pods: %v", err)) + } + // Filter ignoredPods to update their dirtyVolumes in the status. + // IgnoredPods are skipped from upgrade/rolling restart, and as a result in case of device removal, dirtyVolumes + // are not updated in their pod status. This makes devices un-reusable as they cannot be cleaned up during init phase. + // So, explicitly add dirtyVolumes for ignoredPods, so that they can be cleaned in the init phase. 
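The per-rack budget above is resolved with apimachinery's intstr.GetScaledValueFromIntOrPercent and roundUp=false, so a percentage is scaled against the rack size and rounded down. A quick sketch of that behaviour (values illustrative):

    pct := intstr.FromString("50%")
    allowed, _ := intstr.GetScaledValueFromIntOrPercent(&pct, 4, false) // rack of 4 pods
    // allowed == 2; "10%" on the same rack rounds down to 0, while intstr.FromInt(1)
    // always permits exactly one ignorable pod regardless of rack size.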
+ var ignoredPod []*corev1.Pod - for idx := range podList { - pod := podList[idx] - if ignorablePodNames.Has(pod.Name) { - ignoredPod = append(ignoredPod, pod) + for idx := range podList { + pod := podList[idx] + if ignorablePodNames.Has(pod.Name) { + ignoredPod = append(ignoredPod, pod) + } } - } - if len(ignoredPod) > 0 { - if err := r.handleNSOrDeviceRemoval(rackState, ignoredPod); err != nil { - return found, reconcileError(err) + if len(ignoredPod) > 0 { + if err := r.handleNSOrDeviceRemoval(rackState, ignoredPod); err != nil { + return found, reconcileError(err) + } } } diff --git a/controllers/reconciler.go b/controllers/reconciler.go index bc8fbf3fd..4b1df97a9 100644 --- a/controllers/reconciler.go +++ b/controllers/reconciler.go @@ -216,35 +216,39 @@ func (r *SingleClusterReconciler) Reconcile() (ctrl.Result, error) { return reconcile.Result{}, err } - podList, gErr := r.getClusterPodList() - if gErr != nil { - r.Log.Error(gErr, "Failed to get cluster pod list") - return reconcile.Result{}, gErr - } + // Try to recover pods only when MaxIgnorablePods is set + if r.aeroCluster.Spec.RackConfig.MaxIgnorablePods != nil { + podList, gErr := r.getClusterPodList() + if gErr != nil { + r.Log.Error(gErr, "Failed to get cluster pod list") + return reconcile.Result{}, gErr + } - r.Log.Info("Try to recover failed/pending pods if any") + r.Log.Info("Try to recover failed/pending pods if any") - var anyPodFailed bool - // Try to recover failed/pending pods by deleting them - for idx := range podList.Items { - if cErr := utils.CheckPodFailed(&podList.Items[idx]); cErr != nil { - anyPodFailed = true + var anyPodFailed bool + // Try to recover failed/pending pods by deleting them + for idx := range podList.Items { + if cErr := utils.CheckPodFailed(&podList.Items[idx]); cErr != nil { + anyPodFailed = true - if err := r.createOrUpdatePodServiceIfNeeded([]string{podList.Items[idx].Name}); err != nil { - return reconcile.Result{}, err - } + if err := r.createOrUpdatePodServiceIfNeeded([]string{podList.Items[idx].Name}); err != nil { + return reconcile.Result{}, err + } - if err := r.Client.Delete(context.TODO(), &podList.Items[idx]); err != nil { - r.Log.Error(err, "Failed to delete pod", "pod", podList.Items[idx].Name) - return reconcile.Result{}, err - } + if err := r.Client.Delete(context.TODO(), &podList.Items[idx]); err != nil { + r.Log.Error(err, "Failed to delete pod", "pod", podList.Items[idx].Name) + return reconcile.Result{}, err + } - r.Log.Info("Deleted pod", "pod", podList.Items[idx].Name) + r.Log.Info("Deleted pod", "pod", podList.Items[idx].Name) + } } - } - if anyPodFailed { - return reconcile.Result{Requeue: true}, nil + if anyPodFailed { + r.Log.Info("Found failed/pending pod(s), requeuing") + return reconcile.Result{Requeue: true}, nil + } } r.Log.Info("Reconcile completed successfully") diff --git a/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml b/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml index ca804bf58..266989bf2 100644 --- a/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml +++ b/helm-charts/aerospike-kubernetes-operator/crds/customresourcedefinition_aerospikeclusters.asdb.aerospike.com.yaml @@ -4593,19 +4593,23 @@ spec: Aerospike cluster. 
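Condensed view of the recovery path added to Reconcile above, with error handling elided: recovery only runs when maxIgnorablePods is set, pods flagged by utils.CheckPodFailed are deleted so their StatefulSet recreates them, and the reconcile requeues until they come back.

    if r.aeroCluster.Spec.RackConfig.MaxIgnorablePods != nil {
        anyPodFailed := false
        for idx := range podList.Items {
            pod := &podList.Items[idx]
            if utils.CheckPodFailed(pod) != nil { // failed or stuck-pending pod
                anyPodFailed = true
                _ = r.createOrUpdatePodServiceIfNeeded([]string{pod.Name}) // refresh the pod service first
                _ = r.Client.Delete(context.TODO(), pod)                   // StatefulSet recreates the pod
            }
        }
        if anyPodFailed {
            return reconcile.Result{Requeue: true}, nil
        }
    }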
Pods will be deployed in given racks based on given configuration properties: - maxIgnorableFailedPods: + maxIgnorablePods: anyOf: - type: integer - type: string - description: MaxIgnorableFailedPods is the maximum percentage/number - of rack pods that are in pending state due to scheduling issues. - They are ignored while assessing cluster stability. Failed/pending - pods identified using this value are not considered part of - the cluster. This is particularly useful when there are failed/pending - pods that cannot be recovered by updating the CR and the operator - needs to perform certain operations on the cluster like Aerospike - config change. Reset this value to 0 after the deployment is - done, to avoid unintended consequences. + description: MaxIgnorablePods is the maximum number/percentage + of pending/failed pods in a rack that are ignored while assessing + cluster stability. Pods identified using this value are not + considered part of the cluster. Additionally, in SC mode clusters, + these pods are removed from the roster. This is particularly + useful when some pods are stuck in pending/failed state due + to any scheduling issues and cannot be fixed by simply updating + the CR. It enables the operator to perform specific operations + on the cluster, like changing Aerospike configurations, without + being hindered by these problematic pods. Remember to set MaxIgnorablePods + back to 0 once the required operation is done. This makes sure + that later on, all pods are properly counted when evaluating + the cluster stability. x-kubernetes-int-or-string: true namespaces: description: List of Aerospike namespaces for which rack feature @@ -13344,19 +13348,23 @@ spec: given configuration nullable: true properties: - maxIgnorableFailedPods: + maxIgnorablePods: anyOf: - type: integer - type: string - description: MaxIgnorableFailedPods is the maximum percentage/number - of rack pods that are in pending state due to scheduling issues. - They are ignored while assessing cluster stability. Failed/pending - pods identified using this value are not considered part of - the cluster. This is particularly useful when there are failed/pending - pods that cannot be recovered by updating the CR and the operator - needs to perform certain operations on the cluster like Aerospike - config change. Reset this value to 0 after the deployment is - done, to avoid unintended consequences. + description: MaxIgnorablePods is the maximum number/percentage + of pending/failed pods in a rack that are ignored while assessing + cluster stability. Pods identified using this value are not + considered part of the cluster. Additionally, in SC mode clusters, + these pods are removed from the roster. This is particularly + useful when some pods are stuck in pending/failed state due + to any scheduling issues and cannot be fixed by simply updating + the CR. It enables the operator to perform specific operations + on the cluster, like changing Aerospike configurations, without + being hindered by these problematic pods. Remember to set MaxIgnorablePods + back to 0 once the required operation is done. This makes sure + that later on, all pods are properly counted when evaluating + the cluster stability. 
x-kubernetes-int-or-string: true namespaces: description: List of Aerospike namespaces for which rack feature diff --git a/test/aero_info.go b/test/aero_info.go index 4156a99a6..e7092e019 100644 --- a/test/aero_info.go +++ b/test/aero_info.go @@ -288,6 +288,40 @@ func getNodeList(ctx goctx.Context, k8sClient client.Client) ( return nodeList, nil } +func cordonNodes(ctx goctx.Context, k8sClient client.Client, nodes []corev1.Node) error { + for idx := range nodes { + // fetch the latest node object to avoid object conflict + if err := k8sClient.Get(ctx, types.NamespacedName{Name: nodes[idx].Name}, &nodes[idx]); err != nil { + return err + } + + nodes[idx].Spec.Unschedulable = true + + if err := k8sClient.Update(ctx, &nodes[idx]); err != nil { + return err + } + } + + return nil +} + +func uncordonNodes(ctx goctx.Context, k8sClient client.Client, nodes []corev1.Node) error { + for idx := range nodes { + // fetch the latest node object to avoid object conflict + if err := k8sClient.Get(ctx, types.NamespacedName{Name: nodes[idx].Name}, &nodes[idx]); err != nil { + return err + } + + nodes[idx].Spec.Unschedulable = false + + if err := k8sClient.Update(ctx, &nodes[idx]); err != nil { + return err + } + } + + return nil +} + func getZones(ctx goctx.Context, k8sClient client.Client) ([]string, error) { unqZones := map[string]int{} diff --git a/test/cluster_helper.go b/test/cluster_helper.go index 599b5428a..b1263a8c7 100644 --- a/test/cluster_helper.go +++ b/test/cluster_helper.go @@ -4,6 +4,7 @@ import ( goctx "context" "errors" "fmt" + "reflect" "strconv" "time" @@ -541,6 +542,27 @@ func validateMigrateFillDelay( return err } +func validateDirtyVolumes( + ctx goctx.Context, k8sClient client.Client, + clusterNamespacedName types.NamespacedName, expectedVolumes []string, +) error { + aeroCluster, err := getCluster(k8sClient, ctx, clusterNamespacedName) + if err != nil { + return err + } + + for podName := range aeroCluster.Status.Pods { + if !reflect.DeepEqual(aeroCluster.Status.Pods[podName].DirtyVolumes, expectedVolumes) { + return fmt.Errorf( + "dirtyVolumes mismatch, expected: %v, found %v", expectedVolumes, + aeroCluster.Status.Pods[podName].DirtyVolumes, + ) + } + } + + return nil +} + func upgradeClusterTest( k8sClient client.Client, ctx goctx.Context, clusterNamespacedName types.NamespacedName, image string, diff --git a/test/cluster_test.go b/test/cluster_test.go index 57ee9e823..174b3523a 100644 --- a/test/cluster_test.go +++ b/test/cluster_test.go @@ -3,6 +3,7 @@ package test import ( goctx "context" "fmt" + "time" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" @@ -12,6 +13,7 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" asdbv1 "github.com/aerospike/aerospike-kubernetes-operator/api/v1" + "github.com/aerospike/aerospike-kubernetes-operator/pkg/utils" ) var _ = Describe( @@ -47,8 +49,8 @@ var _ = Describe( // }, // ) Context( - "DeployClusterWithIgnorePodList", func() { - clusterWithIgnorePodList(ctx) + "DeployClusterWithMaxIgnorablePod", func() { + clusterWithMaxIgnorablePod(ctx) }, ) Context( @@ -130,9 +132,9 @@ func ScaleDownWithMigrateFillDelay(ctx goctx.Context) { ) } -func clusterWithIgnorePodList(ctx goctx.Context) { +func clusterWithMaxIgnorablePod(ctx goctx.Context) { Context( - "UpdateClusterWithIgnorePodList", func() { + "UpdateClusterWithMaxIgnorablePodAndPendingPod", func() { clusterNamespacedName := getNamespacedName( "ignore-pod-cluster", namespace, ) @@ -140,31 +142,80 @@ func clusterWithIgnorePodList(ctx goctx.Context) { var ( aeroCluster *asdbv1.AerospikeCluster err error + nodeList = &v1.NodeList{} + podList = &v1.PodList{} + nodeToDrain int + ) - testClusterLifecycle = func(ignorePodName string) { - By(fmt.Sprintf("Fail %s aerospike pod", ignorePodName)) - pod := &v1.Pod{} + BeforeEach( + func() { + nodeList, err = getNodeList(ctx, k8sClient) + Expect(err).ToNot(HaveOccurred()) + nodeToDrain = len(nodeList.Items) / 2 + size := len(nodeList.Items) - nodeToDrain - err = k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName, - Namespace: clusterNamespacedName.Namespace}, pod) + err = cordonNodes(ctx, k8sClient, nodeList.Items[:nodeToDrain]) Expect(err).ToNot(HaveOccurred()) - pod.Spec.Containers[0].Image = "wrong-image" - err = k8sClient.Update(ctx, pod) + aeroCluster = createDummyAerospikeCluster(clusterNamespacedName, int32(size)) + nsList := aeroCluster.Spec.AerospikeConfig.Value["namespaces"].([]interface{}) + nsList = append(nsList, getNonSCNamespaceConfig("bar", "/test/dev/xvdf1")) + aeroCluster.Spec.AerospikeConfig.Value["namespaces"] = nsList + + aeroCluster.Spec.Storage.Volumes = append(aeroCluster.Spec.Storage.Volumes, + asdbv1.VolumeSpec{ + Name: "bar", + Source: asdbv1.VolumeSource{ + PersistentVolume: &asdbv1.PersistentVolumeSpec{ + Size: resource.MustParse("1Gi"), + StorageClass: storageClass, + VolumeMode: v1.PersistentVolumeBlock, + }, + }, + Aerospike: &asdbv1.AerospikeServerVolumeAttachment{ + Path: "/test/dev/xvdf1", + }, + }, + ) + racks := getDummyRackConf(1, 2) + aeroCluster.Spec.RackConfig = asdbv1.RackConfig{ + Namespaces: []string{scNamespace}, Racks: racks} + aeroCluster.Spec.PodSpec.MultiPodPerHost = false + err = deployCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) - By("Set IgnorePodList and scale down 1 pod") - aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName) + // make the node unschedulable and delete the pod to make it pending + By(fmt.Sprintf("Drain the node %s", nodeList.Items[nodeToDrain].Name)) + err = cordonNodes(ctx, k8sClient, []v1.Node{nodeList.Items[nodeToDrain]}) Expect(err).ToNot(HaveOccurred()) - val := intstr.FromInt(1) - aeroCluster.Spec.RackConfig.MaxIgnorableFailedPods = &val - aeroCluster.Spec.Size-- - err = updateCluster(k8sClient, ctx, aeroCluster) + + podList, err = getPodList(aeroCluster, k8sClient) + Expect(err).ToNot(HaveOccurred()) + for idx := range podList.Items { + if podList.Items[idx].Spec.NodeName == nodeList.Items[nodeToDrain].Name { + Expect(k8sClient.Delete(ctx, &podList.Items[idx])).NotTo(HaveOccurred()) + } + } + }, + ) + + AfterEach( + func() { + // Uncordon all nodes + err = 
uncordonNodes(ctx, k8sClient, nodeList.Items) + Expect(err).ToNot(HaveOccurred()) + err = deleteCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) + }, + ) - By("Rolling restart cluster") + It( + "Should allow cluster operations with pending pod", func() { + By("Set MaxIgnorablePod and Rolling restart cluster") aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName) Expect(err).ToNot(HaveOccurred()) + val := intstr.FromInt(1) + aeroCluster.Spec.RackConfig.MaxIgnorablePods = &val aeroCluster.Spec.AerospikeConfig.Value["service"].(map[string]interface{})["proto-fd-max"] = int64(18000) err = updateCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) @@ -177,75 +228,102 @@ func clusterWithIgnorePodList(ctx goctx.Context) { err = updateCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) - By("Scale up") + By("Verify pending pod") + podList, err = getPodList(aeroCluster, k8sClient) + + var counter int + + for idx := range podList.Items { + if podList.Items[idx].Status.Phase == v1.PodPending { + counter++ + } + } + // There should be only one pending pod + Expect(counter).To(Equal(1)) + + By("Scale down 1 pod") aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName) Expect(err).ToNot(HaveOccurred()) - aeroCluster.Spec.Size++ + aeroCluster.Spec.Size-- err = updateCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) - By(fmt.Sprintf("Verify pod %s is still in failed state", ignorePodName)) - err = k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName, - Namespace: clusterNamespacedName.Namespace}, pod) + By("Verify if all pods are running") + podList, err = getPodList(aeroCluster, k8sClient) Expect(err).ToNot(HaveOccurred()) - Expect(*pod.Status.ContainerStatuses[0].Started).To(BeFalse()) - Expect(pod.Status.ContainerStatuses[0].Ready).To(BeFalse()) - By(fmt.Sprintf( - "Remove pod from IgnorePodList and verify pod %s is in running state", ignorePodName)) + for idx := range podList.Items { + Expect(utils.IsPodRunningAndReady(&podList.Items[idx])).To(BeTrue()) + } + }, + ) + + It( + "Should allow namespace addition and removal with pending pod", func() { + By("Set MaxIgnorablePod and Rolling restart by removing namespace") aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName) Expect(err).ToNot(HaveOccurred()) - aeroCluster.Spec.RackConfig.MaxIgnorableFailedPods = nil + val := intstr.FromInt(1) + aeroCluster.Spec.RackConfig.MaxIgnorablePods = &val + nsList := aeroCluster.Spec.AerospikeConfig.Value["namespaces"].([]interface{}) + nsList = nsList[:len(nsList)-1] + aeroCluster.Spec.AerospikeConfig.Value["namespaces"] = nsList err = updateCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) - err = k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName, - Namespace: clusterNamespacedName.Namespace}, pod) + err = validateDirtyVolumes(ctx, k8sClient, clusterNamespacedName, []string{"bar"}) + Expect(err).ToNot(HaveOccurred()) + + By("RollingRestart by re-using previously removed namespace storage") + aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName) Expect(err).ToNot(HaveOccurred()) - Expect(*pod.Status.ContainerStatuses[0].Started).To(BeTrue()) - Expect(pod.Status.ContainerStatuses[0].Ready).To(BeTrue()) - Expect(pod.Spec.Containers[0].Image).To(Equal(newImage)) - } + nsList = aeroCluster.Spec.AerospikeConfig.Value["namespaces"].([]interface{}) + nsList = append(nsList, getNonSCNamespaceConfig("bar", "/test/dev/xvdf1")) + 
aeroCluster.Spec.AerospikeConfig.Value["namespaces"] = nsList + + err = updateCluster(k8sClient, ctx, aeroCluster) + Expect(err).ToNot(HaveOccurred()) + }, + ) + }, + ) + + Context( + "UpdateClusterWithMaxIgnorablePodAndFailedPod", func() { + clusterNamespacedName := getNamespacedName( + "ignore-pod-cluster", namespace, + ) + + var ( + aeroCluster *asdbv1.AerospikeCluster ) BeforeEach( func() { aeroCluster = createDummyAerospikeCluster(clusterNamespacedName, 4) + aeroCluster.Spec.AerospikeConfig = getSCAndNonSCAerospikeConfig() racks := getDummyRackConf(1, 2) - aeroCluster.Spec.RackConfig = asdbv1.RackConfig{Racks: racks} - err = deployCluster(k8sClient, ctx, aeroCluster) + aeroCluster.Spec.RackConfig = asdbv1.RackConfig{ + Namespaces: []string{scNamespace}, Racks: racks} + err := deployCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) }, ) AfterEach( func() { - err = deleteCluster(k8sClient, ctx, aeroCluster) + err := deleteCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) }, ) - It( - "Should allow cluster operations with random failed pod", func() { - // test with failed pod in between statefulset replicas - testClusterLifecycle(clusterNamespacedName.Name + "-2-0") - }, - ) - - It( - "Should allow cluster operations with sequential(last replica) failed pod", func() { - // test with last replica of statefulset as failed pod - testClusterLifecycle(clusterNamespacedName.Name + "-1-1") - }, - ) - It( "Should allow rack deletion with failed pods in different rack", func() { By("Fail 1-1 aerospike pod") ignorePodName := clusterNamespacedName.Name + "-1-1" pod := &v1.Pod{} - err = k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName, + err := k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName, Namespace: clusterNamespacedName.Namespace}, pod) Expect(err).ToNot(HaveOccurred()) @@ -257,17 +335,24 @@ func clusterWithIgnorePodList(ctx goctx.Context) { aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName) Expect(err).ToNot(HaveOccurred()) val := intstr.FromInt(1) - aeroCluster.Spec.RackConfig.MaxIgnorableFailedPods = &val - aeroCluster.Spec.RackConfig = asdbv1.RackConfig{Racks: getDummyRackConf(1)} + aeroCluster.Spec.RackConfig.MaxIgnorablePods = &val + aeroCluster.Spec.RackConfig.Racks = getDummyRackConf(1) err = updateCluster(k8sClient, ctx, aeroCluster) Expect(err).ToNot(HaveOccurred()) - By(fmt.Sprintf("Verify pod %s is still in failed state", ignorePodName)) - err = k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName, - Namespace: clusterNamespacedName.Namespace}, pod) - Expect(err).ToNot(HaveOccurred()) - Expect(*pod.Status.ContainerStatuses[0].Started).To(BeFalse()) - Expect(pod.Status.ContainerStatuses[0].Ready).To(BeFalse()) + By(fmt.Sprintf("Verify if failed pod %s is automatically recovered", ignorePodName)) + Eventually(func() bool { + err = k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName, + Namespace: clusterNamespacedName.Namespace}, pod) + + return *pod.Status.ContainerStatuses[0].Started && pod.Status.ContainerStatuses[0].Ready + }, 1*time.Minute).Should(BeTrue()) + + Eventually(func() error { + return InterceptGomegaFailure(func() { + validateRoster(k8sClient, ctx, clusterNamespacedName, scNamespace) + }) + }, 4*time.Minute).Should(BeNil()) }, ) },
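For readers of the new tests: the pending-pod scenario is manufactured with the cordonNodes/uncordonNodes helpers added in test/aero_info.go, by cordoning a node (Spec.Unschedulable = true) and deleting the pod scheduled on it, so the StatefulSet recreates the pod but it can no longer be scheduled. A minimal sketch of the pattern used in BeforeEach/AfterEach (node and pod variables are illustrative; corev1 = k8s.io/api/core/v1):

    // Force one Aerospike pod into Pending: cordon its node, then delete the pod so the
    // StatefulSet controller recreates it on a node that no longer accepts scheduling.
    node := nodeList.Items[nodeToDrain]
    Expect(cordonNodes(ctx, k8sClient, []corev1.Node{node})).ToNot(HaveOccurred())

    for idx := range podList.Items {
        if podList.Items[idx].Spec.NodeName == node.Name {
            Expect(k8sClient.Delete(ctx, &podList.Items[idx])).ToNot(HaveOccurred())
        }
    }

    // Cleanup (AfterEach): make every node schedulable again.
    Expect(uncordonNodes(ctx, k8sClient, nodeList.Items)).ToNot(HaveOccurred())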