Skip to content

Commit

Permalink
Added test-cases
Browse files Browse the repository at this point in the history
  • Loading branch information
abhishekdwivedi3060 committed Nov 20, 2023
1 parent 5a4465c commit d58bf24
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 13 deletions.
4 changes: 2 additions & 2 deletions api/v1/aerospikecluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ type AerospikeClusterSpec struct { //nolint:govet // for readability
RosterNodeBlockList []string `json:"rosterNodeBlockList,omitempty"`
// IgnorePodList is the list of pods which are ignored by the operator while checking the cluster stability and
// are not considered part of cluster. This is only useful when there are some failed pods and operator is required
// to do some operation on the cluster. If pods in running state are defined in this list, they are not ignored
// to do some operation on the cluster. If pods in running state are defined in this list, they are not ignored.
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="Ignore Pod List"
IgnorePodList []string `json:"ignorePodList,omitempty"`
}
Expand Down Expand Up @@ -623,7 +623,7 @@ type AerospikeClusterStatusSpec struct { //nolint:govet // for readability
RosterNodeBlockList []string `json:"rosterNodeBlockList,omitempty"`
// IgnorePodList is the list of pods which are ignored by the operator while checking the cluster stability and
// are not considered part of cluster. This is only useful when there are some failed pods and operator is required
// to do some operation on the cluster. If pods in running state are defined in this list, they are not ignored
// to do some operation on the cluster. If pods in running state are defined in this list, they are not ignored.
IgnorePodList []string `json:"ignorePodList,omitempty"`
}

Expand Down
4 changes: 2 additions & 2 deletions config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ spec:
part of cluster. This is only useful when there are some failed
pods and operator is required to do some operation on the cluster.
If pods in running state are defined in this list, they are not
ignored
ignored.
items:
type: string
type: array
Expand Down Expand Up @@ -8902,7 +8902,7 @@ spec:
part of cluster. This is only useful when there are some failed
pods and operator is required to do some operation on the cluster.
If pods in running state are defined in this list, they are not
ignored
ignored.
items:
type: string
type: array
Expand Down
9 changes: 7 additions & 2 deletions controllers/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ func (r *SingleClusterReconciler) cleanupDanglingPodsRack(sts *appsv1.StatefulSe

// getIgnorablePods returns pods:
// 1. From racksToDelete that are currently not running and can be ignored in stability checks.
// 2. User given pods in ignorePodList that are currently not running and can be ignored in stability checks.
// 2. User given pods in ignorePodList that are currently not running and can be ignored from stability checks.
func (r *SingleClusterReconciler) getIgnorablePods(racksToDelete []asdbv1.Rack) (
sets.Set[string], error,
) {
Expand Down Expand Up @@ -726,9 +726,14 @@ func (r *SingleClusterReconciler) getClusterPodList() (
return podList, nil
}

func (r *SingleClusterReconciler) isAnyPodInImageFailedState(podList []corev1.Pod) bool {
func (r *SingleClusterReconciler) isAnyPodInImageFailedState(podList []corev1.Pod, ignorablePodNames sets.Set[string],
) bool {
for idx := range podList {
pod := &podList[idx]
if ignorablePodNames.Has(pod.Name) {
continue
}

// TODO: Should we use checkPodFailed or CheckPodImageFailed?
// scaleDown, rollingRestart should work even if node is crashed
// If node was crashed due to wrong config then only rollingRestart can bring it back.
Expand Down
12 changes: 7 additions & 5 deletions controllers/rack.go
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ func (r *SingleClusterReconciler) reconcileRack(
// Scale up after upgrading, so that new pods come up with new image
currentSize = *found.Spec.Replicas
if currentSize < desiredSize {
found, res = r.scaleUpRack(found, rackState)
found, res = r.scaleUpRack(found, rackState, ignorablePodNames)
if !res.isSuccess {
r.Log.Error(
res.err, "Failed to scaleUp StatefulSet pods", "stsName",
Expand Down Expand Up @@ -531,7 +531,9 @@ func (r *SingleClusterReconciler) reconcileRack(
return reconcileSuccess()
}

func (r *SingleClusterReconciler) scaleUpRack(found *appsv1.StatefulSet, rackState *RackState) (
func (r *SingleClusterReconciler) scaleUpRack(
found *appsv1.StatefulSet, rackState *RackState, ignorablePodNames sets.Set[string],
) (
*appsv1.StatefulSet, reconcileResult,
) {
desiredSize := int32(rackState.Size)
Expand All @@ -552,7 +554,7 @@ func (r *SingleClusterReconciler) scaleUpRack(found *appsv1.StatefulSet, rackSta
return found, reconcileError(fmt.Errorf("failed to list pods: %v", err))
}

if r.isAnyPodInImageFailedState(podList.Items) {
if r.isAnyPodInImageFailedState(podList.Items, ignorablePodNames) {
return found, reconcileError(fmt.Errorf("cannot scale up AerospikeCluster. A pod is already in failed state"))
}

Expand Down Expand Up @@ -758,7 +760,7 @@ func (r *SingleClusterReconciler) scaleDownRack(
return found, reconcileError(fmt.Errorf("failed to list pods: %v", err))
}

if r.isAnyPodInImageFailedState(oldPodList.Items) {
if r.isAnyPodInImageFailedState(oldPodList.Items, ignorablePodNames) {
return found, reconcileError(fmt.Errorf("cannot scale down AerospikeCluster. A pod is already in failed state"))
}

Expand Down Expand Up @@ -920,7 +922,7 @@ func (r *SingleClusterReconciler) rollingRestartRack(found *appsv1.StatefulSet,
pods = append(pods, *podList[idx])
}

if len(failedPods) != 0 && r.isAnyPodInImageFailedState(pods) {
if len(failedPods) != 0 && r.isAnyPodInImageFailedState(pods, ignorablePodNames) {
return found, reconcileError(
fmt.Errorf(
"cannot Rolling restart AerospikeCluster. " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ spec:
part of cluster. This is only useful when there are some failed
pods and operator is required to do some operation on the cluster.
If pods in running state are defined in this list, they are not
ignored
ignored.
items:
type: string
type: array
Expand Down Expand Up @@ -8902,7 +8902,7 @@ spec:
part of cluster. This is only useful when there are some failed
pods and operator is required to do some operation on the cluster.
If pods in running state are defined in this list, they are not
ignored
ignored.
items:
type: string
type: array
Expand Down
91 changes: 91 additions & 0 deletions test/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ var _ = Describe(
// DeployClusterWithSyslog(ctx)
// },
// )
// Register the ignore-pod-list test suite. The description previously read
// "DeployClusterWithSyslog" (copied from the commented-out block above),
// which mislabeled these specs in test reports; it now names what actually
// runs: clusterWithIgnorePodList.
Context(
	"ClusterWithIgnorePodList", func() {
		clusterWithIgnorePodList(ctx)
	},
)
Context(
"CommonNegativeClusterValidationTest", func() {
NegativeClusterValidationTest(ctx)
Expand Down Expand Up @@ -124,6 +129,92 @@ func ScaleDownWithMigrateFillDelay(ctx goctx.Context) {
)
}

// clusterWithIgnorePodList registers specs that verify cluster operations
// (scale down, rolling restart, upgrade, scale up) still succeed while one
// pod is deliberately broken, provided that pod is listed in
// spec.ignorePodList so the operator excludes it from stability checks.
func clusterWithIgnorePodList(ctx goctx.Context) {
	Context(
		"UpdateClusterWithIgnorePodList", func() {
			clusterNamespacedName := getNamespacedName(
				"ignore-pod-cluster", namespace,
			)

			// Shared across BeforeEach/It/AfterEach within this Context.
			var (
				aeroCluster *asdbv1.AerospikeCluster
				err         error
			)

			BeforeEach(
				func() {
					// Deploy a fresh 4-pod cluster split across 2 racks
					// before every spec.
					aeroCluster = createDummyAerospikeCluster(clusterNamespacedName, 4)
					racks := getDummyRackConf(1, 2)
					aeroCluster.Spec.RackConfig = asdbv1.RackConfig{Racks: racks}
					err = deployCluster(k8sClient, ctx, aeroCluster)
					Expect(err).ToNot(HaveOccurred())
				},
			)

			AfterEach(
				func() {
					// Tear the cluster down so each spec starts clean.
					err = deleteCluster(k8sClient, ctx, aeroCluster)
					Expect(err).ToNot(HaveOccurred())
				},
			)

			It(
				"Should allow cluster operations with failed pods", func() {
					By("Fail 2-0 aerospike pod")
					pod := &v1.Pod{}
					// Pod name follows the <cluster>-<rack>-<ordinal> convention.
					ignorePodName := clusterNamespacedName.Name + "-2-0"

					err = k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName,
						Namespace: clusterNamespacedName.Namespace}, pod)
					Expect(err).ToNot(HaveOccurred())

					// This will lead to pod 2-0 being stuck in a failed
					// (image-pull) state.
					pod.Spec.Containers[0].Image = "wrong-image"
					err = k8sClient.Update(ctx, pod)
					Expect(err).ToNot(HaveOccurred())

					// Each operation below must complete even though pod 2-0
					// is broken, because it is named in ignorePodList.
					By("Set IgnorePodList and scale down 1 pod")
					aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName)
					Expect(err).ToNot(HaveOccurred())
					aeroCluster.Spec.IgnorePodList = []string{ignorePodName}
					aeroCluster.Spec.Size--
					err = updateCluster(k8sClient, ctx, aeroCluster)
					Expect(err).ToNot(HaveOccurred())

					By("Rolling restart cluster")
					aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName)
					Expect(err).ToNot(HaveOccurred())
					// Any dynamic config change triggers a rolling restart.
					aeroCluster.Spec.AerospikeConfig.Value["service"].(map[string]interface{})["proto-fd-max"] = int64(18000)
					err = updateCluster(k8sClient, ctx, aeroCluster)
					Expect(err).ToNot(HaveOccurred())

					By("Upgrade version")
					aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName)
					Expect(err).ToNot(HaveOccurred())
					aeroCluster.Spec.Image = baseImage + ":6.4.0.4"
					err = updateCluster(k8sClient, ctx, aeroCluster)
					Expect(err).ToNot(HaveOccurred())

					By("Scale up")
					aeroCluster, err = getCluster(k8sClient, ctx, clusterNamespacedName)
					Expect(err).ToNot(HaveOccurred())
					// Re-assert ignorePodList on the freshly fetched spec
					// (it should already be present from the earlier update).
					aeroCluster.Spec.IgnorePodList = []string{ignorePodName}
					aeroCluster.Spec.Size++
					err = updateCluster(k8sClient, ctx, aeroCluster)
					Expect(err).ToNot(HaveOccurred())

					By("Verify pod 2-0 is still in failed state")
					// The operator must have left the ignored pod untouched:
					// its container never started and is not ready.
					err = k8sClient.Get(ctx, types.NamespacedName{Name: ignorePodName,
						Namespace: clusterNamespacedName.Namespace}, pod)
					Expect(err).ToNot(HaveOccurred())
					Expect(*pod.Status.ContainerStatuses[0].Started).To(BeFalse())
					Expect(pod.Status.ContainerStatuses[0].Ready).To(BeFalse())
				},
			)
		},
	)
}

// Test cluster deployment with all image post 4.9.0
func DeployClusterForAllImagesPost490(ctx goctx.Context) {
// post 4.9.0, need feature-key file
Expand Down

0 comments on commit d58bf24

Please sign in to comment.