diff --git a/api/v1/aerospikecluster_types.go b/api/v1/aerospikecluster_types.go index bcbb7cd37..156ed52e5 100644 --- a/api/v1/aerospikecluster_types.go +++ b/api/v1/aerospikecluster_types.go @@ -283,6 +283,8 @@ type RackConfig struct { //nolint:govet // for readability // RollingUpdateBatchSize is the percentage/number of rack pods that will be restarted simultaneously // +optional RollingUpdateBatchSize *intstr.IntOrString `json:"rollingUpdateBatchSize,omitempty"` + + MaxUnavailable int `json:"maxUnavailable,omitempty"` } // Rack specifies single rack config diff --git a/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml b/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml index 6d64a64b9..e343dd23b 100644 --- a/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml +++ b/config/crd/bases/asdb.aerospike.com_aerospikeclusters.yaml @@ -4603,6 +4603,8 @@ spec: Aerospike cluster. Pods will be deployed in given racks based on given configuration properties: + maxUnavailable: + type: integer namespaces: description: List of Aerospike namespaces for which rack feature will be enabled @@ -13350,6 +13352,8 @@ spec: given configuration nullable: true properties: + maxUnavailable: + type: integer namespaces: description: List of Aerospike namespaces for which rack feature will be enabled diff --git a/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml b/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml index 2b8c3c42e..1a03a8841 100644 --- a/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/aerospike-kubernetes-operator.clusterserviceversion.yaml @@ -47,11 +47,11 @@ spec: the Aerospike cluster. displayName: Aerospike Network Policy path: aerospikeNetworkPolicy - - description: IgnorePodList is the list of pods which are ignored by the operator - while checking the cluster stability and are not considered part of cluster. - This is only useful when there are some failed pods and operator is required - to do some operation on the cluster. If pods in running state are defined - in this list, they are not ignored. + - description: IgnorePodList is a list of pods that the operator will ignore + while assessing cluster stability. Pods specified in this list are not considered + part of the cluster. This is particularly useful when there are failed pods + and the operator needs to perform certain operations on the cluster. Note + that running pods included in this list will not be ignored. displayName: Ignore Pod List path: ignorePodList - description: Aerospike server image diff --git a/controllers/pod.go b/controllers/pod.go index 0cd28b466..1efd76345 100644 --- a/controllers/pod.go +++ b/controllers/pod.go @@ -650,7 +650,7 @@ func (r *SingleClusterReconciler) cleanupDanglingPodsRack(sts *appsv1.StatefulSe // getIgnorablePods returns pods: // 1. From racksToDelete that are currently not running and can be ignored in stability checks. // 2. User given pods in ignorePodList that are currently not running and can be ignored from stability checks. -func (r *SingleClusterReconciler) getIgnorablePods(racksToDelete []asdbv1.Rack) ( +func (r *SingleClusterReconciler) getIgnorablePods(racksToDelete []asdbv1.Rack, configureRacks []RackState) ( sets.Set[string], error, ) { ignorablePodNames := sets.Set[string]{} @@ -669,17 +669,21 @@ func (r *SingleClusterReconciler) getIgnorablePods(racksToDelete []asdbv1.Rack) } } - podList, err := r.getClusterPodList() - if err != nil { - return nil, err - } + for idx := range configureRacks { + rack := &configureRacks[idx] + failedAllowed := r.aeroCluster.Spec.RackConfig.MaxUnavailable - userIgnorePodSet := sets.NewString(r.aeroCluster.Spec.IgnorePodList...) + podList, err := r.getRackPodList(rack.Rack.ID) + if err != nil { + return nil, err + } - for podIdx := range podList.Items { - pod := &podList.Items[podIdx] - if userIgnorePodSet.Has(pod.Name) && !utils.IsPodRunningAndReady(pod) { - ignorablePodNames.Insert(pod.Name) + for podIdx := range podList.Items { + pod := &podList.Items[podIdx] + if !utils.IsPodRunningAndReady(pod) && failedAllowed > 0 { + ignorablePodNames.Insert(pod.Name) + failedAllowed-- + } } } diff --git a/controllers/rack.go b/controllers/rack.go index 263a4e380..aa6f0670e 100644 --- a/controllers/rack.go +++ b/controllers/rack.go @@ -45,7 +45,7 @@ func (r *SingleClusterReconciler) reconcileRacks() reconcileResult { rackIDsToDelete = append(rackIDsToDelete, racksToDelete[idx].ID) } - ignorablePodNames, err := r.getIgnorablePods(racksToDelete) + ignorablePodNames, err := r.getIgnorablePods(racksToDelete, rackStateList) if err != nil { return reconcileError(err) } diff --git a/controllers/reconciler.go b/controllers/reconciler.go index 796147f1a..7d97e0437 100644 --- a/controllers/reconciler.go +++ b/controllers/reconciler.go @@ -124,7 +124,7 @@ func (r *SingleClusterReconciler) Reconcile() (ctrl.Result, error) { return reconcile.Result{}, err } - ignorablePodNames, err := r.getIgnorablePods(nil) + ignorablePodNames, err := r.getIgnorablePods(nil, getConfiguredRackStateList(r.aeroCluster)) if err != nil { r.Log.Error(err, "Failed to determine pods to be ignored")