Skip to content

Commit

Permalink
Fixed tests
Browse files Browse the repository at this point in the history
  • Loading branch information
sud82 committed Dec 2, 2023
1 parent ba63462 commit 188013c
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 156 deletions.
8 changes: 4 additions & 4 deletions controllers/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -650,8 +650,8 @@ func (r *SingleClusterReconciler) cleanupDanglingPodsRack(sts *appsv1.StatefulSe

// getIgnorablePods returns pods:
// 1. From racksToDelete that are currently not running and can be ignored in stability checks.
// 2. Failed/pending pods identified using maxIgnorablePods field and can be ignored from stability checks.
func (r *SingleClusterReconciler) getIgnorablePods(racksToDelete []asdbv1.Rack, configureRacks []RackState) (
// 2. Failed/pending pods from the configuredRacks identified using maxIgnorablePods field and can be ignored from
// stability checks.

Check failure on line 653 in controllers/pod.go

View workflow job for this annotation

GitHub Actions / lint

line is 132 characters (lll)
func (r *SingleClusterReconciler) getIgnorablePods(racksToDelete []asdbv1.Rack, configuredRacks []RackState) (
sets.Set[string], error,
) {
ignorablePodNames := sets.Set[string]{}
Expand All @@ -670,8 +670,8 @@ func (r *SingleClusterReconciler) getIgnorablePods(racksToDelete []asdbv1.Rack,
}
}

for idx := range configureRacks {
rack := &configureRacks[idx]
for idx := range configuredRacks {
rack := &configuredRacks[idx]

failedAllowed, _ := intstr.GetScaledValueFromIntOrPercent(
r.aeroCluster.Spec.RackConfig.MaxIgnorablePods, rack.Size, false,
Expand Down
43 changes: 28 additions & 15 deletions controllers/rack.go
Original file line number Diff line number Diff line change
Expand Up @@ -420,31 +420,44 @@ func (r *SingleClusterReconciler) upgradeOrRollingRestartRack(found *appsv1.Stat
}

if r.aeroCluster.Spec.RackConfig.MaxIgnorablePods != nil {
podList, err := r.getOrderedRackPodList(rackState.Rack.ID)
if err != nil {
return found, reconcileError(fmt.Errorf("failed to list pods: %v", err))
if res := r.handleNSOrDeviceRemovalForIgnorablePods(rackState, ignorablePodNames); !res.isSuccess {
return found, res
}
// Filter ignoredPods to update their dirtyVolumes in the status.
// IgnoredPods are skipped from upgrade/rolling restart, and as a result in case of device removal, dirtyVolumes
// are not updated in their pod status. This makes devices un-reusable as they cannot be cleaned up during init phase.
// So, explicitly add dirtyVolumes for ignoredPods, so that they can be cleaned in the init phase.
var ignoredPod []*corev1.Pod
}

for idx := range podList {
pod := podList[idx]
return found, reconcileSuccess()
}

func (r *SingleClusterReconciler) handleNSOrDeviceRemovalForIgnorablePods(
rackState *RackState, ignorablePodNames sets.Set[string],
) reconcileResult {
podList, err := r.getOrderedRackPodList(rackState.Rack.ID)
if err != nil {
return reconcileError(fmt.Errorf("failed to list pods: %v", err))
}
// Filter ignoredPods to update their dirtyVolumes in the status.
// IgnoredPods are skipped from upgrade/rolling restart, and as a result in case of device removal, dirtyVolumes
// are not updated in their pod status. This makes devices un-reusable as they cannot be cleaned up during init phase.
// So, explicitly add dirtyVolumes for ignoredPods, so that they can be cleaned in the init phase.
var ignoredPod []*corev1.Pod

for idx := range podList {
pod := podList[idx]
// Pods that are not in the status are not even initialized, so there is no need to update dirtyVolumes.
if _, ok := r.aeroCluster.Status.Pods[pod.Name]; ok {
if ignorablePodNames.Has(pod.Name) {
ignoredPod = append(ignoredPod, pod)
}
}
}

if len(ignoredPod) > 0 {
if err := r.handleNSOrDeviceRemoval(rackState, ignoredPod); err != nil {
return found, reconcileError(err)
}
if len(ignoredPod) > 0 {
if err := r.handleNSOrDeviceRemoval(rackState, ignoredPod); err != nil {
return reconcileError(err)
}
}

return found, reconcileSuccess()
return reconcileSuccess()
}

func (r *SingleClusterReconciler) reconcileRack(
Expand Down
56 changes: 32 additions & 24 deletions controllers/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,42 +218,50 @@ func (r *SingleClusterReconciler) Reconcile() (ctrl.Result, error) {

// Try to recover pods only when MaxIgnorablePods is set
if r.aeroCluster.Spec.RackConfig.MaxIgnorablePods != nil {
podList, gErr := r.getClusterPodList()
if gErr != nil {
r.Log.Error(gErr, "Failed to get cluster pod list")
return reconcile.Result{}, gErr
if res := r.recoverIgnorablePods(); !res.isSuccess {
return res.getResult()
}
}

r.Log.Info("Try to recover failed/pending pods if any")
r.Log.Info("Reconcile completed successfully")

var anyPodFailed bool
// Try to recover failed/pending pods by deleting them
for idx := range podList.Items {
if cErr := utils.CheckPodFailed(&podList.Items[idx]); cErr != nil {
anyPodFailed = true
return reconcile.Result{}, nil
}

if err := r.createOrUpdatePodServiceIfNeeded([]string{podList.Items[idx].Name}); err != nil {
return reconcile.Result{}, err
}
func (r *SingleClusterReconciler) recoverIgnorablePods() reconcileResult {
podList, gErr := r.getClusterPodList()
if gErr != nil {
r.Log.Error(gErr, "Failed to get cluster pod list")
return reconcileError(gErr)
}

if err := r.Client.Delete(context.TODO(), &podList.Items[idx]); err != nil {
r.Log.Error(err, "Failed to delete pod", "pod", podList.Items[idx].Name)
return reconcile.Result{}, err
}
r.Log.Info("Try to recover failed/pending pods if any")

r.Log.Info("Deleted pod", "pod", podList.Items[idx].Name)
var anyPodFailed bool
// Try to recover failed/pending pods by deleting them
for idx := range podList.Items {
if cErr := utils.CheckPodFailed(&podList.Items[idx]); cErr != nil {
anyPodFailed = true

if err := r.createOrUpdatePodServiceIfNeeded([]string{podList.Items[idx].Name}); err != nil {
return reconcileError(err)
}

if err := r.Client.Delete(context.TODO(), &podList.Items[idx]); err != nil {
r.Log.Error(err, "Failed to delete pod", "pod", podList.Items[idx].Name)
return reconcileError(err)
}
}

if anyPodFailed {
r.Log.Info("Found failed/pending pod(s), requeuing")
return reconcile.Result{Requeue: true}, nil
r.Log.Info("Deleted pod", "pod", podList.Items[idx].Name)
}
}

r.Log.Info("Reconcile completed successfully")
if anyPodFailed {
r.Log.Info("Found failed/pending pod(s), requeuing")
return reconcileRequeueAfter(0)
}

return reconcile.Result{}, nil
return reconcileSuccess()
}

func (r *SingleClusterReconciler) validateAndReconcileAccessControl(ignorablePodNames sets.Set[string]) error {
Expand Down
Loading

0 comments on commit 188013c

Please sign in to comment.