From 401ee30f3fa74a08ae3122b7046ac61e2309d39b Mon Sep 17 00:00:00 2001 From: David Cassany Date: Thu, 5 Oct 2023 17:08:19 +0200 Subject: [PATCH] Improve error management Signed-off-by: David Cassany --- .../managedosversionchannel_controller.go | 58 ++++++++++----- ...managedosversionchannel_controller_test.go | 74 +++++++++++++++---- 2 files changed, 99 insertions(+), 33 deletions(-) diff --git a/controllers/managedosversionchannel_controller.go b/controllers/managedosversionchannel_controller.go index edfe846ea..4f17677ac 100644 --- a/controllers/managedosversionchannel_controller.go +++ b/controllers/managedosversionchannel_controller.go @@ -140,7 +140,7 @@ func (r *ManagedOSVersionChannelReconciler) reconcile(ctx context.Context, manag meta.SetStatusCondition(&managedOSVersionChannel.Status.Conditions, metav1.Condition{ Type: elementalv1.ReadyCondition, Reason: elementalv1.InvalidConfigurationReason, - Status: metav1.ConditionTrue, + Status: metav1.ConditionFalse, Message: msg, }) logger.Error(nil, msg) @@ -153,7 +153,7 @@ func (r *ManagedOSVersionChannelReconciler) reconcile(ctx context.Context, manag meta.SetStatusCondition(&managedOSVersionChannel.Status.Conditions, metav1.Condition{ Type: elementalv1.ReadyCondition, Reason: elementalv1.InvalidConfigurationReason, - Status: metav1.ConditionTrue, + Status: metav1.ConditionFalse, Message: msg, }) logger.Error(nil, msg) @@ -166,7 +166,7 @@ func (r *ManagedOSVersionChannelReconciler) reconcile(ctx context.Context, manag meta.SetStatusCondition(&managedOSVersionChannel.Status.Conditions, metav1.Condition{ Type: elementalv1.ReadyCondition, Reason: elementalv1.InvalidConfigurationReason, - Status: metav1.ConditionTrue, + Status: metav1.ConditionFalse, Message: msg, }) logger.Error(nil, msg) @@ -182,7 +182,7 @@ func (r *ManagedOSVersionChannelReconciler) reconcile(ctx context.Context, manag lastSync := managedOSVersionChannel.Status.LastSyncedTime if lastSync != nil && lastSync.Add(r.minTimeBetweenSyncs).After(time.Now()) { logger.Info("synchronization already done shortly before", "lastSync", lastSync) - return ctrl.Result{RequeueAfter: time.Until(lastSync.Add(interval))}, nil + return ctrl.Result{}, nil } pod := &corev1.Pod{} @@ -204,18 +204,22 @@ func (r *ManagedOSVersionChannelReconciler) reconcile(ctx context.Context, manag return ctrl.Result{}, err } - return r.handleSyncPod(ctx, pod, managedOSVersionChannel, interval) + return r.handleSyncPod(ctx, pod, managedOSVersionChannel, interval), nil } // handleSyncPod is the method responcible to manage the lifecycle of the channel synchronization pod -func (r *ManagedOSVersionChannelReconciler) handleSyncPod(ctx context.Context, pod *corev1.Pod, ch *elementalv1.ManagedOSVersionChannel, interval time.Duration) (result ctrl.Result, err error) { +func (r *ManagedOSVersionChannelReconciler) handleSyncPod(ctx context.Context, pod *corev1.Pod, ch *elementalv1.ManagedOSVersionChannel, interval time.Duration) ctrl.Result { var data []byte + var err error + logger := ctrl.LoggerFrom(ctx) defer func() { - r.deletePodOnError(ctx, pod, ch, err) + r.handleFailedSync(ctx, pod, ch, err) }() + // Once the Pod already started we do not return error in case of failure so synchronization is not retriggered, we just set + // to false readyness condition and schedule a new resynchronization for the next interval switch pod.Status.Phase { case corev1.PodPending, corev1.PodRunning: logger.Info("Waiting until the pod is on succeeded state ", "pod", pod.Name) @@ -225,15 +229,15 @@ func (r *ManagedOSVersionChannelReconciler) handleSyncPod(ctx context.Context, p Status: metav1.ConditionFalse, Message: "ongoing channel synchronization", }) - return ctrl.Result{RequeueAfter: r.minTimeBetweenSyncs / 2}, nil + return ctrl.Result{RequeueAfter: r.minTimeBetweenSyncs / 2} case corev1.PodSucceeded: data, err = r.syncerProvider.ReadPodLogs(ctx, r.kcl, pod, displayContainer) if err != nil { - return ctrl.Result{}, err + return ctrl.Result{RequeueAfter: interval} } err = r.createManagedOSVersions(ctx, ch, data) if err != nil { - return ctrl.Result{}, err + return ctrl.Result{RequeueAfter: interval} } if err = r.Delete(ctx, pod); err != nil { logger.Error(err, "could not delete the pod", "pod", pod.Name) @@ -247,18 +251,20 @@ func (r *ManagedOSVersionChannelReconciler) handleSyncPod(ctx context.Context, p }) now := metav1.Now() ch.Status.LastSyncedTime = &now - return ctrl.Result{RequeueAfter: interval}, nil + return ctrl.Result{RequeueAfter: interval} default: // Any other phase (failed or unknown) is considered an error - return ctrl.Result{}, fmt.Errorf("synchronization pod failed") + err = fmt.Errorf("synchronization pod failed") + return ctrl.Result{RequeueAfter: interval} } } -// deletePodOnError deletes the pod if err is not nil -func (r *ManagedOSVersionChannelReconciler) deletePodOnError(ctx context.Context, pod *corev1.Pod, ch *elementalv1.ManagedOSVersionChannel, err error) { - logger := ctrl.LoggerFrom(ctx) - +// handleFailedSync deletes the pod, produces error log traces and sets the failure state if error is not nil, otherwise sets a success state +func (r *ManagedOSVersionChannelReconciler) handleFailedSync(ctx context.Context, pod *corev1.Pod, ch *elementalv1.ManagedOSVersionChannel, err error) { if err != nil { + logger := ctrl.LoggerFrom(ctx) + now := metav1.Now() + ch.Status.LastSyncedTime = &now logger.Error(err, "failed handling syncer pod", "pod", pod.Name) meta.SetStatusCondition(&ch.Status.Conditions, metav1.Condition{ Type: elementalv1.ReadyCondition, @@ -272,7 +278,7 @@ func (r *ManagedOSVersionChannelReconciler) deletePodOnError(ctx context.Context } } -// createManagedOSVersions unmarshals managedOSVersions from a byte array and creates them +// createManagedOSVersions unmarshals managedOSVersions from a byte array and creates them. func (r *ManagedOSVersionChannelReconciler) createManagedOSVersions(ctx context.Context, ch *elementalv1.ManagedOSVersionChannel, data []byte) error { logger := ctrl.LoggerFrom(ctx) @@ -289,6 +295,7 @@ func (r *ManagedOSVersionChannelReconciler) createManagedOSVersions(ctx context. }) var errs []error + for _, v := range vers { vcpy := v.DeepCopy() vcpy.ObjectMeta.Namespace = ch.Namespace @@ -321,11 +328,11 @@ func (r *ManagedOSVersionChannelReconciler) createManagedOSVersions(ctx context. } } else if err = r.Create(ctx, vcpy); err != nil { if apierrors.IsAlreadyExists(err) { - logger.Info("already existing managedOSVersion", "name", vcpy.Name) + logger.Error(err, "already existing managedOSVersion", "name", vcpy.Name) } else { logger.Error(err, "failed to create a managedosversion", "name", vcpy.Name) - errs = append(errs, err) } + errs = append(errs, err) } else { logger.Info("managedOSVersion created", "name", vcpy.Name) } @@ -457,7 +464,18 @@ func filterChannelEvents() predicate.Funcs { return false } // Return true in case it watches other types - logger.V(log.DebugDepth).Info("Processing create event", "Obj", e.Object.GetName()) + logger.V(log.DebugDepth).Info("Processing delete event", "Obj", e.Object.GetName()) + return true + }, + // Ignore generic pod events + GenericFunc: func(e event.GenericEvent) bool { + logger := ctrl.LoggerFrom(context.Background()) + + if _, ok := e.Object.(*corev1.Pod); ok { + return false + } + // Return true in case it watches other types + logger.V(log.DebugDepth).Info("Processing generic event", "Obj", e.Object.GetName()) return true }, } diff --git a/controllers/managedosversionchannel_controller_test.go b/controllers/managedosversionchannel_controller_test.go index 091d99d58..46cea5ae8 100644 --- a/controllers/managedosversionchannel_controller_test.go +++ b/controllers/managedosversionchannel_controller_test.go @@ -199,8 +199,7 @@ var _ = Describe("reconcile managed os version channel", func() { // No re-sync can be triggered before the minium time between syncs res, err = r.Reconcile(ctx, reconcile.Request{NamespacedName: name}) Expect(err).ToNot(HaveOccurred()) - Expect(res.RequeueAfter).To(BeNumerically("<", 60*time.Second)) - Expect(res.RequeueAfter).To(BeNumerically(">", 55*time.Second)) + Expect(res.RequeueAfter).To(Equal(0 * time.Second)) }) It("should reconcile managed os version channel object without a type", func() { @@ -223,7 +222,7 @@ var _ = Describe("reconcile managed os version channel", func() { }, managedOSVersionChannel)).To(Succeed()) Expect(managedOSVersionChannel.Status.Conditions[0].Reason).To(Equal(elementalv1.InvalidConfigurationReason)) - Expect(managedOSVersionChannel.Status.Conditions[0].Status).To(Equal(metav1.ConditionTrue)) + Expect(managedOSVersionChannel.Status.Conditions[0].Status).To(Equal(metav1.ConditionFalse)) }) It("should reconcile managed os version channel object with a bad a sync interval", func() { @@ -247,7 +246,7 @@ var _ = Describe("reconcile managed os version channel", func() { }, managedOSVersionChannel)).To(Succeed()) Expect(managedOSVersionChannel.Status.Conditions[0].Reason).To(Equal(elementalv1.InvalidConfigurationReason)) - Expect(managedOSVersionChannel.Status.Conditions[0].Status).To(Equal(metav1.ConditionTrue)) + Expect(managedOSVersionChannel.Status.Conditions[0].Status).To(Equal(metav1.ConditionFalse)) }) It("should reconcile managed os version channel object with a not valid type", func() { @@ -264,7 +263,6 @@ var _ = Describe("reconcile managed os version channel", func() { }) Expect(err).ToNot(HaveOccurred()) Expect(res.RequeueAfter).To(Equal(0 * time.Second)) - Expect(res.Requeue).To(BeFalse()) Expect(cl.Get(ctx, client.ObjectKey{ Name: managedOSVersionChannel.Name, @@ -272,7 +270,7 @@ var _ = Describe("reconcile managed os version channel", func() { }, managedOSVersionChannel)).To(Succeed()) Expect(managedOSVersionChannel.Status.Conditions[0].Reason).To(Equal(elementalv1.InvalidConfigurationReason)) - Expect(managedOSVersionChannel.Status.Conditions[0].Status).To(Equal(metav1.ConditionTrue)) + Expect(managedOSVersionChannel.Status.Conditions[0].Status).To(Equal(metav1.ConditionFalse)) }) It("it fails to reconcile a managed os version channel when channel provides invalid JSON", func() { @@ -285,8 +283,9 @@ var _ = Describe("reconcile managed os version channel", func() { Expect(cl.Create(ctx, managedOSVersionChannel)).To(Succeed()) // No error and status updated (no requeue) - _, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: name}) + res, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: name}) Expect(err).ToNot(HaveOccurred()) + Expect(res.RequeueAfter).To(Equal(0 * time.Second)) Expect(cl.Get(ctx, client.ObjectKey{ Name: managedOSVersionChannel.Name, @@ -303,7 +302,7 @@ var _ = Describe("reconcile managed os version channel", func() { setPodPhase(pod, corev1.PodSucceeded) _, err = r.Reconcile(ctx, reconcile.Request{NamespacedName: name}) - Expect(err).To(HaveOccurred()) + Expect(err).NotTo(HaveOccurred()) Expect(cl.Get(ctx, client.ObjectKey{ Name: managedOSVersionChannel.Name, @@ -330,7 +329,7 @@ var _ = Describe("reconcile managed os version channel", func() { Expect(cl.Create(ctx, managedOSVersionChannel)).To(Succeed()) // No error and status updated (no requeue) - _, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: name}) + res, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: name}) Expect(err).ToNot(HaveOccurred()) Expect(cl.Get(ctx, client.ObjectKey{ @@ -349,8 +348,9 @@ var _ = Describe("reconcile managed os version channel", func() { Expect(pod.Status.Phase).To(Equal(corev1.PodPending)) setPodPhase(pod, corev1.PodSucceeded) - _, err = r.Reconcile(ctx, reconcile.Request{NamespacedName: name}) - Expect(err).To(HaveOccurred()) + res, err = r.Reconcile(ctx, reconcile.Request{NamespacedName: name}) + Expect(err).NotTo(HaveOccurred()) + Expect(res.RequeueAfter).To(Equal(1 * time.Hour)) Expect(cl.Get(ctx, client.ObjectKey{ Name: managedOSVersionChannel.Name, @@ -395,8 +395,9 @@ var _ = Describe("reconcile managed os version channel", func() { Expect(pod.Status.Phase).To(Equal(corev1.PodPending)) setPodPhase(pod, corev1.PodFailed) - _, err = r.Reconcile(ctx, reconcile.Request{NamespacedName: name}) - Expect(err).To(HaveOccurred()) + res, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: name}) + Expect(err).NotTo(HaveOccurred()) + Expect(res.RequeueAfter).To(Equal(1 * time.Hour)) Expect(cl.Get(ctx, client.ObjectKey{ Name: managedOSVersionChannel.Name, @@ -583,4 +584,51 @@ var _ = Describe("managed os version channel controller integration tests", func }, managedOSVersion)).To(Succeed()) Expect(managedOSVersion.Spec.Version).To(Equal("v0.1.0-patched")) }) + + It("should not reconcile again if it errors during pod lifecycle", func() { + ch.Spec.Type = "json" + + Expect(cl.Create(ctx, ch)).To(Succeed()) + + // Pod is created + Eventually(func() bool { + err := cl.Get(ctx, client.ObjectKey{ + Name: ch.Name, + Namespace: ch.Namespace, + }, pod) + return err == nil + }, 4*time.Second, 1*time.Second).Should(BeTrue()) + setPodPhase(pod, corev1.PodFailed) + + Eventually(func() bool { + err := cl.Get(ctx, client.ObjectKey{ + Name: ch.Name, + Namespace: ch.Namespace, + }, ch) + return err == nil && ch.Status.Conditions[0].Reason == elementalv1.FailedToSyncReason + }, 4*time.Second, 1*time.Second).Should(BeTrue()) + + // Pod is deleted + Eventually(func() bool { + err := cl.Get(ctx, client.ObjectKey{ + Name: ch.Name, + Namespace: ch.Namespace, + }, pod) + return err != nil && apierrors.IsNotFound(err) + }, 4*time.Second, 1*time.Second).Should(BeTrue()) + + // Pod is never recreated and and channel stays in failed status + Consistently(func() bool { + podErr := cl.Get(ctx, client.ObjectKey{ + Name: ch.Name, + Namespace: ch.Namespace, + }, pod) + cErr := cl.Get(ctx, client.ObjectKey{ + Name: ch.Name, + Namespace: ch.Namespace, + }, ch) + return (podErr != nil && apierrors.IsNotFound(podErr)) && + (cErr == nil && ch.Status.Conditions[0].Reason == elementalv1.FailedToSyncReason) + }, minTime, 1*time.Second).Should(BeTrue()) + }) })