Skip to content

Commit

Permalink
Improve error management
Browse files Browse the repository at this point in the history
Signed-off-by: David Cassany <[email protected]>
  • Loading branch information
davidcassany committed Oct 5, 2023
1 parent 664bcc2 commit 401ee30
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 33 deletions.
58 changes: 38 additions & 20 deletions controllers/managedosversionchannel_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ func (r *ManagedOSVersionChannelReconciler) reconcile(ctx context.Context, manag
meta.SetStatusCondition(&managedOSVersionChannel.Status.Conditions, metav1.Condition{
Type: elementalv1.ReadyCondition,
Reason: elementalv1.InvalidConfigurationReason,
Status: metav1.ConditionTrue,
Status: metav1.ConditionFalse,
Message: msg,
})
logger.Error(nil, msg)
Expand All @@ -153,7 +153,7 @@ func (r *ManagedOSVersionChannelReconciler) reconcile(ctx context.Context, manag
meta.SetStatusCondition(&managedOSVersionChannel.Status.Conditions, metav1.Condition{
Type: elementalv1.ReadyCondition,
Reason: elementalv1.InvalidConfigurationReason,
Status: metav1.ConditionTrue,
Status: metav1.ConditionFalse,
Message: msg,
})
logger.Error(nil, msg)
Expand All @@ -166,7 +166,7 @@ func (r *ManagedOSVersionChannelReconciler) reconcile(ctx context.Context, manag
meta.SetStatusCondition(&managedOSVersionChannel.Status.Conditions, metav1.Condition{
Type: elementalv1.ReadyCondition,
Reason: elementalv1.InvalidConfigurationReason,
Status: metav1.ConditionTrue,
Status: metav1.ConditionFalse,
Message: msg,
})
logger.Error(nil, msg)
Expand All @@ -182,7 +182,7 @@ func (r *ManagedOSVersionChannelReconciler) reconcile(ctx context.Context, manag
lastSync := managedOSVersionChannel.Status.LastSyncedTime
if lastSync != nil && lastSync.Add(r.minTimeBetweenSyncs).After(time.Now()) {
logger.Info("synchronization already done shortly before", "lastSync", lastSync)
return ctrl.Result{RequeueAfter: time.Until(lastSync.Add(interval))}, nil
return ctrl.Result{}, nil
}

pod := &corev1.Pod{}
Expand All @@ -204,18 +204,22 @@ func (r *ManagedOSVersionChannelReconciler) reconcile(ctx context.Context, manag
return ctrl.Result{}, err
}

return r.handleSyncPod(ctx, pod, managedOSVersionChannel, interval)
return r.handleSyncPod(ctx, pod, managedOSVersionChannel, interval), nil
}

// handleSyncPod is the method responcible to manage the lifecycle of the channel synchronization pod
func (r *ManagedOSVersionChannelReconciler) handleSyncPod(ctx context.Context, pod *corev1.Pod, ch *elementalv1.ManagedOSVersionChannel, interval time.Duration) (result ctrl.Result, err error) {
func (r *ManagedOSVersionChannelReconciler) handleSyncPod(ctx context.Context, pod *corev1.Pod, ch *elementalv1.ManagedOSVersionChannel, interval time.Duration) ctrl.Result {
var data []byte
var err error

logger := ctrl.LoggerFrom(ctx)

defer func() {
r.deletePodOnError(ctx, pod, ch, err)
r.handleFailedSync(ctx, pod, ch, err)
}()

// Once the Pod already started we do not return error in case of failure so synchronization is not retriggered, we just set
// to false readyness condition and schedule a new resynchronization for the next interval
switch pod.Status.Phase {
case corev1.PodPending, corev1.PodRunning:
logger.Info("Waiting until the pod is on succeeded state ", "pod", pod.Name)
Expand All @@ -225,15 +229,15 @@ func (r *ManagedOSVersionChannelReconciler) handleSyncPod(ctx context.Context, p
Status: metav1.ConditionFalse,
Message: "ongoing channel synchronization",
})
return ctrl.Result{RequeueAfter: r.minTimeBetweenSyncs / 2}, nil
return ctrl.Result{RequeueAfter: r.minTimeBetweenSyncs / 2}
case corev1.PodSucceeded:
data, err = r.syncerProvider.ReadPodLogs(ctx, r.kcl, pod, displayContainer)
if err != nil {
return ctrl.Result{}, err
return ctrl.Result{RequeueAfter: interval}
}
err = r.createManagedOSVersions(ctx, ch, data)
if err != nil {
return ctrl.Result{}, err
return ctrl.Result{RequeueAfter: interval}
}
if err = r.Delete(ctx, pod); err != nil {
logger.Error(err, "could not delete the pod", "pod", pod.Name)
Expand All @@ -247,18 +251,20 @@ func (r *ManagedOSVersionChannelReconciler) handleSyncPod(ctx context.Context, p
})
now := metav1.Now()
ch.Status.LastSyncedTime = &now
return ctrl.Result{RequeueAfter: interval}, nil
return ctrl.Result{RequeueAfter: interval}
default:
// Any other phase (failed or unknown) is considered an error
return ctrl.Result{}, fmt.Errorf("synchronization pod failed")
err = fmt.Errorf("synchronization pod failed")
return ctrl.Result{RequeueAfter: interval}
}
}

// deletePodOnError deletes the pod if err is not nil
func (r *ManagedOSVersionChannelReconciler) deletePodOnError(ctx context.Context, pod *corev1.Pod, ch *elementalv1.ManagedOSVersionChannel, err error) {
logger := ctrl.LoggerFrom(ctx)

// handleFailedSync deletes the pod, produces error log traces and sets the failure state if error is not nil, otherwise sets a success state
func (r *ManagedOSVersionChannelReconciler) handleFailedSync(ctx context.Context, pod *corev1.Pod, ch *elementalv1.ManagedOSVersionChannel, err error) {
if err != nil {
logger := ctrl.LoggerFrom(ctx)
now := metav1.Now()
ch.Status.LastSyncedTime = &now
logger.Error(err, "failed handling syncer pod", "pod", pod.Name)
meta.SetStatusCondition(&ch.Status.Conditions, metav1.Condition{
Type: elementalv1.ReadyCondition,
Expand All @@ -272,7 +278,7 @@ func (r *ManagedOSVersionChannelReconciler) deletePodOnError(ctx context.Context
}
}

// createManagedOSVersions unmarshals managedOSVersions from a byte array and creates them
// createManagedOSVersions unmarshals managedOSVersions from a byte array and creates them.
func (r *ManagedOSVersionChannelReconciler) createManagedOSVersions(ctx context.Context, ch *elementalv1.ManagedOSVersionChannel, data []byte) error {
logger := ctrl.LoggerFrom(ctx)

Expand All @@ -289,6 +295,7 @@ func (r *ManagedOSVersionChannelReconciler) createManagedOSVersions(ctx context.
})

var errs []error

for _, v := range vers {
vcpy := v.DeepCopy()
vcpy.ObjectMeta.Namespace = ch.Namespace
Expand Down Expand Up @@ -321,11 +328,11 @@ func (r *ManagedOSVersionChannelReconciler) createManagedOSVersions(ctx context.
}
} else if err = r.Create(ctx, vcpy); err != nil {
if apierrors.IsAlreadyExists(err) {
logger.Info("already existing managedOSVersion", "name", vcpy.Name)
logger.Error(err, "already existing managedOSVersion", "name", vcpy.Name)
} else {
logger.Error(err, "failed to create a managedosversion", "name", vcpy.Name)
errs = append(errs, err)
}
errs = append(errs, err)
} else {
logger.Info("managedOSVersion created", "name", vcpy.Name)
}
Expand Down Expand Up @@ -457,7 +464,18 @@ func filterChannelEvents() predicate.Funcs {
return false
}
// Return true in case it watches other types
logger.V(log.DebugDepth).Info("Processing create event", "Obj", e.Object.GetName())
logger.V(log.DebugDepth).Info("Processing delete event", "Obj", e.Object.GetName())
return true
},
// Ignore generic pod events
GenericFunc: func(e event.GenericEvent) bool {
logger := ctrl.LoggerFrom(context.Background())

if _, ok := e.Object.(*corev1.Pod); ok {
return false
}
// Return true in case it watches other types
logger.V(log.DebugDepth).Info("Processing generic event", "Obj", e.Object.GetName())
return true
},
}
Expand Down
74 changes: 61 additions & 13 deletions controllers/managedosversionchannel_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,7 @@ var _ = Describe("reconcile managed os version channel", func() {
// No re-sync can be triggered before the minium time between syncs
res, err = r.Reconcile(ctx, reconcile.Request{NamespacedName: name})
Expect(err).ToNot(HaveOccurred())
Expect(res.RequeueAfter).To(BeNumerically("<", 60*time.Second))
Expect(res.RequeueAfter).To(BeNumerically(">", 55*time.Second))
Expect(res.RequeueAfter).To(Equal(0 * time.Second))
})

It("should reconcile managed os version channel object without a type", func() {
Expand All @@ -223,7 +222,7 @@ var _ = Describe("reconcile managed os version channel", func() {
}, managedOSVersionChannel)).To(Succeed())

Expect(managedOSVersionChannel.Status.Conditions[0].Reason).To(Equal(elementalv1.InvalidConfigurationReason))
Expect(managedOSVersionChannel.Status.Conditions[0].Status).To(Equal(metav1.ConditionTrue))
Expect(managedOSVersionChannel.Status.Conditions[0].Status).To(Equal(metav1.ConditionFalse))
})

It("should reconcile managed os version channel object with a bad a sync interval", func() {
Expand All @@ -247,7 +246,7 @@ var _ = Describe("reconcile managed os version channel", func() {
}, managedOSVersionChannel)).To(Succeed())

Expect(managedOSVersionChannel.Status.Conditions[0].Reason).To(Equal(elementalv1.InvalidConfigurationReason))
Expect(managedOSVersionChannel.Status.Conditions[0].Status).To(Equal(metav1.ConditionTrue))
Expect(managedOSVersionChannel.Status.Conditions[0].Status).To(Equal(metav1.ConditionFalse))
})

It("should reconcile managed os version channel object with a not valid type", func() {
Expand All @@ -264,15 +263,14 @@ var _ = Describe("reconcile managed os version channel", func() {
})
Expect(err).ToNot(HaveOccurred())
Expect(res.RequeueAfter).To(Equal(0 * time.Second))
Expect(res.Requeue).To(BeFalse())

Expect(cl.Get(ctx, client.ObjectKey{
Name: managedOSVersionChannel.Name,
Namespace: managedOSVersionChannel.Namespace,
}, managedOSVersionChannel)).To(Succeed())

Expect(managedOSVersionChannel.Status.Conditions[0].Reason).To(Equal(elementalv1.InvalidConfigurationReason))
Expect(managedOSVersionChannel.Status.Conditions[0].Status).To(Equal(metav1.ConditionTrue))
Expect(managedOSVersionChannel.Status.Conditions[0].Status).To(Equal(metav1.ConditionFalse))
})

It("it fails to reconcile a managed os version channel when channel provides invalid JSON", func() {
Expand All @@ -285,8 +283,9 @@ var _ = Describe("reconcile managed os version channel", func() {
Expect(cl.Create(ctx, managedOSVersionChannel)).To(Succeed())

// No error and status updated (no requeue)
_, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: name})
res, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: name})
Expect(err).ToNot(HaveOccurred())
Expect(res.RequeueAfter).To(Equal(0 * time.Second))

Expect(cl.Get(ctx, client.ObjectKey{
Name: managedOSVersionChannel.Name,
Expand All @@ -303,7 +302,7 @@ var _ = Describe("reconcile managed os version channel", func() {

setPodPhase(pod, corev1.PodSucceeded)
_, err = r.Reconcile(ctx, reconcile.Request{NamespacedName: name})
Expect(err).To(HaveOccurred())
Expect(err).NotTo(HaveOccurred())

Expect(cl.Get(ctx, client.ObjectKey{
Name: managedOSVersionChannel.Name,
Expand All @@ -330,7 +329,7 @@ var _ = Describe("reconcile managed os version channel", func() {
Expect(cl.Create(ctx, managedOSVersionChannel)).To(Succeed())

// No error and status updated (no requeue)
_, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: name})
res, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: name})
Expect(err).ToNot(HaveOccurred())

Expect(cl.Get(ctx, client.ObjectKey{
Expand All @@ -349,8 +348,9 @@ var _ = Describe("reconcile managed os version channel", func() {
Expect(pod.Status.Phase).To(Equal(corev1.PodPending))

setPodPhase(pod, corev1.PodSucceeded)
_, err = r.Reconcile(ctx, reconcile.Request{NamespacedName: name})
Expect(err).To(HaveOccurred())
res, err = r.Reconcile(ctx, reconcile.Request{NamespacedName: name})
Expect(err).NotTo(HaveOccurred())
Expect(res.RequeueAfter).To(Equal(1 * time.Hour))

Expect(cl.Get(ctx, client.ObjectKey{
Name: managedOSVersionChannel.Name,
Expand Down Expand Up @@ -395,8 +395,9 @@ var _ = Describe("reconcile managed os version channel", func() {
Expect(pod.Status.Phase).To(Equal(corev1.PodPending))

setPodPhase(pod, corev1.PodFailed)
_, err = r.Reconcile(ctx, reconcile.Request{NamespacedName: name})
Expect(err).To(HaveOccurred())
res, err := r.Reconcile(ctx, reconcile.Request{NamespacedName: name})
Expect(err).NotTo(HaveOccurred())
Expect(res.RequeueAfter).To(Equal(1 * time.Hour))

Expect(cl.Get(ctx, client.ObjectKey{
Name: managedOSVersionChannel.Name,
Expand Down Expand Up @@ -583,4 +584,51 @@ var _ = Describe("managed os version channel controller integration tests", func
}, managedOSVersion)).To(Succeed())
Expect(managedOSVersion.Spec.Version).To(Equal("v0.1.0-patched"))
})

It("should not reconcile again if it errors during pod lifecycle", func() {
ch.Spec.Type = "json"

Expect(cl.Create(ctx, ch)).To(Succeed())

// Pod is created
Eventually(func() bool {
err := cl.Get(ctx, client.ObjectKey{
Name: ch.Name,
Namespace: ch.Namespace,
}, pod)
return err == nil
}, 4*time.Second, 1*time.Second).Should(BeTrue())
setPodPhase(pod, corev1.PodFailed)

Eventually(func() bool {
err := cl.Get(ctx, client.ObjectKey{
Name: ch.Name,
Namespace: ch.Namespace,
}, ch)
return err == nil && ch.Status.Conditions[0].Reason == elementalv1.FailedToSyncReason
}, 4*time.Second, 1*time.Second).Should(BeTrue())

// Pod is deleted
Eventually(func() bool {
err := cl.Get(ctx, client.ObjectKey{
Name: ch.Name,
Namespace: ch.Namespace,
}, pod)
return err != nil && apierrors.IsNotFound(err)
}, 4*time.Second, 1*time.Second).Should(BeTrue())

// Pod is never recreated and and channel stays in failed status
Consistently(func() bool {
podErr := cl.Get(ctx, client.ObjectKey{
Name: ch.Name,
Namespace: ch.Namespace,
}, pod)
cErr := cl.Get(ctx, client.ObjectKey{
Name: ch.Name,
Namespace: ch.Namespace,
}, ch)
return (podErr != nil && apierrors.IsNotFound(podErr)) &&
(cErr == nil && ch.Status.Conditions[0].Reason == elementalv1.FailedToSyncReason)
}, minTime, 1*time.Second).Should(BeTrue())
})
})

0 comments on commit 401ee30

Please sign in to comment.