Skip to content

Commit

Permalink
fix: clear status after hibernating or resuming (#778)
Browse files Browse the repository at this point in the history
Some status fields - most notably, the timestamps that indicate how long
the session has been failing, hibernated or idle should be cleared after
hibernation or resuming. If this is not done then one cannot
successfully resume the session after hibernation (for example).
  • Loading branch information
olevski authored Dec 5, 2024
1 parent 9b60507 commit 541806b
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 7 deletions.
78 changes: 78 additions & 0 deletions internal/controller/amaltheasession_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -421,5 +421,83 @@ var _ = Describe("AmaltheaSession Controller", func() {
return errors.IsNotFound(err)
}, time.Minute, time.Second).WithContext(ctx).Should(BeTrue())
})

It("should clear statuses after hibernation", func(ctx SpecContext) {
By("Checking if the custom resource was successfully created")
controllerReconciler := newReconciler()
_, err := controllerReconciler.Reconcile(ctx, reconcile.Request{
NamespacedName: typeNamespacedName,
})
Expect(err).NotTo(HaveOccurred())
Expect(k8sClient.Get(ctx, typeNamespacedName, amaltheasession)).To(Succeed())

By("Artificially adding statuses")
newTimestamp := metav1.NewTime(time.Now().Round(time.Second))
amaltheasession.Status.FailingSince = newTimestamp
amaltheasession.Status.IdleSince = newTimestamp
Expect(controllerReconciler.Status().Update(ctx, amaltheasession)).To(Succeed())
Expect(k8sClient.Get(ctx, typeNamespacedName, amaltheasession)).To(Succeed())
Expect(amaltheasession.Status.FailingSince).To(Equal(newTimestamp))
Expect(amaltheasession.Status.IdleSince).To(Equal(newTimestamp))

By("Marking the session as hibernated")
amaltheasession.Spec.Hibernated = true
Expect(controllerReconciler.Update(ctx, amaltheasession)).To(Succeed())

By("Checking that the appropriate timestamps in the status have been reset")
_, err = controllerReconciler.Reconcile(ctx, reconcile.Request{
NamespacedName: typeNamespacedName,
})
Expect(err).NotTo(HaveOccurred())
Expect(k8sClient.Get(ctx, typeNamespacedName, amaltheasession)).To(Succeed())
Expect(amaltheasession.Status.HibernatedSince).ToNot(BeZero())
Expect(amaltheasession.Status.IdleSince).To(BeZero())
Expect(amaltheasession.Status.FailingSince).To(BeZero())
Expect(amaltheasession.Status.State).To(Equal(amaltheadevv1alpha1.Hibernated))
})

It("should clear statuses after resuming", func(ctx SpecContext) {
By("Checking if the custom resource was successfully created")
controllerReconciler := newReconciler()
_, err := controllerReconciler.Reconcile(ctx, reconcile.Request{
NamespacedName: typeNamespacedName,
})
Expect(err).NotTo(HaveOccurred())
Expect(k8sClient.Get(ctx, typeNamespacedName, amaltheasession)).To(Succeed())

By("Marking the session as hibernated")
amaltheasession.Spec.Hibernated = true
Expect(controllerReconciler.Update(ctx, amaltheasession)).To(Succeed())
_, err = controllerReconciler.Reconcile(ctx, reconcile.Request{
NamespacedName: typeNamespacedName,
})
Expect(err).NotTo(HaveOccurred())
Expect(k8sClient.Get(ctx, typeNamespacedName, amaltheasession)).To(Succeed())

By("Artificially adding statuses")
newTimestamp := metav1.NewTime(time.Now().Round(time.Second))
amaltheasession.Status.FailingSince = newTimestamp
amaltheasession.Status.IdleSince = newTimestamp
Expect(controllerReconciler.Status().Update(ctx, amaltheasession)).To(Succeed())
Expect(k8sClient.Get(ctx, typeNamespacedName, amaltheasession)).To(Succeed())
Expect(amaltheasession.Status.HibernatedSince).ToNot(BeZero())
Expect(amaltheasession.Status.FailingSince).To(Equal(newTimestamp))
Expect(amaltheasession.Status.IdleSince).To(Equal(newTimestamp))

By("Resuming the session")
amaltheasession.Spec.Hibernated = false
Expect(controllerReconciler.Update(ctx, amaltheasession)).To(Succeed())
_, err = controllerReconciler.Reconcile(ctx, reconcile.Request{
NamespacedName: typeNamespacedName,
})
Expect(err).NotTo(HaveOccurred())

By("Checking that the appropriate timestamps in the status have been reset")
Expect(k8sClient.Get(ctx, typeNamespacedName, amaltheasession)).To(Succeed())
Expect(amaltheasession.Status.HibernatedSince).To(BeZero())
Expect(amaltheasession.Status.IdleSince).To(BeZero())
Expect(amaltheasession.Status.FailingSince).To(BeZero())
Expect(amaltheasession.Status.State).To(Equal(amaltheadevv1alpha1.NotReady))
})
})
})
57 changes: 50 additions & 7 deletions internal/controller/children.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@ type ChildResource[T ChildResourceType] struct {
}

type ChildResourceUpdate[T ChildResourceType] struct {
Manifest *T
UpdateResult controllerutil.OperationResult
Error error
Manifest *T
UpdateResult controllerutil.OperationResult
Error error
statusCallback func(*amaltheadevv1alpha1.AmaltheaSessionStatus)
}

type ChildResources struct {
Expand Down Expand Up @@ -92,8 +93,9 @@ func (c ChildResource[T]) Reconcile(ctx context.Context, clnt client.Client, cr
}
return nil
})
return ChildResourceUpdate[T]{c.Current, res, err}
return ChildResourceUpdate[T]{c.Current, res, err, nil}
case *appsv1.StatefulSet:
var statusCallback func(*amaltheadevv1alpha1.AmaltheaSessionStatus)
res, err := controllerutil.CreateOrPatch(ctx, clnt, current, func() error {
desired, ok := any(c.Desired).(*appsv1.StatefulSet)
if !ok {
Expand All @@ -106,6 +108,22 @@ func (c ChildResource[T]) Reconcile(ctx context.Context, clnt client.Client, cr
err := ctrl.SetControllerReference(cr, current, clnt.Scheme())
return err
}
if current.Spec.Replicas != nil && desired.Spec.Replicas != nil && *current.Spec.Replicas == 0 && *desired.Spec.Replicas == 1 {
// The session is being resumed
statusCallback = func(status *amaltheadevv1alpha1.AmaltheaSessionStatus) {
status.IdleSince = metav1.Time{}
status.FailingSince = metav1.Time{}
status.HibernatedSince = metav1.Time{}
}
}
if current.Spec.Replicas != nil && desired.Spec.Replicas != nil && *current.Spec.Replicas == 1 && *desired.Spec.Replicas == 0 {
// The session is being hibernated
statusCallback = func(status *amaltheadevv1alpha1.AmaltheaSessionStatus) {
status.IdleSince = metav1.Time{}
status.FailingSince = metav1.Time{}
status.HibernatedSince = metav1.Now()
}
}
current.Spec.Replicas = desired.Spec.Replicas
switch strategy := cr.Spec.ReconcileStrategy; strategy {
case amaltheadevv1alpha1.Never:
Expand All @@ -124,7 +142,7 @@ func (c ChildResource[T]) Reconcile(ctx context.Context, clnt client.Client, cr
}
return nil
})
return ChildResourceUpdate[T]{c.Current, res, err}
return ChildResourceUpdate[T]{c.Current, res, err, statusCallback}
case *v1.PersistentVolumeClaim:
res, err := controllerutil.CreateOrPatch(ctx, clnt, current, func() error {
desired, ok := any(c.Desired).(*v1.PersistentVolumeClaim)
Expand Down Expand Up @@ -157,7 +175,7 @@ func (c ChildResource[T]) Reconcile(ctx context.Context, clnt client.Client, cr
}
return nil
})
return ChildResourceUpdate[T]{c.Current, res, err}
return ChildResourceUpdate[T]{c.Current, res, err, nil}
case *v1.Service:
res, err := controllerutil.CreateOrPatch(ctx, clnt, current, func() error {
desired, ok := any(c.Desired).(*v1.Service)
Expand Down Expand Up @@ -187,7 +205,7 @@ func (c ChildResource[T]) Reconcile(ctx context.Context, clnt client.Client, cr
}
return nil
})
return ChildResourceUpdate[T]{c.Current, res, err}
return ChildResourceUpdate[T]{c.Current, res, err, nil}
default:
return ChildResourceUpdate[T]{Error: fmt.Errorf("Encountered an uknown child resource type")}
}
Expand Down Expand Up @@ -368,6 +386,29 @@ func Conditions(
return conditions
}

// statusCallback modifies an amalthea status based on the reconciliation of different child resources.
// Certain status updates can only be done based on the current and previous values of child resources,
// these should take precedence over other statuses derived only on current values and are applied here.
func (c ChildResourceUpdates) statusCallback(status *amaltheadevv1alpha1.AmaltheaSessionStatus) {
if c.Ingress.statusCallback != nil {
c.Ingress.statusCallback(status)
}
if c.Service.statusCallback != nil {
c.Service.statusCallback(status)
}
if c.PVC.statusCallback != nil {
c.PVC.statusCallback(status)
}
for _, dc := range c.DataSourcesPVCs {
if dc.statusCallback != nil {
dc.statusCallback(status)
}
}
if c.StatefulSet.statusCallback != nil {
c.StatefulSet.statusCallback(status)
}
}

func (c ChildResourceUpdates) Status(
ctx context.Context,
r *AmaltheaSessionReconciler,
Expand Down Expand Up @@ -448,6 +489,8 @@ func (c ChildResourceUpdates) Status(
status.InitContainerCounts.Ready = 0
}

c.statusCallback(&status)

// Used for debugging to ensure the reconcile loop does not needlessly reschdule or update child resources
// log.Info("Update summary", "Ingress", c.Ingress.UpdateResult, "StatefulSet", c.StatefulSet.UpdateResult, "PVC", c.StatefulSet.UpdateResult, "Service", c.Service.UpdateResult)

Expand Down

0 comments on commit 541806b

Please sign in to comment.