Skip to content

Commit

Permalink
Improve error reasons on btpOperator (#934)
Browse files Browse the repository at this point in the history
* Improve error reasons on btpOperator

* Replace function

* Check other approach

* Refactor

* Check for race

* Refactor

* Simplify loop

* Less timeout logic

* Push deployment name

* Return resource name and kind

* Refactor

* Refactor
  • Loading branch information
MarekMichali authored Dec 23, 2024
1 parent 8da0ece commit 66e27e9
Showing 1 changed file with 33 additions and 31 deletions.
64 changes: 33 additions & 31 deletions controllers/btpoperator_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,13 @@ type BtpOperatorReconciler struct {
instanceBindingService InstanceBindingSerivce
}

// ResourceReadiness is the message a per-resource readiness check sends back
// over its result channel to report the outcome for one resource.
type ResourceReadiness struct {
	// Name, Namespace and Kind identify the resource the result refers to.
	// The checks fill them in when reporting a timeout, so the receiver can
	// name the resource in its error; successful results leave them empty.
	Name, Namespace, Kind string
	// Ready is true when the check succeeded and false when it timed out.
	Ready bool
}

func NewBtpOperatorReconciler(client client.Client, scheme *runtime.Scheme, instanceBindingSerivice InstanceBindingSerivce, metrics *metrics.Metrics) *BtpOperatorReconciler {
return &BtpOperatorReconciler{
Client: client,
Expand Down Expand Up @@ -615,41 +622,24 @@ func (r *BtpOperatorReconciler) applyOrUpdateResources(ctx context.Context, us [

func (r *BtpOperatorReconciler) waitForResourcesReadiness(ctx context.Context, us []*unstructured.Unstructured) error {
numOfResources := len(us)
resourcesReadinessInformer := make(chan bool, numOfResources)
allReadyInformer := make(chan bool, 1)
resourcesReadinessInformer := make(chan ResourceReadiness, numOfResources)
for _, u := range us {
go r.checkResourceReadiness(ctx, u, resourcesReadinessInformer)
}
go func(c chan bool) {
timeout := time.After(ReadyTimeout)
for i := 0; i < numOfResources; i++ {
select {
case <-resourcesReadinessInformer:
continue
case <-timeout:
return
}
if u.GetKind() == deploymentKind {
go r.checkDeploymentReadiness(ctx, u, resourcesReadinessInformer)
continue
}
allReadyInformer <- true
}(resourcesReadinessInformer)
select {
case <-allReadyInformer:
return nil
case <-time.After(ReadyTimeout):
return errors.New("resources readiness timeout reached")
go r.checkResourceExistence(ctx, u, resourcesReadinessInformer)
}
}

func (r *BtpOperatorReconciler) checkResourceReadiness(ctx context.Context, u *unstructured.Unstructured, c chan<- bool) {
switch u.GetKind() {
case deploymentKind:
r.checkDeploymentReadiness(ctx, u, c)
default:
r.checkResourceExistence(ctx, u, c)
for i := 0; i < numOfResources; i++ {
if resourceReady := <-resourcesReadinessInformer; !resourceReady.Ready {
return fmt.Errorf("%s %s in namespace %s readiness timeout reached", resourceReady.Kind, resourceReady.Name, resourceReady.Namespace)
}
}
return nil
}

func (r *BtpOperatorReconciler) checkDeploymentReadiness(ctx context.Context, u *unstructured.Unstructured, c chan<- bool) {
func (r *BtpOperatorReconciler) checkDeploymentReadiness(ctx context.Context, u *unstructured.Unstructured, c chan<- ResourceReadiness) {
logger := log.FromContext(ctx)
ctxWithTimeout, cancel := context.WithTimeout(ctx, ReadyCheckInterval)
defer cancel()
Expand All @@ -661,6 +651,12 @@ func (r *BtpOperatorReconciler) checkDeploymentReadiness(ctx context.Context, u
for {
if time.Since(now) >= ReadyTimeout {
logger.Error(err, fmt.Sprintf("timed out while checking %s %s readiness", u.GetName(), u.GetKind()))
c <- ResourceReadiness{
Name: u.GetName(),
Namespace: u.GetNamespace(),
Kind: u.GetKind(),
Ready: false,
}
return
}
if err = r.Get(ctxWithTimeout, client.ObjectKey{Name: u.GetName(), Namespace: u.GetNamespace()}, got); err == nil {
Expand All @@ -672,14 +668,14 @@ func (r *BtpOperatorReconciler) checkDeploymentReadiness(ctx context.Context, u
}
}
if progressingConditionStatus == "True" && availableConditionStatus == "True" {
c <- true
c <- ResourceReadiness{Ready: true}
return
}
}
}
}

func (r *BtpOperatorReconciler) checkResourceExistence(ctx context.Context, u *unstructured.Unstructured, c chan<- bool) {
func (r *BtpOperatorReconciler) checkResourceExistence(ctx context.Context, u *unstructured.Unstructured, c chan<- ResourceReadiness) {
logger := log.FromContext(ctx)
ctxWithTimeout, cancel := context.WithTimeout(ctx, ReadyCheckInterval)
defer cancel()
Expand All @@ -691,10 +687,16 @@ func (r *BtpOperatorReconciler) checkResourceExistence(ctx context.Context, u *u
for {
if time.Since(now) >= ReadyTimeout {
logger.Error(err, fmt.Sprintf("timed out while checking %s %s existence", u.GetName(), u.GetKind()))
c <- ResourceReadiness{
Name: u.GetName(),
Namespace: u.GetNamespace(),
Kind: u.GetKind(),
Ready: false,
}
return
}
if err = r.Get(ctxWithTimeout, client.ObjectKey{Name: u.GetName(), Namespace: u.GetNamespace()}, got); err == nil {
c <- true
c <- ResourceReadiness{Ready: true}
return
}
}
Expand Down

0 comments on commit 66e27e9

Please sign in to comment.