Skip to content

Commit

Permalink
Retry helm apply on any error
Browse files Browse the repository at this point in the history
Signed-off-by: Daniel Weiße <[email protected]>
  • Loading branch information
daniel-weisse committed Sep 8, 2023
1 parent 7376c6a commit 93aab8a
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 22 deletions.
2 changes: 1 addition & 1 deletion cli/internal/helm/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,6 @@ go_library(
"//internal/versions",
"@com_github_pkg_errors//:errors",
"@io_k8s_apimachinery//pkg/apis/meta/v1:meta",
"@io_k8s_apimachinery//pkg/util/wait",
"@io_k8s_client_go//kubernetes",
"@io_k8s_client_go//tools/clientcmd",
"@io_k8s_client_go//util/retry",
Expand All @@ -457,6 +456,7 @@ go_test(
srcs = [
"helm_test.go",
"loader_test.go",
"retryaction_test.go",
],
data = glob(["testdata/**"]),
embed = [":helm"],
Expand Down
7 changes: 4 additions & 3 deletions cli/internal/helm/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ import (

const (
// timeout is the maximum time given per helm action.
timeout = 10 * time.Minute
timeout = 10 * time.Minute
applyRetryInterval = 30 * time.Second
)

type applyAction interface {
Expand Down Expand Up @@ -85,7 +86,7 @@ func (a *installAction) Apply(ctx context.Context) error {
return err
}
}
if err := retryApply(ctx, a, a.log); err != nil {
if err := retryApply(ctx, a, applyRetryInterval, a.log); err != nil {
return err
}

Expand Down Expand Up @@ -142,7 +143,7 @@ func (a *upgradeAction) Apply(ctx context.Context) error {
return err
}
}
if err := retryApply(ctx, a, a.log); err != nil {
if err := retryApply(ctx, a, applyRetryInterval, a.log); err != nil {
return err
}
if a.postUpgrade != nil {
Expand Down
28 changes: 10 additions & 18 deletions cli/internal/helm/retryaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@ package helm
import (
"context"
"fmt"
"strings"
"time"

"github.com/edgelesssys/constellation/v2/internal/retry"
"k8s.io/apimachinery/pkg/util/wait"
)

const (
Expand All @@ -28,7 +26,7 @@ type retrieableApplier interface {
}

// retryApply retries the given retriable action.
func retryApply(ctx context.Context, action retrieableApplier, log debugLog) error {
func retryApply(ctx context.Context, action retrieableApplier, retryInterval time.Duration, log debugLog) error {
var retries int
retriable := func(err error) bool {
// abort after maximumRetryAttempts tries.
Expand All @@ -37,20 +35,14 @@ func retryApply(ctx context.Context, action retrieableApplier, log debugLog) err
}
retries++
// only retry if atomic is set
// otherwise helm doesn't uninstall
// the release on failure
if !action.IsAtomic() {
return false
}
// check if error is retriable
return wait.Interrupted(err) ||
strings.Contains(err.Error(), "connection refused")
// otherwise helm doesn't uninstall the release on failure
return action.IsAtomic()
}
doer := applyDoer{
action,
log,
applier: action,
log: log,
}
retrier := retry.NewIntervalRetrier(doer, 30*time.Second, retriable)
retrier := retry.NewIntervalRetrier(doer, retryInterval, retriable)

retryLoopStartTime := time.Now()
if err := retrier.Do(ctx); err != nil {
Expand All @@ -63,15 +55,15 @@ func retryApply(ctx context.Context, action retrieableApplier, log debugLog) err

// applyDoer is a helper struct to enable retrying helm actions.
type applyDoer struct {
Applier retrieableApplier
applier retrieableApplier
log debugLog
}

// Do tries to apply the action.
func (i applyDoer) Do(ctx context.Context) error {
i.log.Debugf("Trying to apply Helm chart %s", i.Applier.ReleaseName())
if err := i.Applier.apply(ctx); err != nil {
i.log.Debugf("Helm chart installation %s failed: %v", i.Applier.ReleaseName(), err)
i.log.Debugf("Trying to apply Helm chart %s", i.applier.ReleaseName())
if err := i.applier.apply(ctx); err != nil {
i.log.Debugf("Helm chart installation %s failed: %v", i.applier.ReleaseName(), err)
return err
}

Expand Down
104 changes: 104 additions & 0 deletions cli/internal/helm/retryaction_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
Copyright (c) Edgeless Systems GmbH
SPDX-License-Identifier: AGPL-3.0-only
*/

package helm

import (
"context"
"testing"
"time"

"github.com/edgelesssys/constellation/v2/internal/logger"
"github.com/stretchr/testify/assert"
)

func TestRetryApply(t *testing.T) {
testCases := map[string]struct {
applier *stubRetriableApplier
wantErr bool
}{
"success": {
applier: &stubRetriableApplier{
atomic: true,
},
},
"two errors": {
applier: &stubRetriableApplier{
applyErrs: []error{
assert.AnError,
assert.AnError,
nil,
},
atomic: true,
},
},
"retries are aborted after maximumRetryAttempts": {
applier: &stubRetriableApplier{
applyErrs: []error{
assert.AnError,
assert.AnError,
assert.AnError,
assert.AnError,
},
atomic: true,
},
wantErr: true,
},
"non atomic actions are not retried": {
applier: &stubRetriableApplier{
atomic: false,
applyErrs: []error{
assert.AnError,
assert.AnError,
nil,
},
},
wantErr: true,
},
}

for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
assert := assert.New(t)

err := retryApply(context.Background(), tc.applier, time.Millisecond, logger.NewTest(t))
if tc.wantErr {
assert.Error(err)
} else {
assert.NoError(err)
}
})
}
}

type stubRetriableApplier struct {
atomic bool
applyErrs []error
}

func (s *stubRetriableApplier) apply(context.Context) error {
if len(s.applyErrs) == 0 {
return nil
}

// return the first error in the list
// and remove it from the list
err := s.applyErrs[0]
if len(s.applyErrs) > 1 {
s.applyErrs = s.applyErrs[1:]
} else {
s.applyErrs = nil
}
return err
}

func (s *stubRetriableApplier) ReleaseName() string {
return ""
}

func (s *stubRetriableApplier) IsAtomic() bool {
return s.atomic
}

0 comments on commit 93aab8a

Please sign in to comment.