Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 add orderKappsValidateErr in crdupgradesafety preflight #1640

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions internal/rukpak/preflights/crdupgradesafety/checks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package crdupgradesafety

import (
"errors"
"fmt"
"testing"

kappcus "carvel.dev/kapp/pkg/kapp/crdupgradesafety"
Expand Down Expand Up @@ -905,3 +906,81 @@ func TestType(t *testing.T) {
})
}
}

func TestOrderKappsValidateErr(t *testing.T) {
testErr1 := errors.New("fallback1")
testErr2 := errors.New("fallback2")

generateErrors := func(n int, base string) []error {
var result []error
for i := n; i >= 0; i-- {
result = append(result, fmt.Errorf("%s%d", base, i))
}
return result
}

joinedAndNested := func(format string, errs ...error) error {
return fmt.Errorf(format, errors.Join(errs...))
}

testCases := []struct {
name string
inpuError error
expectedError error
}{
{
name: "fallback: initial error was not error.Join'ed",
inpuError: testErr1,
expectedError: testErr1,
},
{
name: "fallback: nested error was not wrapped",
inpuError: errors.Join(testErr1),
expectedError: testErr1,
},
{
name: "fallback: multiple nested errors, one was not wrapped",
inpuError: errors.Join(testErr2, fmt.Errorf("%w", testErr1)),
expectedError: errors.Join(testErr2, fmt.Errorf("%w", testErr1)),
},
{
name: "fallback: nested error did not contain \":\"",
inpuError: errors.Join(fmt.Errorf("%w", testErr1)),
expectedError: testErr1,
},
{
name: "fallback: multiple nested errors, one did not contain \":\"",
inpuError: errors.Join(joinedAndNested("fail: %w", testErr2), joinedAndNested("%w", testErr1)),
expectedError: errors.Join(fmt.Errorf("fail: %w", testErr2), testErr1),
},
{
name: "fallback: nested error was not error.Join'ed",
inpuError: errors.Join(fmt.Errorf("fail: %w", testErr1)),
expectedError: fmt.Errorf("fail: %w", testErr1),
},
{
name: "fallback: multiple nested errors, one was not error.Join'ed",
inpuError: errors.Join(joinedAndNested("fail: %w", testErr2), fmt.Errorf("fail: %w", testErr1)),
expectedError: fmt.Errorf("fail: %w\nfail: %w", testErr2, testErr1),
},
{
name: "ensures order for a single group of multiple deeply nested errors",
inpuError: errors.Join(joinedAndNested("fail: %w", testErr2, testErr1)),
expectedError: fmt.Errorf("fail: %w\n%w", testErr1, testErr2),
},
{
name: "ensures order for multiple groups of deeply nested errors",
inpuError: errors.Join(
joinedAndNested("fail: %w", testErr2, testErr1),
joinedAndNested("validation: %w", generateErrors(5, "err")...),
),
expectedError: fmt.Errorf("fail: %w\n%w\nvalidation: err0\nerr1\nerr2\nerr3\nerr4\nerr5", testErr1, testErr2),
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
err := orderKappsValidateErr(tc.inpuError)
require.EqualError(t, err, tc.expectedError.Error())
})
}
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package crdupgradesafety

import (
"cmp"
"context"
"errors"
"fmt"
"slices"
"strings"

kappcus "carvel.dev/kapp/pkg/kapp/crdupgradesafety"
Expand Down Expand Up @@ -84,7 +86,7 @@ func (p *Preflight) runPreflight(ctx context.Context, rel *release.Release) erro
return fmt.Errorf("parsing release %q objects: %w", rel.Name, err)
}

validateErrors := []error{}
validateErrors := make([]error, 0, len(relObjects))
for _, obj := range relObjects {
if obj.GetObjectKind().GroupVersionKind() != apiextensionsv1.SchemeGroupVersion.WithKind("CustomResourceDefinition") {
continue
Expand Down Expand Up @@ -112,9 +114,71 @@ func (p *Preflight) runPreflight(ctx context.Context, rel *release.Release) erro

err = p.validator.Validate(*oldCrd, *newCrd)
if err != nil {
err = orderKappsValidateErr(err)
validateErrors = append(validateErrors, fmt.Errorf("validating upgrade for CRD %q failed: %w", newCrd.Name, err))
}
}

return errors.Join(validateErrors...)
}

// orderKappsValidateErr is meant as a temporary solution to the problem
// of randomly ordered multi-line validation error returned by kapp's validator.Validate()
//
// The problem is that kapp's field validations are performed in map iteration order, which is not fixed.
// Errors from those validations are then error.Join'ed, fmt.Errorf'ed and error.Join'ed again,
// which means original messages are available at 3rd level of nesting, and this is where we need to
// sort them to ensure we do not enter into constant reconciliation loop because of random order of
// failure message we ultimately set in ClusterExtension's status conditions.
//
// This helper attempts to do that and falls back to original unchanged error message
// in case of any unforeseen issues which likely mean that the internals of validator.Validate
// have changed.
//
// For full context see:
// github.com/operator-framework/operator-controller/issues/1456 (original issue and comments)
// github.com/carvel-dev/kapp/pull/1047 (PR to ensure order in upstream)
//
// TODO: remove this once ordering has been handled by the upstream.
func orderKappsValidateErr(err error) error {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well written (as with the unit tests). Thank you! Qq: is it worth surfacing whether the function had to fallback on the original? My guess is that if we fallback, we may run into the continuous reconcile issue. Wondering if we should make that noisy somehow...?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thank you @perdasilva I thought about it too and it's a tricky situation, because while the first two steps in the unwrapping logic are true to all internal validators (as of now at least), the last one (most nested error.Join) might only be valid for CheckValidator internal validator. Since fallback might as well be a false negative here, I'd normally log this via Debug, but I noticed Debug isn't being used anywhere in the code so ultimately thought against it.
In case of a non-empty error here and regardless of fallback, we are still logging the full reconciliation error message every time we try to reconcile and that should make spotting the random order possible, though I agree an added hint from failed fallback might definitely help, so maybe it would be ok.

WDYT?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah. Ultimately, I guess, it becomes a debug artifice. Which, while it undoubtedly helps with debugging, we'd still be reconciling repeatedly. If we have exp. backoff, then maybe it's not a big issue, and it's also temporary. I was thinking if we should panic (since the library shouldn't change out from under us during runtime), but then we'd need to be very careful about testing each error condition the underlying library checks for, which might not be a good use of time atm. Let's proceed ^^ thanks, dude =D

joinedValidationErrs, ok := err.(interface{ Unwrap() []error })
if !ok {
return err
}

// nolint: prealloc
var errs []error
for _, validationErr := range joinedValidationErrs.Unwrap() {
unwrappedValidationErr := errors.Unwrap(validationErr)
// validator.Validate did not error.Join'ed validation errors
// kapp's internals changed - fallback to original error
if unwrappedValidationErr == nil {
return err
}

prefix, _, ok := strings.Cut(validationErr.Error(), ":")
// kapp's internal error format changed - fallback to original error
if !ok {
return err
}

// attempt to unwrap and sort field errors
joinedFieldErrs, ok := unwrappedValidationErr.(interface{ Unwrap() []error })
// ChangeValidator did not error.Join'ed field validation errors
// kapp's internals changed - fallback to original error
if !ok {
return err
}

// ensure order of the field validation errors
unwrappedFieldErrs := joinedFieldErrs.Unwrap()
slices.SortFunc(unwrappedFieldErrs, func(a, b error) int {
return cmp.Compare(a.Error(), b.Error())
})

// re-join the sorted field errors keeping the original error prefix from kapp
errs = append(errs, fmt.Errorf("%s: %w", prefix, errors.Join(unwrappedFieldErrs...)))
}

return errors.Join(errs...)
}
Loading