From b4601836d0b1968f93b8c984fba9534372d933e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Drzewiecki?= Date: Tue, 17 Dec 2024 15:35:07 +0100 Subject: [PATCH 1/3] retry exceeded infrastructure request rate limits errors --- pkg/gardener/gardener_error_handler.go | 14 +++++++++++++- pkg/gardener/gardener_error_handler_test.go | 14 +++++++------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/pkg/gardener/gardener_error_handler.go b/pkg/gardener/gardener_error_handler.go index 0ae41da9..ef3d5722 100644 --- a/pkg/gardener/gardener_error_handler.go +++ b/pkg/gardener/gardener_error_handler.go @@ -13,12 +13,24 @@ type ErrReason string func IsRetryable(lastErrors []gardener.LastError) bool { if len(lastErrors) > 0 && - !gardenerhelper.HasNonRetryableErrorCode(lastErrors...) { + !gardenerhelper.HasNonRetryableErrorCode(lastErrors...) || + HasErrorInfraRateLimitsExceeded(lastErrors...) { return true } return false } +func HasErrorInfraRateLimitsExceeded(lastErrors ...gardener.LastError) bool { + for _, lastError := range lastErrors { + for _, code := range lastError.Codes { + if code == gardener.ErrorInfraRateLimitsExceeded { + return true + } + } + } + return false +} + func ToErrReason(lastErrors ...gardener.LastError) ErrReason { var codes []gardener.ErrorCode var vals []string diff --git a/pkg/gardener/gardener_error_handler_test.go b/pkg/gardener/gardener_error_handler_test.go index ac83ab22..4a558c87 100644 --- a/pkg/gardener/gardener_error_handler_test.go +++ b/pkg/gardener/gardener_error_handler_test.go @@ -78,6 +78,12 @@ func fixRetryableErrors() []gardener.LastError { gardener.ErrorRetryableInfraDependencies, }, }, + { + Description: "Third description - non-retryable error according to gardener API which we deliberately consider as retryable", + Codes: []gardener.ErrorCode{ + gardener.ErrorInfraRateLimitsExceeded, + }, + }, } } @@ -109,18 +115,12 @@ func fixNonRetryableErrors() []gardener.LastError { }, { Description: "Fifth description 
- non-retryable", - Codes: []gardener.ErrorCode{ - gardener.ErrorInfraRateLimitsExceeded, - }, - }, - { - Description: "Sixth description - non-retryable", Codes: []gardener.ErrorCode{ gardener.ErrorConfigurationProblem, }, }, { - Description: "Seventh description - non-retryable", + Description: "Sixth description - non-retryable", Codes: []gardener.ErrorCode{ gardener.ErrorProblematicWebhook, }, From 658de9e4bd4fc1233869933041909b4771e52432 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Drzewiecki?= Date: Tue, 17 Dec 2024 15:53:46 +0100 Subject: [PATCH 2/3] requeue when oidc cr creation fails --- internal/controller/runtime/fsm/runtime_fsm_configure_oidc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/controller/runtime/fsm/runtime_fsm_configure_oidc.go b/internal/controller/runtime/fsm/runtime_fsm_configure_oidc.go index 51c6f6bc..60423445 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_configure_oidc.go +++ b/internal/controller/runtime/fsm/runtime_fsm_configure_oidc.go @@ -47,7 +47,7 @@ func sFnConfigureOidc(ctx context.Context, m *fsm, s *systemState) (stateFn, *ct if err != nil { m.log.Error(err, "Failed to create OpenIDConnect resource") updateConditionFailed(&s.instance) - return updateStatusAndStopWithError(err) + return requeue() } m.log.Info("OIDC has been configured", "Name", s.shoot.Name) From 5bda8470a8d287825c28dece8af164e618c3e580 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Drzewiecki?= Date: Tue, 17 Dec 2024 16:03:52 +0100 Subject: [PATCH 3/3] improve log message regarding oidc cr creation failure --- internal/controller/runtime/fsm/runtime_fsm_configure_oidc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/controller/runtime/fsm/runtime_fsm_configure_oidc.go b/internal/controller/runtime/fsm/runtime_fsm_configure_oidc.go index 60423445..e7c23916 100644 --- a/internal/controller/runtime/fsm/runtime_fsm_configure_oidc.go +++ 
b/internal/controller/runtime/fsm/runtime_fsm_configure_oidc.go @@ -45,8 +45,8 @@ func sFnConfigureOidc(ctx context.Context, m *fsm, s *systemState) (stateFn, *ct err := recreateOpenIDConnectResources(ctx, m, s) if err != nil { - m.log.Error(err, "Failed to create OpenIDConnect resource") updateConditionFailed(&s.instance) + m.log.Error(err, "Failed to create OpenIDConnect resource. Scheduling for retry") return requeue() }