From d921b5687a99c9aadbc12c2cc28722312b8b8c79 Mon Sep 17 00:00:00 2001 From: Dmitriy Kalinin Date: Tue, 5 Oct 2021 22:17:37 -0400 Subject: [PATCH 1/2] disable finding existing non-labeled resources for pkgr reconciliation --- .github/workflows/test-gh.yml | 2 +- config/rbac.yml | 3 +++ hack/install-deps.sh | 6 +++--- pkg/deploy/kapp_restrict.go | 3 +++ pkg/pkgrepository/package_repo_app.go | 20 ++++++++++++++++++++ 5 files changed, 30 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-gh.yml b/.github/workflows/test-gh.yml index a5cdf4499..67dba6842 100644 --- a/.github/workflows/test-gh.yml +++ b/.github/workflows/test-gh.yml @@ -31,7 +31,7 @@ jobs: token: ${{ secrets.GITHUB_TOKEN }} only: ytt, kapp, kbld, vendir ytt: v0.35.1 - kapp: v0.39.0 + kapp: v0.42.0 kbld: v0.31.0 vendir: v0.23.0 - name: Run Tests diff --git a/config/rbac.yml b/config/rbac.yml index 56f2dbe3c..dcfea82d2 100644 --- a/config/rbac.yml +++ b/config/rbac.yml @@ -57,6 +57,9 @@ rules: - apiGroups: ["admissionregistration.k8s.io"] resources: ["validatingwebhookconfigurations"] verbs: ["list", "watch"] +- apiGroups: ["authorization.k8s.io"] + resources: ["subjectaccessreviews"] + verbs: ["create"] --- kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 diff --git a/hack/install-deps.sh b/hack/install-deps.sh index 97bc5e411..337b7b5dc 100755 --- a/hack/install-deps.sh +++ b/hack/install-deps.sh @@ -27,7 +27,7 @@ install() { ytt_version=v0.35.1 kbld_version=v0.31.0 - kapp_version=v0.41.0 + kapp_version=v0.42.0 imgpkg_version=v0.18.0 vendir_version=v0.23.0 @@ -35,14 +35,14 @@ install() { binary_type=darwin-amd64 ytt_checksum=1f2b61d02f6d8184889719d5e0277a1ea82219f96873345157e81075ca59808e kbld_checksum=d3b0a30bf3a79bedeb25d8548a91254954b99cd4a0c03f3a810b331fc4d1f071 - kapp_checksum=928f1269b52bbb2c725a3ccccc0a63925a03d169d8d3341183e23d429530ef2c + kapp_checksum=47102637b9cd541b4ad1d6074f77b7cec1b60c170a0eb5c92df89674207194e7 imgpkg_checksum=6ef71c549deefb1b9b798f31525610c4a7a562401f22b3bdf18e0cb769921d32 vendir_checksum=aef3233128727c01ffac6561533c9f60a49307f41decc1baa18688a4a250b15f else binary_type=linux-amd64 ytt_checksum=0aa78f7b5f5a0a4c39bddfed915172880344270809c26b9844e9d0cbf6437030 kbld_checksum=ba0be56d9e74b067f3e659de0b79100b0b9df86a2e3e0e6ff533b1e019c22c23 - kapp_checksum=ce4c38a6c6c7785d32afbc31ba316a6b89c068903d356a2ef8f54101b4073953 + kapp_checksum=5d5c4274a130f2fd5ad11ddd8fb3e0f647c8598ba25711360207fc6eab72f6be imgpkg_checksum=7e401eab1fbbaad8044cf3d82ff09cb8ec7666444ef91da9591f202e8b2a8a67 vendir_checksum=c6a65e7e8e589e25bf5554e9575ab247e55ae71920d3d366ffd84b1571fe34ac fi diff --git a/pkg/deploy/kapp_restrict.go b/pkg/deploy/kapp_restrict.go index 9a283aa1b..d2c2355a3 100644 --- a/pkg/deploy/kapp_restrict.go +++ b/pkg/deploy/kapp_restrict.go @@ -61,6 +61,9 @@ var ( var ( kappAllowedDeployFlagSet = exec.NewFlagSet(kappAllowedSharedOpts, kappAllowedChangeOpts, []string{ "--dangerous-allow-empty-list-of-resources", + + "--existing-non-labeled-resources-check", + "--existing-non-labeled-resources-check-concurrency", "--dangerous-override-ownership-of-existing-resources", "--into-ns", diff --git a/pkg/pkgrepository/package_repo_app.go b/pkg/pkgrepository/package_repo_app.go index 7a74a1790..914d8277a 100644 --- a/pkg/pkgrepository/package_repo_app.go +++ b/pkg/pkgrepository/package_repo_app.go @@ -4,6 +4,7 @@ package pkgrepository import ( + "os" "time" kcv1alpha1 "github.com/vmware-tanzu/carvel-kapp-controller/pkg/apis/kappctrl/v1alpha1" @@ -11,6 +12,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +var ( + kappDebug = os.Getenv("KAPPCTRL_PKGR_KAPP_DEBUG") == "true" +) + func NewPackageRepoApp(pkgRepository *pkgingv1alpha1.PackageRepository) (*kcv1alpha1.App, error) { desiredApp := &kcv1alpha1.App{} @@ -28,10 +33,25 @@ func NewPackageRepoApp(pkgRepository *pkgingv1alpha1.PackageRepository) (*kcv1al // instead of spinning wheels trying to list, scope to "current" namespace "--dangerous-scope-to-fallback-allowed-namespaces=true", } + + if kappDebug { + kappRawOpts = append(kappRawOpts, "--debug=true") + } + kappDeployRawOpts := append([]string{ "--logs=false", "--app-changes-max-to-keep=5", + // GKE for some reason does not like high volume of GETs for our API server + // and ends up taking very long time to respond to SubjectAccessReviews (SAR). + // We can disable existing check entirely since we do not allow to "take ownership" + // of other Package/PackageMetadata and we do not _need_ to adopt Packages that + // are not owned by kapp already. + "--existing-non-labeled-resources-check=false", + // ... could in theory just lower concurrency, but decided to turn it off entirely. + // (on GKE, 6 was a sweet spot, 10 exhibited hanging behaviour) + // "--existing-non-labeled-resources-check-concurrency=6", }, kappRawOpts...) + kappDeleteRawOpts := append([]string{}, kappRawOpts...) desiredApp.Spec = kcv1alpha1.AppSpec{ From 5b33905629171951df867452e581dc2942e03925 Mon Sep 17 00:00:00 2001 From: Dmitriy Kalinin Date: Tue, 5 Oct 2021 23:15:31 -0400 Subject: [PATCH 2/2] count deletion failures for reconciliation timing logic --- pkg/app/app_reconcile.go | 2 ++ pkg/app/reconcile_timer.go | 4 ++-- pkg/pkgrepository/app_reconcile.go | 2 ++ pkg/pkgrepository/reconcile_timer.go | 4 ++-- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/pkg/app/app_reconcile.go b/pkg/app/app_reconcile.go index 990f4f843..8a4adaf42 100644 --- a/pkg/app/app_reconcile.go +++ b/pkg/app/app_reconcile.go @@ -266,6 +266,8 @@ func (a *App) setDeleteCompleted(result exec.CmdRunResult) { Status: corev1.ConditionTrue, Message: result.ErrorStr(), }) + a.app.Status.ConsecutiveReconcileFailures++ + a.app.Status.ConsecutiveReconcileSuccesses = 0 a.app.Status.FriendlyDescription = fmt.Sprintf("Delete failed: %s", result.ErrorStr()) a.appMetrics.RegisterReconcileDeleteFailed(a.app.Name, a.app.Namespace) a.setUsefulErrorMessage(result) diff --git a/pkg/app/reconcile_timer.go b/pkg/app/reconcile_timer.go index c29e7209c..856b61ad8 100644 --- a/pkg/app/reconcile_timer.go +++ b/pkg/app/reconcile_timer.go @@ -22,7 +22,7 @@ func NewReconcileTimer(app v1alpha1.App) ReconcileTimer { } func (rt ReconcileTimer) DurationUntilReady(err error) time.Duration { - if err != nil || rt.hasReconcileStatus(v1alpha1.ReconcileFailed) { + if err != nil || rt.hasReconcileStatus(v1alpha1.ReconcileFailed) || rt.hasReconcileStatus(v1alpha1.DeleteFailed) { return rt.failureSyncPeriod() } @@ -46,7 +46,7 @@ func (rt ReconcileTimer) IsReadyAt(timeAt time.Time) bool { return true } - if rt.hasReconcileStatus(v1alpha1.ReconcileFailed) { + if rt.hasReconcileStatus(v1alpha1.ReconcileFailed) || rt.hasReconcileStatus(v1alpha1.DeleteFailed) { if timeAt.UTC().Sub(lastReconcileTime) >= rt.failureSyncPeriod() { return true } diff --git a/pkg/pkgrepository/app_reconcile.go b/pkg/pkgrepository/app_reconcile.go index cc39670e7..4138b18a0 100644 --- a/pkg/pkgrepository/app_reconcile.go +++ b/pkg/pkgrepository/app_reconcile.go @@ -236,6 +236,8 @@ func (a *App) setDeleteCompleted(result exec.CmdRunResult) { Status: corev1.ConditionTrue, Message: result.ErrorStr(), }) + a.app.Status.ConsecutiveReconcileFailures++ + a.app.Status.ConsecutiveReconcileSuccesses = 0 a.app.Status.FriendlyDescription = fmt.Sprintf("Delete failed: %s", result.ErrorStr()) a.setUsefulErrorMessage(result) } else { diff --git a/pkg/pkgrepository/reconcile_timer.go b/pkg/pkgrepository/reconcile_timer.go index 8990f2b67..aa67d8e6e 100644 --- a/pkg/pkgrepository/reconcile_timer.go +++ b/pkg/pkgrepository/reconcile_timer.go @@ -22,7 +22,7 @@ func NewReconcileTimer(app v1alpha1.App) ReconcileTimer { } func (rt ReconcileTimer) DurationUntilReady(err error) time.Duration { - if err != nil || rt.hasReconcileStatus(v1alpha1.ReconcileFailed) { + if err != nil || rt.hasReconcileStatus(v1alpha1.ReconcileFailed) || rt.hasReconcileStatus(v1alpha1.DeleteFailed) { return rt.failureSyncPeriod() } @@ -46,7 +46,7 @@ func (rt ReconcileTimer) IsReadyAt(timeAt time.Time) bool { return true } - if rt.hasReconcileStatus(v1alpha1.ReconcileFailed) { + if rt.hasReconcileStatus(v1alpha1.ReconcileFailed) || rt.hasReconcileStatus(v1alpha1.DeleteFailed) { if timeAt.UTC().Sub(lastReconcileTime) >= rt.failureSyncPeriod() { return true }