From 46f69ff891395a864aa4b9ca7ed8cc25b73f2d3a Mon Sep 17 00:00:00 2001 From: Diogo Recharte Date: Wed, 21 Feb 2024 08:12:02 +0000 Subject: [PATCH] EVEREST-838 Workaround waiting for DBs to be deleted (#311) * EVEREST-838 workaround waiting for DBs to be deleted When deleting a DBC CR, the everest operator doesn't wait for the DB operator's CRs to be deleted. Thus, as soon as we delete the DBC CRs, these cease to exist in the cluster and the polling below will return immediately. If we don't wait for the DB operators to process the deletion of the CRs, we may end up deleting the namespaces before the DB operators have a chance to delete the resources they manage, leaving the namespaces in an endless Terminating state waiting for finalizers to be removed. The everest operator should have a Deleting status that waits for the DB operators to delete their DB CRs before removing the corresponding DBC CR. Until this is implemented, we work around this by sleeping for two minutes to give the DB operators a chance to delete the resources they manage before we delete the namespaces. * EVEREST-838 Update everest-operator go mod * EVEREST-838 increase tests timeout to 10 min With the recent changes that introduced the install of the monitoring stack by default, the install command now takes longer so we shall increase the test timeout to avoid intermittent CI failures. * EVEREST-838 fix cli-tests with new uninstall command --- cli-tests/playwright.config.ts | 2 +- cli-tests/tests/flow/all-operators.spec.ts | 9 ++++++--- go.mod | 2 +- go.sum | 4 ++-- pkg/uninstall/uninstall.go | 14 ++++++++++++++ 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/cli-tests/playwright.config.ts b/cli-tests/playwright.config.ts index d01a77c8..53b87fc0 100644 --- a/cli-tests/playwright.config.ts +++ b/cli-tests/playwright.config.ts @@ -31,7 +31,7 @@ export default defineConfig({ forbidOnly: !!process.env.CI, /* Retry on CI only */ retries: process.env.CI ? 
2 : 0, - timeout: 300_000, + timeout: 600_000, /* Opt out of parallel tests on CI. */ workers: process.env.CI ? 1 : undefined, /* Reporter to use. See https://playwright.dev/docs/test-reporters */ diff --git a/cli-tests/tests/flow/all-operators.spec.ts b/cli-tests/tests/flow/all-operators.spec.ts index b0cc5de8..2a0a93fd 100644 --- a/cli-tests/tests/flow/all-operators.spec.ts +++ b/cli-tests/tests/flow/all-operators.spec.ts @@ -98,11 +98,14 @@ test.describe('Everest CLI install', async () => { ); await out.assertSuccess(); - // check that the deployment does not exist - out = await cli.exec('kubectl get deploy percona-everest -n everest-system'); + // check that the namespace does not exist + out = await cli.exec('kubectl get ns everest-system everest-monitoring everest-olm everest-all'); await out.outErrContainsNormalizedMany([ - 'Error from server (NotFound): deployments.apps "percona-everest" not found', + 'Error from server (NotFound): namespaces "everest-system" not found', + 'Error from server (NotFound): namespaces "everest-monitoring" not found', + 'Error from server (NotFound): namespaces "everest-olm" not found', + 'Error from server (NotFound): namespaces "everest-all" not found', ]); }); diff --git a/go.mod b/go.mod index 799b85a7..7fe0fe11 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/hashicorp/go-version v1.6.0 github.com/operator-framework/api v0.22.0 github.com/operator-framework/operator-lifecycle-manager v0.26.0 - github.com/percona/everest-operator v0.6.0-dev1.0.20240216145455-647ba87146d4 + github.com/percona/everest-operator v0.6.0-dev1.0.20240220114053-fae6111d9818 github.com/spf13/cobra v1.8.0 github.com/spf13/viper v1.18.2 github.com/stretchr/testify v1.8.4 diff --git a/go.sum b/go.sum index 96a7ee38..8b2e5446 100644 --- a/go.sum +++ b/go.sum @@ -525,8 +525,8 @@ github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/9 github.com/pelletier/go-toml v1.7.0/go.mod 
h1:vwGMzjaWMwyfHwgIBhI2YUM4fB6nL6lVAvS1LBMMhTE= github.com/pelletier/go-toml/v2 v2.1.0 h1:FnwAJ4oYMvbT/34k9zzHuZNrhlz48GB3/s6at6/MHO4= github.com/pelletier/go-toml/v2 v2.1.0/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc= -github.com/percona/everest-operator v0.6.0-dev1.0.20240216145455-647ba87146d4 h1:nc8t3swfbQAIQeZhOhtDlGrAoP50I7wcJ1VIpK8DJLw= -github.com/percona/everest-operator v0.6.0-dev1.0.20240216145455-647ba87146d4/go.mod h1:45pGpvWrPy495qiQqxNuOJor4wif+vTTTJP4Qee8qZk= +github.com/percona/everest-operator v0.6.0-dev1.0.20240220114053-fae6111d9818 h1:w4E4zlSTRQQk2/tFAFO5WGquvKRg2ocw7hxcbRjUT58= +github.com/percona/everest-operator v0.6.0-dev1.0.20240220114053-fae6111d9818/go.mod h1:45pGpvWrPy495qiQqxNuOJor4wif+vTTTJP4Qee8qZk= github.com/percona/percona-backup-mongodb v1.8.1-0.20230920143330-3b1c2e263901 h1:BDgsZRCjEuxl2/z4yWBqB0s8d20shuIDks7/RVdZiLs= github.com/percona/percona-backup-mongodb v1.8.1-0.20230920143330-3b1c2e263901/go.mod h1:fZRCMpUqkWlLVdRKqqaj001LoVP2eo6F0ZhoMPeXDng= github.com/percona/percona-postgresql-operator v0.0.0-20231220140959-ad5eef722609 h1:+UOK4gcHrRgqjo4smgfwT7/0apF6PhAJdQIdAV4ub/M= diff --git a/pkg/uninstall/uninstall.go b/pkg/uninstall/uninstall.go index 3a8d9520..2329df1f 100644 --- a/pkg/uninstall/uninstall.go +++ b/pkg/uninstall/uninstall.go @@ -216,6 +216,20 @@ func (u *Uninstall) deleteDBs(ctx context.Context) error { // Wait for all database clusters to be deleted, or timeout after 5 minutes. u.l.Info("Waiting for database clusters to be deleted") + // XXX: When deleting a DBC CR, the everest operator doesn't wait for the + // DB operator's CRs to be deleted. Thus, as soon as we delete the DBC CRs, + // these cease to exist in the cluster and the polling below will return + // immediately. 
If we don't wait for the DB operators to process the + // deletion of the CRs, we may end up deleting the namespaces before the DB + // operators have a chance to delete the resources they manage, leaving the + // namespaces in an endless Terminating state waiting for finalizers to be + // removed. + // The everest operator should have a Deleting status that waits for the DB + // operators to delete their DB CRs before removing the corresponding DBC + // CR. Until this is implemented, we work around this by sleeping for two + // minutes to give the DB operators a chance to delete the resources they + // manage before we delete the namespaces. + time.Sleep(2 * time.Minute) return wait.PollUntilContextTimeout(ctx, 5*time.Second, 5*time.Minute, false, func(ctx context.Context) (bool, error) { allDBs, err := u.getDBs(ctx) if err != nil {