From 8bdd1bdc6d323659cca90b1f757c1fa1c968f642 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Mon, 11 Nov 2024 20:10:57 +0200 Subject: [PATCH] Online DDL: `ANALYZE` the shadow table before cut-over (#17201) Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Signed-off-by: Renan Rangel --- .../scheduler/onlineddl_scheduler_test.go | 11 +++++ .../onlineddl/vrepl/onlineddl_vrepl_test.go | 1 + .../schema/onlineddl/schema_migrations.sql | 1 + go/vt/vttablet/onlineddl/executor.go | 44 ++++++++++++++++--- go/vt/vttablet/onlineddl/schema.go | 3 ++ 5 files changed, 54 insertions(+), 6 deletions(-) diff --git a/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go b/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go index b7e18e9af7c..dca28e52a8c 100644 --- a/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go +++ b/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go @@ -562,6 +562,16 @@ func testScheduler(t *testing.T) { status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards, t1uuid, normalWaitTime, schema.OnlineDDLStatusRunning) fmt.Printf("# Migration status (for debug purposes): <%s>\n", status) }) + + t.Run("wait for ready_to_complete", func(t *testing.T) { + waitForReadyToComplete(t, t1uuid, true) + rs := onlineddl.ReadMigrations(t, &vtParams, t1uuid) + require.NotNil(t, rs) + for _, row := range rs.Named().Rows { + assert.True(t, row["shadow_analyzed_timestamp"].IsNull()) + } + }) + t.Run("check postpone_completion", func(t *testing.T) { rs := onlineddl.ReadMigrations(t, &vtParams, t1uuid) require.NotNil(t, rs) @@ -582,6 +592,7 @@ func testScheduler(t *testing.T) { for _, row := range rs.Named().Rows { postponeCompletion := row.AsInt64("postpone_completion", 0) assert.Equal(t, int64(0), postponeCompletion) + assert.False(t, row["shadow_analyzed_timestamp"].IsNull()) } }) }) diff --git a/go/test/endtoend/onlineddl/vrepl/onlineddl_vrepl_test.go b/go/test/endtoend/onlineddl/vrepl/onlineddl_vrepl_test.go index 161b1566680..92dfa2b0c4a 100644 --- a/go/test/endtoend/onlineddl/vrepl/onlineddl_vrepl_test.go +++ b/go/test/endtoend/onlineddl/vrepl/onlineddl_vrepl_test.go @@ -304,6 +304,7 @@ func TestVreplSchemaChanges(t *testing.T) { for _, row := range rs.Named().Rows { retainArtifactSeconds := row.AsInt64("retain_artifacts_seconds", 0) assert.Equal(t, int64(-1), retainArtifactSeconds) + assert.False(t, row["shadow_analyzed_timestamp"].IsNull()) } }) t.Run("successful online alter, vtctl", func(t *testing.T) { diff --git a/go/vt/sidecardb/schema/onlineddl/schema_migrations.sql b/go/vt/sidecardb/schema/onlineddl/schema_migrations.sql index 85d87edefc2..e0b4e27b180 100644 --- a/go/vt/sidecardb/schema/onlineddl/schema_migrations.sql +++ b/go/vt/sidecardb/schema/onlineddl/schema_migrations.sql @@ -72,6 +72,7 @@ CREATE TABLE IF NOT EXISTS schema_migrations `is_immediate_operation` tinyint unsigned NOT NULL DEFAULT '0', `reviewed_timestamp` timestamp NULL DEFAULT NULL, `ready_to_complete_timestamp` timestamp NULL DEFAULT NULL, + `shadow_analyzed_timestamp` timestamp NULL DEFAULT NULL, `removed_foreign_key_names` text NOT NULL, `last_cutover_attempt_timestamp` timestamp NULL DEFAULT NULL, `force_cutover` tinyint unsigned NOT NULL DEFAULT '0', diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index af08347c545..97a5597d71f 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -880,10 +880,11 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh } // information about source tablet - onlineDDL, _, err := e.readMigration(ctx, s.workflow) + onlineDDL, row, err := e.readMigration(ctx, s.workflow) if err != nil { return vterrors.Wrapf(err, "cutover: failed reading migration") } + needsShadowTableAnalysis := row["shadow_analyzed_timestamp"].IsNull() isVreplicationTestSuite := onlineDDL.StrategySetting().IsVreplicationTestSuite() e.updateMigrationStage(ctx, onlineDDL.UUID, "starting cut-over") @@ -942,12 +943,43 @@ func (e *Executor) cutOverVReplMigration(ctx context.Context, s *VReplStream, sh // still have a record of the sentry table, and gcArtifacts() will still be able to take // care of it in the future. }() - parsed := sqlparser.BuildParsedQuery(sqlCreateSentryTable, sentryTableName) - if _, err := e.execQuery(ctx, parsed.Query); err != nil { - return vterrors.Wrapf(err, "failed creating sentry table") - } - e.updateMigrationStage(ctx, onlineDDL.UUID, "sentry table created: %s", sentryTableName) + preparation := func() error { + preparationsConn, err := e.pool.Get(ctx, nil) + if err != nil { + return vterrors.Wrap(err, "failed getting preparation connection") + } + defer preparationsConn.Recycle() + // Set large enough `@@lock_wait_timeout` so that it does not interfere with the cut-over operation. + // The code will ensure everything that needs to be terminated by `migrationCutOverThreshold` will be terminated. + preparationConnRestoreLockWaitTimeout, err := e.initConnectionLockWaitTimeout(ctx, preparationsConn.Conn, 3*migrationCutOverThreshold) + if err != nil { + return vterrors.Wrap(err, "failed setting lock_wait_timeout on locking connection") + } + defer preparationConnRestoreLockWaitTimeout() + + if needsShadowTableAnalysis { + // Run `ANALYZE TABLE` on the vreplication table so that it has up-to-date statistics at cut-over + parsed := sqlparser.BuildParsedQuery(sqlAnalyzeTable, vreplTable) + if _, err := preparationsConn.Conn.Exec(ctx, parsed.Query, -1, false); err != nil { + // Best effort only. Do not fail the mgiration if this fails. + _ = e.updateMigrationMessage(ctx, "failed ANALYZE shadow table", s.workflow) + } else { + _ = e.updateMigrationTimestamp(ctx, "shadow_analyzed_timestamp", s.workflow) + } + // This command will have blocked the table for writes, presumably only for a brief time. But this can cause + // vreplication to now lag. Thankfully we're gonna create the sentry table and waitForPos. + } + parsed := sqlparser.BuildParsedQuery(sqlCreateSentryTable, sentryTableName) + if _, err := preparationsConn.Conn.Exec(ctx, parsed.Query, 1, false); err != nil { + return vterrors.Wrapf(err, "failed creating sentry table") + } + e.updateMigrationStage(ctx, onlineDDL.UUID, "sentry table created: %s", sentryTableName) + return nil + } + if err := preparation(); err != nil { + return vterrors.Wrapf(err, "failed preparation") + } postSentryPos, err := e.primaryPosition(ctx) if err != nil { return vterrors.Wrapf(err, "failed getting primary pos after sentry creation") diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go index a30ab6b3ed9..1b120dfa58c 100644 --- a/go/vt/vttablet/onlineddl/schema.go +++ b/go/vt/vttablet/onlineddl/schema.go @@ -270,6 +270,7 @@ const ( cancelled_timestamp=NULL, completed_timestamp=NULL, last_cutover_attempt_timestamp=NULL, + shadow_analyzed_timestamp=NULL, cleanup_timestamp=NULL WHERE migration_status IN ('failed', 'cancelled') @@ -291,6 +292,7 @@ const ( cancelled_timestamp=NULL, completed_timestamp=NULL, last_cutover_attempt_timestamp=NULL, + shadow_analyzed_timestamp=NULL, cleanup_timestamp=NULL WHERE migration_status IN ('failed', 'cancelled') @@ -441,6 +443,7 @@ const ( postpone_launch, postpone_completion, is_immediate_operation, + shadow_analyzed_timestamp, reviewed_timestamp FROM _vt.schema_migrations WHERE