From 9372926b5cc9b2c1c51689f9e4b0e46148cbba7c Mon Sep 17 00:00:00 2001 From: "vitess-bot[bot]" <108069721+vitess-bot[bot]@users.noreply.github.com> Date: Thu, 7 Sep 2023 11:49:21 +0300 Subject: [PATCH] [release-17.0] OnlineDDL: fix nil 'completed_timestamp' for cancelled migrations (#13928) (#13937) Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Co-authored-by: vitess-bot[bot] <108069721+vitess-bot[bot]@users.noreply.github.com> Co-authored-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../scheduler/onlineddl_scheduler_test.go | 3 ++ .../onlineddl/vrepl/onlineddl_vrepl_test.go | 3 ++ go/test/endtoend/onlineddl/vtgate_util.go | 36 +++++++++++++++++++ go/vt/vttablet/onlineddl/executor.go | 19 ++++++++++ go/vt/vttablet/onlineddl/schema.go | 5 +-- 5 files changed, 64 insertions(+), 2 deletions(-) diff --git a/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go b/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go index 4030b3a9184..74aebf69e15 100644 --- a/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go +++ b/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go @@ -274,6 +274,9 @@ func TestSchemaChange(t *testing.T) { t.Run("summary: validate sequential migration IDs", func(t *testing.T) { onlineddl.ValidateSequentialMigrationIDs(t, &vtParams, shards) }) + t.Run("summary: validate completed_timestamp", func(t *testing.T) { + onlineddl.ValidateCompletedTimestamp(t, &vtParams) + }) } func testScheduler(t *testing.T) { diff --git a/go/test/endtoend/onlineddl/vrepl/onlineddl_vrepl_test.go b/go/test/endtoend/onlineddl/vrepl/onlineddl_vrepl_test.go index b012cd4f074..ce0908897eb 100644 --- a/go/test/endtoend/onlineddl/vrepl/onlineddl_vrepl_test.go +++ b/go/test/endtoend/onlineddl/vrepl/onlineddl_vrepl_test.go @@ -896,6 +896,9 @@ func TestSchemaChange(t *testing.T) { t.Run("summary: validate sequential migration IDs", func(t *testing.T) { onlineddl.ValidateSequentialMigrationIDs(t, &vtParams, shards) }) + t.Run("summary: validate completed_timestamp", func(t *testing.T) { + onlineddl.ValidateCompletedTimestamp(t, &vtParams) + }) } func insertRow(t *testing.T) { diff --git a/go/test/endtoend/onlineddl/vtgate_util.go b/go/test/endtoend/onlineddl/vtgate_util.go index e59e5759a75..c0ffc120025 100644 --- a/go/test/endtoend/onlineddl/vtgate_util.go +++ b/go/test/endtoend/onlineddl/vtgate_util.go @@ -40,6 +40,14 @@ import ( "github.com/stretchr/testify/require" ) +var ( + testsStartupTime time.Time +) + +func init() { + testsStartupTime = time.Now() +} + // VtgateExecQuery runs a query on VTGate using given query params func VtgateExecQuery(t *testing.T, vtParams *mysql.ConnParams, query string, expectError string) *sqltypes.Result { t.Helper() @@ -429,3 +437,31 @@ func ValidateSequentialMigrationIDs(t *testing.T, vtParams *mysql.ConnParams, sh assert.Equalf(t, count, shardMax[shard]-shardMin[shard]+1, "mismatch: shared=%v, count=%v, min=%v, max=%v", shard, count, shardMin[shard], shardMax[shard]) } } + +// ValidateCompletedTimestamp ensures that any migration in `cancelled`, `completed`, `failed` statuses +// has a non-nil and valid `completed_timestamp` value. +func ValidateCompletedTimestamp(t *testing.T, vtParams *mysql.ConnParams) { + require.False(t, testsStartupTime.IsZero()) + r := VtgateExecQuery(t, vtParams, "show vitess_migrations", "") + + completedTimestampNumValidations := 0 + for _, row := range r.Named().Rows { + migrationStatus := row.AsString("migration_status", "") + require.NotEmpty(t, migrationStatus) + switch migrationStatus { + case string(schema.OnlineDDLStatusComplete), + string(schema.OnlineDDLStatusFailed), + string(schema.OnlineDDLStatusCancelled): + { + assert.False(t, row["completed_timestamp"].IsNull()) + // Also make sure the timestamp is "real", and that it is recent. + timestamp := row.AsString("completed_timestamp", "") + completedTime, err := time.Parse(sqltypes.TimestampFormat, timestamp) + assert.NoError(t, err) + assert.Greater(t, completedTime.Unix(), testsStartupTime.Unix()) + completedTimestampNumValidations++ + } + } + } + assert.NotZero(t, completedTimestampNumValidations) +} diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 6c112063eb8..0bf1043a6fa 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -76,6 +76,14 @@ var ( ErrMigrationNotFound = errors.New("migration not found") ) +var ( + // fixCompletedTimestampDone fixes a nil `completed_tiemstamp` columns, see + // https://github.com/vitessio/vitess/issues/13927 + // The fix is in release-18.0 + // TODO: remove in release-19.0 + fixCompletedTimestampDone bool +) + var emptyResult = &sqltypes.Result{} var acceptableDropTableIfExistsErrorCodes = []mysql.ErrorCode{mysql.ERCantFindFile, mysql.ERNoSuchTable} var copyAlgorithm = sqlparser.AlgorithmValue(sqlparser.CopyStr) @@ -3810,6 +3818,17 @@ func (e *Executor) gcArtifacts(ctx context.Context) error { e.migrationMutex.Lock() defer e.migrationMutex.Unlock() + // v18 fix. Remove in v19 + if !fixCompletedTimestampDone { + if _, err := e.execQuery(ctx, sqlFixCompletedTimestamp); err != nil { + // This query fixes a bug where stale migrations were marked as 'cancelled' or 'failed' without updating 'completed_timestamp' + // Running this query retroactively sets completed_timestamp + // This fix is created in v18 and can be removed in v19 + return err + } + fixCompletedTimestampDone = true + } + query, err := sqlparser.ParseAndBind(sqlSelectUncollectedArtifacts, sqltypes.Int64BindVariable(int64((retainOnlineDDLTables).Seconds())), ) diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go index 39288248cdb..5bd02821d35 100644 --- a/go/vt/vttablet/onlineddl/schema.go +++ b/go/vt/vttablet/onlineddl/schema.go @@ -66,7 +66,8 @@ const ( migration_uuid=%a ` sqlUpdateMigrationStatusFailedOrCancelled = `UPDATE _vt.schema_migrations - SET migration_status=IF(cancelled_timestamp IS NULL, 'failed', 'cancelled') + SET migration_status=IF(cancelled_timestamp IS NULL, 'failed', 'cancelled'), + completed_timestamp=NOW(6) WHERE migration_uuid=%a ` @@ -355,7 +356,7 @@ const ( SET completed_timestamp=NOW(6) WHERE - migration_status='failed' + migration_status IN ('cancelled', 'failed') AND cleanup_timestamp IS NULL AND completed_timestamp IS NULL `