Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release-15.0] OnlineDDL: fix nil 'completed_timestamp' for cancelled migrations (#13928) #13935

Merged
merged 7 commits into from
Sep 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,9 @@ func TestSchemaChange(t *testing.T) {
}
})
})
t.Run("summary: validate completed_timestamp", func(t *testing.T) {
onlineddl.ValidateCompletedTimestamp(t, &vtParams)
})
}

// testOnlineDDLStatement runs an online DDL, ALTER statement
Expand Down
3 changes: 3 additions & 0 deletions go/test/endtoend/onlineddl/vrepl/onlineddl_vrepl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,9 @@ func TestSchemaChange(t *testing.T) {
}
})
})
t.Run("summary: validate completed_timestamp", func(t *testing.T) {
onlineddl.ValidateCompletedTimestamp(t, &vtParams)
})
}

func insertRow(t *testing.T) {
Expand Down
73 changes: 73 additions & 0 deletions go/test/endtoend/onlineddl/vtgate_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ import (
"github.com/stretchr/testify/require"
)

var (
testsStartupTime time.Time
)

func init() {
testsStartupTime = time.Now()
}

// VtgateExecQuery runs a query on VTGate using given query params
func VtgateExecQuery(t *testing.T, vtParams *mysql.ConnParams, query string, expectError string) *sqltypes.Result {
t.Helper()
Expand Down Expand Up @@ -344,3 +352,68 @@ func WaitForThrottledTimestamp(t *testing.T, vtParams *mysql.ConnParams, uuid st
t.Error("timeout waiting for last_throttled_timestamp to have nonempty value")
return
}

// ValidateSequentialMigrationIDs validates that schem_migrations.id column, which is an AUTO_INCREMENT, does
// not have gaps
func ValidateSequentialMigrationIDs(t *testing.T, vtParams *mysql.ConnParams, shards []cluster.Shard) {
r := VtgateExecQuery(t, vtParams, "show vitess_migrations", "")
shardMin := map[string]uint64{}
shardMax := map[string]uint64{}
shardCount := map[string]uint64{}

for _, row := range r.Named().Rows {
id := row.AsUint64("id", 0)
require.NotZero(t, id)

shard := row.AsString("shard", "")
require.NotEmpty(t, shard)

if _, ok := shardMin[shard]; !ok {
shardMin[shard] = id
shardMax[shard] = id
}
if id < shardMin[shard] {
shardMin[shard] = id
}
if id > shardMax[shard] {
shardMax[shard] = id
}
shardCount[shard]++
}
require.NotEmpty(t, shards)
assert.Equal(t, len(shards), len(shardMin))
assert.Equal(t, len(shards), len(shardMax))
assert.Equal(t, len(shards), len(shardCount))
for shard, count := range shardCount {
assert.NotZero(t, count)
assert.Equalf(t, count, shardMax[shard]-shardMin[shard]+1, "mismatch: shared=%v, count=%v, min=%v, max=%v", shard, count, shardMin[shard], shardMax[shard])
}
}

// ValidateCompletedTimestamp ensures that any migration in `cancelled`, `completed`, `failed` statuses
// has a non-nil and valid `completed_timestamp` value.
func ValidateCompletedTimestamp(t *testing.T, vtParams *mysql.ConnParams) {
require.False(t, testsStartupTime.IsZero())
r := VtgateExecQuery(t, vtParams, "show vitess_migrations", "")

completedTimestampNumValidations := 0
for _, row := range r.Named().Rows {
migrationStatus := row.AsString("migration_status", "")
require.NotEmpty(t, migrationStatus)
switch migrationStatus {
case string(schema.OnlineDDLStatusComplete),
string(schema.OnlineDDLStatusFailed),
string(schema.OnlineDDLStatusCancelled):
{
assert.False(t, row["completed_timestamp"].IsNull())
// Also make sure the timestamp is "real", and that it is recent.
timestamp := row.AsString("completed_timestamp", "")
completedTime, err := time.Parse(sqltypes.TimestampFormat, timestamp)
assert.NoError(t, err)
assert.Greater(t, completedTime.Unix(), testsStartupTime.Unix())
completedTimestampNumValidations++
}
}
}
assert.NotZero(t, completedTimestampNumValidations)
}
2 changes: 1 addition & 1 deletion go/test/endtoend/onlineddl/vttablet_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import (
)

// WaitForVReplicationStatus waits for a vreplication stream to be in one of given states, or timeout
func WaitForVReplicationStatus(t *testing.T, vtParams *mysql.ConnParams, shards []cluster.Shard, uuid string, timeout time.Duration, expectStatuses ...string) (status string) {
func WaitForVReplicationStatus(t *testing.T, vtParams *mysql.ConnParams, shards []cluster.Shard, uuid string, timeout time.Duration, expectStatuses ...string) (status string) { // nolint:revive

query, err := sqlparser.ParseAndBind("select workflow, state from _vt.vreplication where workflow=%a",
sqltypes.StringBindVariable(uuid),
Expand Down
23 changes: 17 additions & 6 deletions go/vt/vttablet/onlineddl/executor.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,13 @@ var vexecInsertTemplates = []string{
'val1', 'val2', 'val3', 'val4', 'val5', 'val6', 'val7', 'val8', 'val9', FROM_UNIXTIME(0), 'vala', 'valb'
)`,
}
var (
// fixCompletedTimestampDone fixes a nil `completed_tiemstamp` columns, see
// https://github.com/vitessio/vitess/issues/13927
// The fix is in release-18.0
// TODO: remove in release-19.0
fixCompletedTimestampDone bool
)

var emptyResult = &sqltypes.Result{}
var acceptableDropTableIfExistsErrorCodes = []int{mysql.ERCantFindFile, mysql.ERNoSuchTable}
Expand Down Expand Up @@ -3754,13 +3761,17 @@ func (e *Executor) gcArtifacts(ctx context.Context) error {
e.migrationMutex.Lock()
defer e.migrationMutex.Unlock()

if _, err := e.execQuery(ctx, sqlFixCompletedTimestamp); err != nil {
// This query fixes a bug where stale migrations were marked as 'failed' without updating 'completed_timestamp'
// see https://github.com/vitessio/vitess/issues/8499
// Running this query retroactively sets completed_timestamp
// This 'if' clause can be removed in version v13
return err
// v18 fix. Remove in v19
if !fixCompletedTimestampDone {
if _, err := e.execQuery(ctx, sqlFixCompletedTimestamp); err != nil {
// This query fixes a bug where stale migrations were marked as 'cancelled' or 'failed' without updating 'completed_timestamp'
// Running this query retroactively sets completed_timestamp
// This fix is created in v18 and can be removed in v19
return err
}
fixCompletedTimestampDone = true
}

query, err := sqlparser.ParseAndBind(sqlSelectUncollectedArtifacts,
sqltypes.Int64BindVariable(int64((retainOnlineDDLTables).Seconds())),
)
Expand Down
5 changes: 3 additions & 2 deletions go/vt/vttablet/onlineddl/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ const (
migration_uuid=%a
`
sqlUpdateMigrationStatusFailedOrCancelled = `UPDATE _vt.schema_migrations
SET migration_status=IF(cancelled_timestamp IS NULL, 'failed', 'cancelled')
SET migration_status=IF(cancelled_timestamp IS NULL, 'failed', 'cancelled'),
completed_timestamp=NOW(6)
WHERE
migration_uuid=%a
`
Expand Down Expand Up @@ -414,7 +415,7 @@ const (
SET
completed_timestamp=NOW()
WHERE
migration_status='failed'
migration_status IN ('cancelled', 'failed')
AND cleanup_timestamp IS NULL
AND completed_timestamp IS NULL
`
Expand Down