From 70a73fb55b70b0f13688da0568b93b066e677c35 Mon Sep 17 00:00:00 2001 From: "vitess-bot[bot]" <108069721+vitess-bot[bot]@users.noreply.github.com> Date: Wed, 20 Sep 2023 13:10:09 +0300 Subject: [PATCH] [release-17.0] OnlineDDL: cleanup cancelled migration artifacts; support `--retain-artifacts=` DDL strategy flag (#14029) (#14037) Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Co-authored-by: vitess-bot[bot] <108069721+vitess-bot[bot]@users.noreply.github.com> Co-authored-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../scheduler/onlineddl_scheduler_test.go | 54 +++++++++++++ go/vt/schema/ddl_strategy.go | 37 ++++++++- go/vt/schema/ddl_strategy_test.go | 77 ++++++++++++++++++- go/vt/vttablet/onlineddl/executor.go | 5 ++ go/vt/vttablet/onlineddl/schema.go | 2 +- 5 files changed, 172 insertions(+), 3 deletions(-) diff --git a/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go b/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go index 74aebf69e15..841229fbfc3 100644 --- a/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go +++ b/go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go @@ -878,6 +878,60 @@ func testScheduler(t *testing.T) { }) }) + t.Run("Cleanup artifacts", func(t *testing.T) { + // Create a migration with a low --retain-artifacts value. + // We will cancel the migration and expect the artifact to be cleaned. + t.Run("start migration", func(t *testing.T) { + t1uuid = testOnlineDDLStatement(t, createParams(trivialAlterT1Statement, ddlStrategy+" --postpone-completion --retain-artifacts=1s", "vtctl", "", "", true)) // skip wait + onlineddl.WaitForMigrationStatus(t, &vtParams, shards, t1uuid, normalWaitTime, schema.OnlineDDLStatusRunning) + }) + var artifacts []string + t.Run("validate artifact exists", func(t *testing.T) { + rs := onlineddl.ReadMigrations(t, &vtParams, t1uuid) + require.NotNil(t, rs) + row := rs.Named().Row() + require.NotNil(t, row) + + artifacts = textutil.SplitDelimitedList(row.AsString("artifacts", "")) + assert.NotEmpty(t, artifacts) + assert.Equal(t, 1, len(artifacts)) + checkTable(t, artifacts[0], true) + + retainArtifactsSeconds := row.AsInt64("retain_artifacts_seconds", 0) + assert.Equal(t, int64(1), retainArtifactsSeconds) // due to --retain-artifacts=1s + }) + t.Run("cancel migration", func(t *testing.T) { + onlineddl.CheckCancelMigration(t, &vtParams, shards, t1uuid, true) + status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards, t1uuid, normalWaitTime, schema.OnlineDDLStatusFailed, schema.OnlineDDLStatusCancelled) + fmt.Printf("# Migration status (for debug purposes): <%s>\n", status) + onlineddl.CheckMigrationStatus(t, &vtParams, shards, t1uuid, schema.OnlineDDLStatusCancelled) + }) + t.Run("wait for cleanup", func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), normalWaitTime) + defer cancel() + + for { + rs := onlineddl.ReadMigrations(t, &vtParams, t1uuid) + require.NotNil(t, rs) + row := rs.Named().Row() + require.NotNil(t, row) + if !row["cleanup_timestamp"].IsNull() { + // This is what we've been waiting for + break + } + select { + case <-ctx.Done(): + assert.Fail(t, "timeout waiting for cleanup") + return + case <-time.After(time.Second): + } + } + }) + t.Run("validate artifact does not exist", func(t *testing.T) { + checkTable(t, artifacts[0], false) + }) + }) + // INSTANT DDL instantDDLCapable, err := capableOf(mysql.InstantAddLastColumnFlavorCapability) require.NoError(t, err) diff --git a/go/vt/schema/ddl_strategy.go b/go/vt/schema/ddl_strategy.go index 0a1ab8b5888..440165b9167 100644 --- a/go/vt/schema/ddl_strategy.go +++ b/go/vt/schema/ddl_strategy.go @@ -28,6 +28,7 @@ import ( var ( strategyParserRegexp = regexp.MustCompile(`^([\S]+)\s+(.*)$`) cutOverThresholdFlagRegexp = regexp.MustCompile(fmt.Sprintf(`^[-]{1,2}%s=(.*?)$`, cutOverThresholdFlag)) + retainArtifactsFlagRegexp = regexp.MustCompile(fmt.Sprintf(`^[-]{1,2}%s=(.*?)$`, retainArtifactsFlag)) ) const ( @@ -43,6 +44,7 @@ const ( preferInstantDDL = "prefer-instant-ddl" fastRangeRotationFlag = "fast-range-rotation" cutOverThresholdFlag = "cut-over-threshold" + retainArtifactsFlag = "retain-artifacts" vreplicationTestSuite = "vreplication-test-suite" allowForeignKeysFlag = "unsafe-allow-foreign-keys" ) @@ -109,6 +111,9 @@ func ParseDDLStrategy(strategyVariable string) (*DDLStrategySetting, error) { if _, err := setting.CutOverThreshold(); err != nil { return nil, err } + if _, err := setting.RetainArtifactsDuration(); err != nil { + return nil, err + } return setting, nil } @@ -193,7 +198,16 @@ func isCutOverThresholdFlag(opt string) (string, bool) { return submatch[1], true } -// CutOverThreshold returns a list of shards specified in '--shards=...', or an empty slice if unspecified +// isRetainArtifactsFlag returns true when given option denotes a `--retain-artifacts=[...]` flag +func isRetainArtifactsFlag(opt string) (string, bool) { + submatch := retainArtifactsFlagRegexp.FindStringSubmatch(opt) + if len(submatch) == 0 { + return "", false + } + return submatch[1], true +} + +// CutOverThreshold returns a the duration threshold indicated by --cut-over-threshold func (setting *DDLStrategySetting) CutOverThreshold() (d time.Duration, err error) { // We do some ugly manual parsing of --cut-over-threshold value opts, _ := shlex.Split(setting.Options) @@ -211,6 +225,24 @@ func (setting *DDLStrategySetting) CutOverThreshold() (d time.Duration, err erro return d, err } +// RetainArtifactsDuration returns a the duration indicated by --retain-artifacts +func (setting *DDLStrategySetting) RetainArtifactsDuration() (d time.Duration, err error) { + // We do some ugly manual parsing of --retain-artifacts + opts, _ := shlex.Split(setting.Options) + for _, opt := range opts { + if val, isRetainArtifacts := isRetainArtifactsFlag(opt); isRetainArtifacts { + // value is possibly quoted + if s, err := strconv.Unquote(val); err == nil { + val = s + } + if val != "" { + d, err = time.ParseDuration(val) + } + } + } + return d, err +} + // IsVreplicationTestSuite checks if strategy options include --vreplicatoin-test-suite func (setting *DDLStrategySetting) IsVreplicationTestSuite() bool { return setting.hasFlag(vreplicationTestSuite) @@ -229,6 +261,9 @@ func (setting *DDLStrategySetting) RuntimeOptions() []string { if _, ok := isCutOverThresholdFlag(opt); ok { continue } + if _, ok := isRetainArtifactsFlag(opt); ok { + continue + } switch { case isFlag(opt, declarativeFlag): case isFlag(opt, skipTopoFlag): diff --git a/go/vt/schema/ddl_strategy_test.go b/go/vt/schema/ddl_strategy_test.go index 4ec1a1535ea..62d8e605dda 100644 --- a/go/vt/schema/ddl_strategy_test.go +++ b/go/vt/schema/ddl_strategy_test.go @@ -89,7 +89,7 @@ func TestIsCutOverThresholdFlag(t *testing.T) { setting, err := ParseDDLStrategy("online " + ts.s) assert.NoError(t, err) - val, isCutOver := isCutOverThresholdFlag((ts.s)) + val, isCutOver := isCutOverThresholdFlag(ts.s) assert.Equal(t, ts.expect, isCutOver) assert.Equal(t, ts.val, val) @@ -102,6 +102,69 @@ func TestIsCutOverThresholdFlag(t *testing.T) { } } +func TestIsExpireArtifactsFlag(t *testing.T) { + tt := []struct { + s string + expect bool + val string + d time.Duration + }{ + { + s: "something", + }, + { + s: "-retain-artifacts", + }, + { + s: "--retain-artifacts", + }, + { + s: "--retain-artifacts=", + expect: true, + }, + { + s: "--retain-artifacts=0", + expect: true, + val: "0", + d: 0, + }, + { + s: "-retain-artifacts=0", + expect: true, + val: "0", + d: 0, + }, + { + s: "--retain-artifacts=1m", + expect: true, + val: "1m", + d: time.Minute, + }, + { + s: `--retain-artifacts="1m"`, + expect: true, + val: `"1m"`, + d: time.Minute, + }, + } + for _, ts := range tt { + t.Run(ts.s, func(t *testing.T) { + setting, err := ParseDDLStrategy("online " + ts.s) + assert.NoError(t, err) + + val, isRetainArtifacts := isRetainArtifactsFlag(ts.s) + assert.Equal(t, ts.expect, isRetainArtifacts) + assert.Equal(t, ts.val, val) + + if ts.expect { + d, err := setting.RetainArtifactsDuration() + assert.NoError(t, err) + assert.Equal(t, ts.d, d) + } + }) + } +} + func TestParseDDLStrategy(t *testing.T) { tt := []struct { strategyVariable string @@ -117,6 +180,7 @@ func TestParseDDLStrategy(t *testing.T) { fastRangeRotation bool allowForeignKeys bool cutOverThreshold time.Duration + expireArtifacts time.Duration runtimeOptions string err error }{ @@ -238,6 +302,13 @@ func TestParseDDLStrategy(t *testing.T) { runtimeOptions: "", cutOverThreshold: 5 * time.Minute, }, + { + strategyVariable: "vitess --retain-artifacts=4m", + strategy: DDLStrategyVitess, + options: "--retain-artifacts=4m", + runtimeOptions: "", + expireArtifacts: 4 * time.Minute, + }, } for _, ts := range tt { t.Run(ts.strategyVariable, func(t *testing.T) { @@ -273,4 +344,8 @@ func TestParseDDLStrategy(t *testing.T) { _, err := ParseDDLStrategy("online --cut-over-threshold=3") assert.Error(t, err) } + { + _, err := ParseDDLStrategy("online --retain-artifacts=3") + assert.Error(t, err) + } } diff --git a/go/vt/vttablet/onlineddl/executor.go b/go/vt/vttablet/onlineddl/executor.go index 0bf1043a6fa..9b3c502c004 100644 --- a/go/vt/vttablet/onlineddl/executor.go +++ b/go/vt/vttablet/onlineddl/executor.go @@ -4703,6 +4703,11 @@ func (e *Executor) SubmitMigration( revertedUUID, _ := onlineDDL.GetRevertUUID() // Empty value if the migration is not actually a REVERT. Safe to ignore error. retainArtifactsSeconds := int64((retainOnlineDDLTables).Seconds()) + if retainArtifacts, _ := onlineDDL.StrategySetting().RetainArtifactsDuration(); retainArtifacts != 0 { + // Explicit retention indicated by `--retain-artifact` DDL strategy flag for this migration. Override! + retainArtifactsSeconds = int64((retainArtifacts).Seconds()) + } + _, allowConcurrentMigration := e.allowConcurrentMigration(onlineDDL) submitQuery, err := sqlparser.ParseAndBind(sqlInsertMigration, sqltypes.StringBindVariable(onlineDDL.UUID), diff --git a/go/vt/vttablet/onlineddl/schema.go b/go/vt/vttablet/onlineddl/schema.go index 5bd02821d35..abad3097211 100644 --- a/go/vt/vttablet/onlineddl/schema.go +++ b/go/vt/vttablet/onlineddl/schema.go @@ -345,7 +345,7 @@ const ( log_path FROM _vt.schema_migrations WHERE - migration_status IN ('complete', 'failed') + migration_status IN ('complete', 'cancelled', 'failed') AND cleanup_timestamp IS NULL AND completed_timestamp <= IF(retain_artifacts_seconds=0, NOW() - INTERVAL %a SECOND,